gen.go 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. package main
  6. import (
  7. "bytes"
  8. "log"
  9. "sort"
  10. "strings"
  11. "golang.org/x/text/internal/gen"
  12. "golang.org/x/text/internal/gen/bitfield"
  13. "golang.org/x/text/internal/ucd"
  14. )
  15. var (
  16. // computed by computeDirectOffsets
  17. directOffsets = map[string]int{}
  18. directData bytes.Buffer
  19. // computed by computeEntries
  20. entries []entry
  21. singleData bytes.Buffer
  22. index []uint16
  23. )
// entry describes one contiguous run of code points. Values are built by
// computeEntries; main passes the type to bitfield.Gen and each value to
// bitfield.Pack, so the struct tags below are consumed by that package.
type entry struct {
	// start is the first rune of the run.
	// NOTE(review): the tag presumably requests 21 bits and an accessor
	// named startRune; confirm against the bitfield package.
	start rune `bitfield:"21,startRune"`
	// numRunes is the length of the run. It is not set by computeEntries;
	// main fills it in as end-start+1 right before packing.
	numRunes int `bitfield:"16"`
	// end is the last rune of the run, inclusive. It has no bitfield tag and
	// is only used while generating (presumably it is not packed; verify).
	end rune
	// index is, for direct entries, the byte offset of the name within
	// directData. For all other entries it is the position in the index
	// slice of the offset of the run's first name.
	index int `bitfield:"16"`
	// base is, for direct entries, len(name). For all other entries it is
	// the singleData byte offset of the run's first name shifted right by 16.
	base int `bitfield:"6"`
	// direct is true when the name was found in directOffsets, i.e. the
	// whole run shares a single name stored in directData.
	direct bool `bitfield:""`
	// name is the name seen when the entry was created. computeEntries
	// compares it to decide whether an adjacent range can be merged.
	name string
}
  33. func main() {
  34. gen.Init()
  35. w := gen.NewCodeWriter()
  36. defer w.WriteVersionedGoFile("tables.go", "runenames")
  37. gen.WriteUnicodeVersion(w)
  38. computeDirectOffsets()
  39. computeEntries()
  40. if err := bitfield.Gen(w, entry{}, nil); err != nil {
  41. log.Fatal(err)
  42. }
  43. type entry uint64 // trick the generation code to use the entry type
  44. packed := []entry{}
  45. for _, e := range entries {
  46. e.numRunes = int(e.end - e.start + 1)
  47. v, err := bitfield.Pack(e, nil)
  48. if err != nil {
  49. log.Fatal(err)
  50. }
  51. packed = append(packed, entry(v))
  52. }
  53. index = append(index, uint16(singleData.Len()))
  54. w.WriteVar("entries", packed)
  55. w.WriteVar("index", index)
  56. w.WriteConst("directData", directData.String())
  57. w.WriteConst("singleData", singleData.String())
  58. }
  59. func computeDirectOffsets() {
  60. counts := map[string]int{}
  61. p := ucd.New(gen.OpenUCDFile("UnicodeData.txt"), ucd.KeepRanges)
  62. for p.Next() {
  63. start, end := p.Range(0)
  64. counts[getName(p)] += int(end-start) + 1
  65. }
  66. direct := []string{}
  67. for k, v := range counts {
  68. if v > 1 {
  69. direct = append(direct, k)
  70. }
  71. }
  72. sort.Strings(direct)
  73. for _, s := range direct {
  74. directOffsets[s] = directData.Len()
  75. directData.WriteString(s)
  76. }
  77. }
  78. func computeEntries() {
  79. p := ucd.New(gen.OpenUCDFile("UnicodeData.txt"), ucd.KeepRanges)
  80. for p.Next() {
  81. start, end := p.Range(0)
  82. last := entry{}
  83. if len(entries) > 0 {
  84. last = entries[len(entries)-1]
  85. }
  86. name := getName(p)
  87. if index, ok := directOffsets[name]; ok {
  88. if last.name == name && last.end+1 == start {
  89. entries[len(entries)-1].end = end
  90. continue
  91. }
  92. entries = append(entries, entry{
  93. start: start,
  94. end: end,
  95. index: index,
  96. base: len(name),
  97. direct: true,
  98. name: name,
  99. })
  100. continue
  101. }
  102. if start != end {
  103. log.Fatalf("Expected start == end, found %x != %x", start, end)
  104. }
  105. offset := singleData.Len()
  106. base := offset >> 16
  107. index = append(index, uint16(offset))
  108. singleData.WriteString(name)
  109. if last.base == base && last.end+1 == start {
  110. entries[len(entries)-1].end = start
  111. continue
  112. }
  113. entries = append(entries, entry{
  114. start: start,
  115. end: end,
  116. index: len(index) - 1,
  117. base: base,
  118. name: name,
  119. })
  120. }
  121. }
  122. func getName(p *ucd.Parser) string {
  123. s := p.String(ucd.Name)
  124. if s == "" {
  125. return ""
  126. }
  127. if s[0] == '<' {
  128. const first = ", First>"
  129. if i := strings.Index(s, first); i >= 0 {
  130. s = s[:i] + ">"
  131. }
  132. }
  133. return s
  134. }