gen_common.go 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:build ignore
  5. // +build ignore
  6. package main
  7. // This code is shared between the main code generator and the test code.
  8. import (
  9. "flag"
  10. "log"
  11. "strconv"
  12. "strings"
  13. "golang.org/x/text/internal/gen"
  14. "golang.org/x/text/internal/ucd"
  15. )
  // outputFile is the value of the -out command-line flag: the name of the
  // output file. It defaults to "tables.go".
  var outputFile = flag.String("out", "tables.go", "output file")
  19. var typeMap = map[string]elem{
  20. "A": tagAmbiguous,
  21. "N": tagNeutral,
  22. "Na": tagNarrow,
  23. "W": tagWide,
  24. "F": tagFullwidth,
  25. "H": tagHalfwidth,
  26. }
  27. // getWidthData calls f for every entry for which it is defined.
  28. //
  29. // f may be called multiple times for the same rune. The last call to f is the
  30. // correct value. f is not called for all runes. The default tag type is
  31. // Neutral.
  32. func getWidthData(f func(r rune, tag elem, alt rune)) {
  33. // Set the default values for Unified Ideographs. In line with Annex 11,
  34. // we encode full ranges instead of the defined runes in Unified_Ideograph.
  35. for _, b := range []struct{ lo, hi rune }{
  36. {0x4E00, 0x9FFF}, // the CJK Unified Ideographs block,
  37. {0x3400, 0x4DBF}, // the CJK Unified Ideographs Externsion A block,
  38. {0xF900, 0xFAFF}, // the CJK Compatibility Ideographs block,
  39. {0x20000, 0x2FFFF}, // the Supplementary Ideographic Plane,
  40. {0x30000, 0x3FFFF}, // the Tertiary Ideographic Plane,
  41. } {
  42. for r := b.lo; r <= b.hi; r++ {
  43. f(r, tagWide, 0)
  44. }
  45. }
  46. inverse := map[rune]rune{}
  47. maps := map[string]bool{
  48. "<wide>": true,
  49. "<narrow>": true,
  50. }
  51. // We cannot reuse package norm's decomposition, as we need an unexpanded
  52. // decomposition. We make use of the opportunity to verify that the
  53. // decomposition type is as expected.
  54. ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
  55. r := p.Rune(0)
  56. s := strings.SplitN(p.String(ucd.DecompMapping), " ", 2)
  57. if !maps[s[0]] {
  58. return
  59. }
  60. x, err := strconv.ParseUint(s[1], 16, 32)
  61. if err != nil {
  62. log.Fatalf("Error parsing rune %q", s[1])
  63. }
  64. if inverse[r] != 0 || inverse[rune(x)] != 0 {
  65. log.Fatalf("Circular dependency in mapping between %U and %U", r, x)
  66. }
  67. inverse[r] = rune(x)
  68. inverse[rune(x)] = r
  69. })
  70. // <rune range>;<type>
  71. ucd.Parse(gen.OpenUCDFile("EastAsianWidth.txt"), func(p *ucd.Parser) {
  72. tag, ok := typeMap[p.String(1)]
  73. if !ok {
  74. log.Fatalf("Unknown width type %q", p.String(1))
  75. }
  76. r := p.Rune(0)
  77. alt, ok := inverse[r]
  78. if tag == tagFullwidth || tag == tagHalfwidth && r != wonSign {
  79. tag |= tagNeedsFold
  80. if !ok {
  81. log.Fatalf("Narrow or wide rune %U has no decomposition", r)
  82. }
  83. }
  84. f(r, tag, alt)
  85. })
  86. }