gen_common.go 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. package main
  6. // This code is shared between the main code generator and the test code.
  7. import (
  8. "flag"
  9. "log"
  10. "strconv"
  11. "strings"
  12. "golang.org/x/text/internal/gen"
  13. "golang.org/x/text/internal/ucd"
  14. )
  15. var (
  16. outputFile = flag.String("out", "tables.go", "output file")
  17. )
  18. var typeMap = map[string]elem{
  19. "A": tagAmbiguous,
  20. "N": tagNeutral,
  21. "Na": tagNarrow,
  22. "W": tagWide,
  23. "F": tagFullwidth,
  24. "H": tagHalfwidth,
  25. }
  26. // getWidthData calls f for every entry for which it is defined.
  27. //
  28. // f may be called multiple times for the same rune. The last call to f is the
  29. // correct value. f is not called for all runes. The default tag type is
  30. // Neutral.
  31. func getWidthData(f func(r rune, tag elem, alt rune)) {
  32. // Set the default values for Unified Ideographs. In line with Annex 11,
  33. // we encode full ranges instead of the defined runes in Unified_Ideograph.
  34. for _, b := range []struct{ lo, hi rune }{
  35. {0x4E00, 0x9FFF}, // the CJK Unified Ideographs block,
  36. {0x3400, 0x4DBF}, // the CJK Unified Ideographs Externsion A block,
  37. {0xF900, 0xFAFF}, // the CJK Compatibility Ideographs block,
  38. {0x20000, 0x2FFFF}, // the Supplementary Ideographic Plane,
  39. {0x30000, 0x3FFFF}, // the Tertiary Ideographic Plane,
  40. } {
  41. for r := b.lo; r <= b.hi; r++ {
  42. f(r, tagWide, 0)
  43. }
  44. }
  45. inverse := map[rune]rune{}
  46. maps := map[string]bool{
  47. "<wide>": true,
  48. "<narrow>": true,
  49. }
  50. // We cannot reuse package norm's decomposition, as we need an unexpanded
  51. // decomposition. We make use of the opportunity to verify that the
  52. // decomposition type is as expected.
  53. ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
  54. r := p.Rune(0)
  55. s := strings.SplitN(p.String(ucd.DecompMapping), " ", 2)
  56. if !maps[s[0]] {
  57. return
  58. }
  59. x, err := strconv.ParseUint(s[1], 16, 32)
  60. if err != nil {
  61. log.Fatalf("Error parsing rune %q", s[1])
  62. }
  63. if inverse[r] != 0 || inverse[rune(x)] != 0 {
  64. log.Fatalf("Circular dependency in mapping between %U and %U", r, x)
  65. }
  66. inverse[r] = rune(x)
  67. inverse[rune(x)] = r
  68. })
  69. // <rune range>;<type>
  70. ucd.Parse(gen.OpenUCDFile("EastAsianWidth.txt"), func(p *ucd.Parser) {
  71. tag, ok := typeMap[p.String(1)]
  72. if !ok {
  73. log.Fatalf("Unknown width type %q", p.String(1))
  74. }
  75. r := p.Rune(0)
  76. alt, ok := inverse[r]
  77. if tag == tagFullwidth || tag == tagHalfwidth && r != wonSign {
  78. tag |= tagNeedsFold
  79. if !ok {
  80. log.Fatalf("Narrow or wide rune %U has no decomposition", r)
  81. }
  82. }
  83. f(r, tag, alt)
  84. })
  85. }