iter_test.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package norm
  5. import (
  6. "strings"
  7. "testing"
  8. )
  9. func doIterNormString(f Form, s string) []byte {
  10. acc := []byte{}
  11. i := Iter{}
  12. i.InitString(f, s)
  13. for !i.Done() {
  14. acc = append(acc, i.Next()...)
  15. }
  16. return acc
  17. }
  18. func doIterNorm(f Form, s string) []byte {
  19. acc := []byte{}
  20. i := Iter{}
  21. i.Init(f, []byte(s))
  22. for !i.Done() {
  23. acc = append(acc, i.Next()...)
  24. }
  25. return acc
  26. }
  27. func TestIterNext(t *testing.T) {
  28. runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte {
  29. return doIterNormString(f, string(append(out, s...)))
  30. })
  31. runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte {
  32. return doIterNorm(f, string(append(out, s...)))
  33. })
  34. }
  35. type SegmentTest struct {
  36. in string
  37. out []string
  38. }
  39. var segmentTests = []SegmentTest{
  40. {"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}},
  41. {rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")},
  42. {rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")},
  43. {rep('a', segSize) + "\u0300aa",
  44. append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")},
  45. // U+0f73 is NOT treated as a starter as it is a modifier
  46. {"a" + grave(29) + "\u0f73", []string{"a" + grave(29), cgj + "\u0f73"}},
  47. {"a\u0f73", []string{"a\u0f73"}},
  48. // U+ff9e is treated as a non-starter.
  49. // TODO: should we? Note that this will only affect iteration, as whether
  50. // or not we do so does not affect the normalization output and will either
  51. // way result in consistent iteration output.
  52. {"a" + grave(30) + "\uff9e", []string{"a" + grave(30), cgj + "\uff9e"}},
  53. {"a\uff9e", []string{"a\uff9e"}},
  54. }
  55. var segmentTestsK = []SegmentTest{
  56. {"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}},
  57. // last segment of multi-segment decomposition needs normalization
  58. {"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}},
  59. {"\u320E", []string{"\x28", "\uAC00", "\x29"}},
  60. // last segment should be copied to start of buffer.
  61. {"\ufdfa", []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645", ""}},
  62. {"\ufdfa" + grave(30), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), ""}},
  63. {"\uFDFA" + grave(64), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), cgj + grave(30), cgj + grave(4), ""}},
  64. // Hangul and Jamo are grouped together.
  65. {"\uAC00", []string{"\u1100\u1161", ""}},
  66. {"\uAC01", []string{"\u1100\u1161\u11A8", ""}},
  67. {"\u1100\u1161", []string{"\u1100\u1161", ""}},
  68. }
  69. // Note that, by design, segmentation is equal for composing and decomposing forms.
  70. func TestIterSegmentation(t *testing.T) {
  71. segmentTest(t, "SegmentTestD", NFD, segmentTests)
  72. segmentTest(t, "SegmentTestC", NFC, segmentTests)
  73. segmentTest(t, "SegmentTestKD", NFKD, segmentTestsK)
  74. segmentTest(t, "SegmentTestKC", NFKC, segmentTestsK)
  75. }
  76. func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
  77. iter := Iter{}
  78. for i, tt := range tests {
  79. iter.InitString(f, tt.in)
  80. for j, seg := range tt.out {
  81. if seg == "" {
  82. if !iter.Done() {
  83. res := string(iter.Next())
  84. t.Errorf(`%s:%d:%d: expected Done()==true, found segment %+q`, name, i, j, res)
  85. }
  86. continue
  87. }
  88. if iter.Done() {
  89. t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
  90. }
  91. seg = f.String(seg)
  92. if res := string(iter.Next()); res != seg {
  93. t.Errorf(`%s:%d:%d" segment was %+q (%d); want %+q (%d)`, name, i, j, pc(res), len(res), pc(seg), len(seg))
  94. }
  95. }
  96. }
  97. }