context_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package cases
  5. import (
  6. "strings"
  7. "testing"
  8. "unicode"
  9. "golang.org/x/text/internal/testtext"
  10. "golang.org/x/text/language"
  11. "golang.org/x/text/transform"
  12. "golang.org/x/text/unicode/norm"
  13. "golang.org/x/text/unicode/rangetable"
  14. )
  15. // The following definitions are taken directly from Chapter 3 of The Unicode
  16. // Standard.
  17. func propCased(r rune) bool {
  18. return propLower(r) || propUpper(r) || unicode.IsTitle(r)
  19. }
  20. func propLower(r rune) bool {
  21. return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r)
  22. }
  23. func propUpper(r rune) bool {
  24. return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r)
  25. }
  26. func propIgnore(r rune) bool {
  27. if unicode.In(r, unicode.Mn, unicode.Me, unicode.Cf, unicode.Lm, unicode.Sk) {
  28. return true
  29. }
  30. return caseIgnorable[r]
  31. }
  32. func hasBreakProp(r rune) bool {
  33. // binary search over ranges
  34. lo := 0
  35. hi := len(breakProp)
  36. for lo < hi {
  37. m := lo + (hi-lo)/2
  38. bp := &breakProp[m]
  39. if bp.lo <= r && r <= bp.hi {
  40. return true
  41. }
  42. if r < bp.lo {
  43. hi = m
  44. } else {
  45. lo = m + 1
  46. }
  47. }
  48. return false
  49. }
  50. func contextFromRune(r rune) *context {
  51. c := context{dst: make([]byte, 128), src: []byte(string(r)), atEOF: true}
  52. c.next()
  53. return &c
  54. }
  55. func TestCaseProperties(t *testing.T) {
  56. if unicode.Version != UnicodeVersion {
  57. // Properties of existing code points may change by Unicode version, so
  58. // we need to skip.
  59. t.Skipf("Skipping as core Unicode version %s different than %s", unicode.Version, UnicodeVersion)
  60. }
  61. assigned := rangetable.Assigned(UnicodeVersion)
  62. coreVersion := rangetable.Assigned(unicode.Version)
  63. for r := rune(0); r <= lastRuneForTesting; r++ {
  64. if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
  65. continue
  66. }
  67. c := contextFromRune(r)
  68. if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want {
  69. t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info)
  70. }
  71. // New letters may change case types, but existing case pairings should
  72. // not change. See Case Pair Stability in
  73. // https://unicode.org/policies/stability_policy.html.
  74. if rf := unicode.SimpleFold(r); rf != r && unicode.In(rf, assigned) {
  75. if got, want := c.info.isCased(), propCased(r); got != want {
  76. t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info)
  77. }
  78. if got, want := c.caseType() == cUpper, propUpper(r); got != want {
  79. t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info)
  80. }
  81. if got, want := c.caseType() == cLower, propLower(r); got != want {
  82. t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info)
  83. }
  84. }
  85. if got, want := c.info.isBreak(), hasBreakProp(r); got != want {
  86. t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info)
  87. }
  88. }
  89. // TODO: get title case from unicode file.
  90. }
  91. func TestMapping(t *testing.T) {
  92. assigned := rangetable.Assigned(UnicodeVersion)
  93. coreVersion := rangetable.Assigned(unicode.Version)
  94. if coreVersion == nil {
  95. coreVersion = assigned
  96. }
  97. apply := func(r rune, f func(c *context) bool) string {
  98. c := contextFromRune(r)
  99. f(c)
  100. return string(c.dst[:c.pDst])
  101. }
  102. for r, tt := range special {
  103. if got, want := apply(r, lower), tt.toLower; got != want {
  104. t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, want)
  105. }
  106. if got, want := apply(r, title), tt.toTitle; got != want {
  107. t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, want)
  108. }
  109. if got, want := apply(r, upper), tt.toUpper; got != want {
  110. t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, want)
  111. }
  112. }
  113. for r := rune(0); r <= lastRuneForTesting; r++ {
  114. if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
  115. continue
  116. }
  117. if rf := unicode.SimpleFold(r); rf == r || !unicode.In(rf, assigned) {
  118. continue
  119. }
  120. if _, ok := special[r]; ok {
  121. continue
  122. }
  123. want := string(unicode.ToLower(r))
  124. if got := apply(r, lower); got != want {
  125. t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
  126. }
  127. want = string(unicode.ToUpper(r))
  128. if got := apply(r, upper); got != want {
  129. t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
  130. }
  131. want = string(unicode.ToTitle(r))
  132. if got := apply(r, title); got != want {
  133. t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
  134. }
  135. }
  136. }
  137. func runeFoldData(r rune) (x struct{ simple, full, special string }) {
  138. x = foldMap[r]
  139. if x.simple == "" {
  140. x.simple = string(unicode.ToLower(r))
  141. }
  142. if x.full == "" {
  143. x.full = string(unicode.ToLower(r))
  144. }
  145. if x.special == "" {
  146. x.special = x.full
  147. }
  148. return
  149. }
  150. func TestFoldData(t *testing.T) {
  151. assigned := rangetable.Assigned(UnicodeVersion)
  152. coreVersion := rangetable.Assigned(unicode.Version)
  153. if coreVersion == nil {
  154. coreVersion = assigned
  155. }
  156. apply := func(r rune, f func(c *context) bool) (string, info) {
  157. c := contextFromRune(r)
  158. f(c)
  159. return string(c.dst[:c.pDst]), c.info.cccType()
  160. }
  161. for r := rune(0); r <= lastRuneForTesting; r++ {
  162. if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
  163. continue
  164. }
  165. x := runeFoldData(r)
  166. if got, info := apply(r, foldFull); got != x.full {
  167. t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), x.full, []rune(x.full), info)
  168. }
  169. // TODO: special and simple.
  170. }
  171. }
  172. func TestCCC(t *testing.T) {
  173. assigned := rangetable.Assigned(UnicodeVersion)
  174. normVersion := rangetable.Assigned(norm.Version)
  175. for r := rune(0); r <= lastRuneForTesting; r++ {
  176. if !unicode.In(r, assigned) || !unicode.In(r, normVersion) {
  177. continue
  178. }
  179. c := contextFromRune(r)
  180. p := norm.NFC.PropertiesString(string(r))
  181. want := cccOther
  182. switch p.CCC() {
  183. case 0:
  184. want = cccZero
  185. case above:
  186. want = cccAbove
  187. }
  188. if got := c.info.cccType(); got != want {
  189. t.Errorf("%U: got %x; want %x", r, got, want)
  190. }
  191. }
  192. }
  193. func TestWordBreaks(t *testing.T) {
  194. for _, tt := range breakTest {
  195. testtext.Run(t, tt, func(t *testing.T) {
  196. parts := strings.Split(tt, "|")
  197. want := ""
  198. for _, s := range parts {
  199. found := false
  200. // This algorithm implements title casing given word breaks
  201. // as defined in the Unicode standard 3.13 R3.
  202. for _, r := range s {
  203. title := unicode.ToTitle(r)
  204. lower := unicode.ToLower(r)
  205. if !found && title != lower {
  206. found = true
  207. want += string(title)
  208. } else {
  209. want += string(lower)
  210. }
  211. }
  212. }
  213. src := strings.Join(parts, "")
  214. got := Title(language.Und).String(src)
  215. if got != want {
  216. t.Errorf("got %q; want %q", got, want)
  217. }
  218. })
  219. }
  220. }
  221. func TestContext(t *testing.T) {
  222. tests := []struct {
  223. desc string
  224. dstSize int
  225. atEOF bool
  226. src string
  227. out string
  228. nSrc int
  229. err error
  230. ops string
  231. prefixArg string
  232. prefixWant bool
  233. }{{
  234. desc: "next: past end, atEOF, no checkpoint",
  235. dstSize: 10,
  236. atEOF: true,
  237. src: "12",
  238. out: "",
  239. nSrc: 2,
  240. ops: "next;next;next",
  241. // Test that calling prefix with a non-empty argument when the buffer
  242. // is depleted returns false.
  243. prefixArg: "x",
  244. prefixWant: false,
  245. }, {
  246. desc: "next: not at end, atEOF, no checkpoint",
  247. dstSize: 10,
  248. atEOF: false,
  249. src: "12",
  250. out: "",
  251. nSrc: 0,
  252. err: transform.ErrShortSrc,
  253. ops: "next;next",
  254. prefixArg: "",
  255. prefixWant: true,
  256. }, {
  257. desc: "next: past end, !atEOF, no checkpoint",
  258. dstSize: 10,
  259. atEOF: false,
  260. src: "12",
  261. out: "",
  262. nSrc: 0,
  263. err: transform.ErrShortSrc,
  264. ops: "next;next;next",
  265. prefixArg: "",
  266. prefixWant: true,
  267. }, {
  268. desc: "next: past end, !atEOF, checkpoint",
  269. dstSize: 10,
  270. atEOF: false,
  271. src: "12",
  272. out: "",
  273. nSrc: 2,
  274. ops: "next;next;checkpoint;next",
  275. prefixArg: "",
  276. prefixWant: true,
  277. }, {
  278. desc: "copy: exact count, atEOF, no checkpoint",
  279. dstSize: 2,
  280. atEOF: true,
  281. src: "12",
  282. out: "12",
  283. nSrc: 2,
  284. ops: "next;copy;next;copy;next",
  285. prefixArg: "",
  286. prefixWant: true,
  287. }, {
  288. desc: "copy: past end, !atEOF, no checkpoint",
  289. dstSize: 2,
  290. atEOF: false,
  291. src: "12",
  292. out: "",
  293. nSrc: 0,
  294. err: transform.ErrShortSrc,
  295. ops: "next;copy;next;copy;next",
  296. prefixArg: "",
  297. prefixWant: true,
  298. }, {
  299. desc: "copy: past end, !atEOF, checkpoint",
  300. dstSize: 2,
  301. atEOF: false,
  302. src: "12",
  303. out: "12",
  304. nSrc: 2,
  305. ops: "next;copy;next;copy;checkpoint;next",
  306. prefixArg: "",
  307. prefixWant: true,
  308. }, {
  309. desc: "copy: short dst",
  310. dstSize: 1,
  311. atEOF: false,
  312. src: "12",
  313. out: "",
  314. nSrc: 0,
  315. err: transform.ErrShortDst,
  316. ops: "next;copy;next;copy;checkpoint;next",
  317. prefixArg: "12",
  318. prefixWant: false,
  319. }, {
  320. desc: "copy: short dst, checkpointed",
  321. dstSize: 1,
  322. atEOF: false,
  323. src: "12",
  324. out: "1",
  325. nSrc: 1,
  326. err: transform.ErrShortDst,
  327. ops: "next;copy;checkpoint;next;copy;next",
  328. prefixArg: "",
  329. prefixWant: true,
  330. }, {
  331. desc: "writeString: simple",
  332. dstSize: 3,
  333. atEOF: true,
  334. src: "1",
  335. out: "1ab",
  336. nSrc: 1,
  337. ops: "next;copy;writeab;next",
  338. prefixArg: "",
  339. prefixWant: true,
  340. }, {
  341. desc: "writeString: short dst",
  342. dstSize: 2,
  343. atEOF: true,
  344. src: "12",
  345. out: "",
  346. nSrc: 0,
  347. err: transform.ErrShortDst,
  348. ops: "next;copy;writeab;next",
  349. prefixArg: "2",
  350. prefixWant: true,
  351. }, {
  352. desc: "writeString: simple",
  353. dstSize: 3,
  354. atEOF: true,
  355. src: "12",
  356. out: "1ab",
  357. nSrc: 2,
  358. ops: "next;copy;next;writeab;next",
  359. prefixArg: "",
  360. prefixWant: true,
  361. }, {
  362. desc: "writeString: short dst",
  363. dstSize: 2,
  364. atEOF: true,
  365. src: "12",
  366. out: "",
  367. nSrc: 0,
  368. err: transform.ErrShortDst,
  369. ops: "next;copy;next;writeab;next",
  370. prefixArg: "1",
  371. prefixWant: false,
  372. }, {
  373. desc: "prefix",
  374. dstSize: 2,
  375. atEOF: true,
  376. src: "12",
  377. out: "",
  378. nSrc: 0,
  379. // Context will assign an ErrShortSrc if the input wasn't exhausted.
  380. err: transform.ErrShortSrc,
  381. prefixArg: "12",
  382. prefixWant: true,
  383. }}
  384. for _, tt := range tests {
  385. c := context{dst: make([]byte, tt.dstSize), src: []byte(tt.src), atEOF: tt.atEOF}
  386. for _, op := range strings.Split(tt.ops, ";") {
  387. switch op {
  388. case "next":
  389. c.next()
  390. case "checkpoint":
  391. c.checkpoint()
  392. case "writeab":
  393. c.writeString("ab")
  394. case "copy":
  395. c.copy()
  396. case "":
  397. default:
  398. t.Fatalf("unknown op %q", op)
  399. }
  400. }
  401. if got := c.hasPrefix(tt.prefixArg); got != tt.prefixWant {
  402. t.Errorf("%s:\nprefix was %v; want %v", tt.desc, got, tt.prefixWant)
  403. }
  404. nDst, nSrc, err := c.ret()
  405. if err != tt.err {
  406. t.Errorf("%s:\nerror was %v; want %v", tt.desc, err, tt.err)
  407. }
  408. if out := string(c.dst[:nDst]); out != tt.out {
  409. t.Errorf("%s:\nout was %q; want %q", tt.desc, out, tt.out)
  410. }
  411. if nSrc != tt.nSrc {
  412. t.Errorf("%s:\nnSrc was %d; want %d", tt.desc, nSrc, tt.nSrc)
  413. }
  414. }
  415. }