transform_test.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  // Copyright 2015 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  4. package width
  5. import (
  6. "bytes"
  7. "strings"
  8. "testing"
  9. "golang.org/x/text/internal/testtext"
  10. "golang.org/x/text/transform"
  11. )
  12. func foldRune(r rune) (folded rune, ok bool) {
  13. alt, ok := mapRunes[r]
  14. if ok && alt.e&tagNeedsFold != 0 {
  15. return alt.r, true
  16. }
  17. return r, false
  18. }
  19. func widenRune(r rune) (wide rune, ok bool) {
  20. alt, ok := mapRunes[r]
  21. if k := alt.e.kind(); k == EastAsianHalfwidth || k == EastAsianNarrow {
  22. return alt.r, true
  23. }
  24. return r, false
  25. }
  26. func narrowRune(r rune) (narrow rune, ok bool) {
  27. alt, ok := mapRunes[r]
  28. if k := alt.e.kind(); k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous {
  29. return alt.r, true
  30. }
  31. return r, false
  32. }
  33. func TestFoldSingleRunes(t *testing.T) {
  34. for r := rune(0); r < 0x1FFFF; r++ {
  35. if loSurrogate <= r && r <= hiSurrogate {
  36. continue
  37. }
  38. x, _ := foldRune(r)
  39. want := string(x)
  40. got := Fold.String(string(r))
  41. if got != want {
  42. t.Errorf("Fold().String(%U) = %+q; want %+q", r, got, want)
  43. }
  44. }
  45. }
  // transformTest describes one table-driven case for a width Transformer:
  // the inputs handed to Transform and Span, and the results expected back.
  type transformTest struct {
  	// desc names the subtest.
  	desc string

  	// Inputs.
  	src   string // source text passed to Transform, String and Span
  	nBuf  int    // size in bytes of the destination buffer
  	atEOF bool   // atEOF argument passed to Transform and Span

  	// Expected results of Transform (dst is also the expected String result).
  	dst  string
  	nDst int
  	nSrc int
  	err  error

  	// Expected results of Span.
  	nSpan   int
  	errSpan error
  }
  58. func (tc *transformTest) doTest(t *testing.T, tr Transformer) {
  59. testtext.Run(t, tc.desc, func(t *testing.T) {
  60. b := make([]byte, tc.nBuf)
  61. nDst, nSrc, err := tr.Transform(b, []byte(tc.src), tc.atEOF)
  62. if got := string(b[:nDst]); got != tc.dst[:nDst] {
  63. t.Errorf("dst was %+q; want %+q", got, tc.dst)
  64. }
  65. if nDst != tc.nDst {
  66. t.Errorf("nDst was %d; want %d", nDst, tc.nDst)
  67. }
  68. if nSrc != tc.nSrc {
  69. t.Errorf("nSrc was %d; want %d", nSrc, tc.nSrc)
  70. }
  71. if err != tc.err {
  72. t.Errorf("error was %v; want %v", err, tc.err)
  73. }
  74. if got := tr.String(tc.src); got != tc.dst {
  75. t.Errorf("String(%q) = %q; want %q", tc.src, got, tc.dst)
  76. }
  77. n, err := tr.Span([]byte(tc.src), tc.atEOF)
  78. if n != tc.nSpan || err != tc.errSpan {
  79. t.Errorf("Span: got %d, %v; want %d, %v", n, err, tc.nSpan, tc.errSpan)
  80. }
  81. })
  82. }
  83. func TestFold(t *testing.T) {
  84. for _, tc := range []transformTest{{
  85. desc: "empty",
  86. src: "",
  87. nBuf: 10,
  88. dst: "",
  89. nDst: 0,
  90. nSrc: 0,
  91. atEOF: false,
  92. err: nil,
  93. nSpan: 0,
  94. errSpan: nil,
  95. }, {
  96. desc: "short source 1",
  97. src: "a\xc2",
  98. nBuf: 10,
  99. dst: "a\xc2",
  100. nDst: 1,
  101. nSrc: 1,
  102. atEOF: false,
  103. err: transform.ErrShortSrc,
  104. nSpan: 1,
  105. errSpan: transform.ErrShortSrc,
  106. }, {
  107. desc: "short source 2",
  108. src: "a\xe0\x80",
  109. nBuf: 10,
  110. dst: "a\xe0\x80",
  111. nDst: 1,
  112. nSrc: 1,
  113. atEOF: false,
  114. err: transform.ErrShortSrc,
  115. nSpan: 1,
  116. errSpan: transform.ErrShortSrc,
  117. }, {
  118. desc: "incomplete but terminated source 1",
  119. src: "a\xc2",
  120. nBuf: 10,
  121. dst: "a\xc2",
  122. nDst: 2,
  123. nSrc: 2,
  124. atEOF: true,
  125. err: nil,
  126. nSpan: 2,
  127. errSpan: nil,
  128. }, {
  129. desc: "incomplete but terminated source 2",
  130. src: "a\xe0\x80",
  131. nBuf: 10,
  132. dst: "a\xe0\x80",
  133. nDst: 3,
  134. nSrc: 3,
  135. atEOF: true,
  136. err: nil,
  137. nSpan: 3,
  138. errSpan: nil,
  139. }, {
  140. desc: "exact fit dst",
  141. src: "a\uff01",
  142. nBuf: 2,
  143. dst: "a!",
  144. nDst: 2,
  145. nSrc: 4,
  146. atEOF: false,
  147. err: nil,
  148. nSpan: 1,
  149. errSpan: transform.ErrEndOfSpan,
  150. }, {
  151. desc: "exact fit dst and src ascii",
  152. src: "ab",
  153. nBuf: 2,
  154. dst: "ab",
  155. nDst: 2,
  156. nSrc: 2,
  157. atEOF: true,
  158. err: nil,
  159. nSpan: 2,
  160. errSpan: nil,
  161. }, {
  162. desc: "empty dst",
  163. src: "\u0300",
  164. nBuf: 0,
  165. dst: "\u0300",
  166. nDst: 0,
  167. nSrc: 0,
  168. atEOF: true,
  169. err: transform.ErrShortDst,
  170. nSpan: 2,
  171. errSpan: nil,
  172. }, {
  173. desc: "empty dst ascii",
  174. src: "a",
  175. nBuf: 0,
  176. dst: "a",
  177. nDst: 0,
  178. nSrc: 0,
  179. atEOF: true,
  180. err: transform.ErrShortDst,
  181. nSpan: 1,
  182. errSpan: nil,
  183. }, {
  184. desc: "short dst 1",
  185. src: "a\uffe0", // ¢
  186. nBuf: 2,
  187. dst: "a\u00a2", // ¢
  188. nDst: 1,
  189. nSrc: 1,
  190. atEOF: false,
  191. err: transform.ErrShortDst,
  192. nSpan: 1,
  193. errSpan: transform.ErrEndOfSpan,
  194. }, {
  195. desc: "short dst 2",
  196. src: "不夠",
  197. nBuf: 3,
  198. dst: "不夠",
  199. nDst: 3,
  200. nSrc: 3,
  201. atEOF: true,
  202. err: transform.ErrShortDst,
  203. nSpan: 6,
  204. errSpan: nil,
  205. }, {
  206. desc: "short dst fast path",
  207. src: "fast",
  208. nDst: 3,
  209. dst: "fast",
  210. nBuf: 3,
  211. nSrc: 3,
  212. atEOF: true,
  213. err: transform.ErrShortDst,
  214. nSpan: 4,
  215. errSpan: nil,
  216. }, {
  217. desc: "short dst larger buffer",
  218. src: "\uff21" + strings.Repeat("0", 127) + "B",
  219. nBuf: 128,
  220. dst: "A" + strings.Repeat("0", 127) + "B",
  221. nDst: 128,
  222. nSrc: 130,
  223. atEOF: true,
  224. err: transform.ErrShortDst,
  225. nSpan: 0,
  226. errSpan: transform.ErrEndOfSpan,
  227. }, {
  228. desc: "fast path alternation",
  229. src: "fast路徑fast路徑",
  230. nBuf: 20,
  231. dst: "fast路徑fast路徑",
  232. nDst: 20,
  233. nSrc: 20,
  234. atEOF: true,
  235. err: nil,
  236. nSpan: 20,
  237. errSpan: nil,
  238. }} {
  239. tc.doTest(t, Fold)
  240. }
  241. }
  242. func TestWidenSingleRunes(t *testing.T) {
  243. for r := rune(0); r < 0x1FFFF; r++ {
  244. if loSurrogate <= r && r <= hiSurrogate {
  245. continue
  246. }
  247. alt, _ := widenRune(r)
  248. want := string(alt)
  249. got := Widen.String(string(r))
  250. if got != want {
  251. t.Errorf("Widen().String(%U) = %+q; want %+q", r, got, want)
  252. }
  253. }
  254. }
  255. func TestWiden(t *testing.T) {
  256. for _, tc := range []transformTest{{
  257. desc: "empty",
  258. src: "",
  259. nBuf: 10,
  260. dst: "",
  261. nDst: 0,
  262. nSrc: 0,
  263. atEOF: false,
  264. err: nil,
  265. nSpan: 0,
  266. errSpan: nil,
  267. }, {
  268. desc: "short source 1",
  269. src: "a\xc2",
  270. nBuf: 10,
  271. dst: "a\xc2",
  272. nDst: 3,
  273. nSrc: 1,
  274. atEOF: false,
  275. err: transform.ErrShortSrc,
  276. nSpan: 0,
  277. errSpan: transform.ErrEndOfSpan,
  278. }, {
  279. desc: "short source 2",
  280. src: "a\xe0\x80",
  281. nBuf: 10,
  282. dst: "a\xe0\x80",
  283. nDst: 3,
  284. nSrc: 1,
  285. atEOF: false,
  286. err: transform.ErrShortSrc,
  287. nSpan: 0,
  288. errSpan: transform.ErrEndOfSpan,
  289. }, {
  290. desc: "incomplete but terminated source 1",
  291. src: "a\xc2",
  292. nBuf: 10,
  293. dst: "a\xc2",
  294. nDst: 4,
  295. nSrc: 2,
  296. atEOF: true,
  297. err: nil,
  298. nSpan: 0,
  299. errSpan: transform.ErrEndOfSpan,
  300. }, {
  301. desc: "incomplete but terminated source 2",
  302. src: "a\xe0\x80",
  303. nBuf: 10,
  304. dst: "a\xe0\x80",
  305. nDst: 5,
  306. nSrc: 3,
  307. atEOF: true,
  308. err: nil,
  309. nSpan: 0,
  310. errSpan: transform.ErrEndOfSpan,
  311. }, {
  312. desc: "short source 1 some span",
  313. src: "a\xc2",
  314. nBuf: 10,
  315. dst: "a\xc2",
  316. nDst: 3,
  317. nSrc: 3,
  318. atEOF: false,
  319. err: transform.ErrShortSrc,
  320. nSpan: 3,
  321. errSpan: transform.ErrShortSrc,
  322. }, {
  323. desc: "short source 2 some span",
  324. src: "a\xe0\x80",
  325. nBuf: 10,
  326. dst: "a\xe0\x80",
  327. nDst: 3,
  328. nSrc: 3,
  329. atEOF: false,
  330. err: transform.ErrShortSrc,
  331. nSpan: 3,
  332. errSpan: transform.ErrShortSrc,
  333. }, {
  334. desc: "incomplete but terminated source 1 some span",
  335. src: "a\xc2",
  336. nBuf: 10,
  337. dst: "a\xc2",
  338. nDst: 4,
  339. nSrc: 4,
  340. atEOF: true,
  341. err: nil,
  342. nSpan: 4,
  343. errSpan: nil,
  344. }, {
  345. desc: "incomplete but terminated source 2 some span",
  346. src: "a\xe0\x80",
  347. nBuf: 10,
  348. dst: "a\xe0\x80",
  349. nDst: 5,
  350. nSrc: 5,
  351. atEOF: true,
  352. err: nil,
  353. nSpan: 5,
  354. errSpan: nil,
  355. }, {
  356. desc: "exact fit dst",
  357. src: "a!",
  358. nBuf: 6,
  359. dst: "a\uff01",
  360. nDst: 6,
  361. nSrc: 2,
  362. atEOF: false,
  363. err: nil,
  364. nSpan: 0,
  365. errSpan: transform.ErrEndOfSpan,
  366. }, {
  367. desc: "empty dst",
  368. src: "\u0300",
  369. nBuf: 0,
  370. dst: "\u0300",
  371. nDst: 0,
  372. nSrc: 0,
  373. atEOF: true,
  374. err: transform.ErrShortDst,
  375. nSpan: 2,
  376. errSpan: nil,
  377. }, {
  378. desc: "empty dst ascii",
  379. src: "a",
  380. nBuf: 0,
  381. dst: "a",
  382. nDst: 0,
  383. nSrc: 0,
  384. atEOF: true,
  385. err: transform.ErrShortDst,
  386. nSpan: 0,
  387. errSpan: transform.ErrEndOfSpan,
  388. }, {
  389. desc: "short dst 1",
  390. src: "a\uffe0",
  391. nBuf: 4,
  392. dst: "a\uffe0",
  393. nDst: 3,
  394. nSrc: 1,
  395. atEOF: false,
  396. err: transform.ErrShortDst,
  397. nSpan: 0,
  398. errSpan: transform.ErrEndOfSpan,
  399. }, {
  400. desc: "short dst 2",
  401. src: "不夠",
  402. nBuf: 3,
  403. dst: "不夠",
  404. nDst: 3,
  405. nSrc: 3,
  406. atEOF: true,
  407. err: transform.ErrShortDst,
  408. nSpan: 6,
  409. errSpan: nil,
  410. }, {
  411. desc: "short dst ascii",
  412. src: "ascii",
  413. nBuf: 3,
  414. dst: "ascii", // U+ff41, ...
  415. nDst: 3,
  416. nSrc: 1,
  417. atEOF: true,
  418. err: transform.ErrShortDst,
  419. nSpan: 0,
  420. errSpan: transform.ErrEndOfSpan,
  421. }, {
  422. desc: "ambiguous",
  423. src: "\uffe9",
  424. nBuf: 4,
  425. dst: "\u2190",
  426. nDst: 3,
  427. nSrc: 3,
  428. atEOF: false,
  429. err: nil,
  430. nSpan: 0,
  431. errSpan: transform.ErrEndOfSpan,
  432. }} {
  433. tc.doTest(t, Widen)
  434. }
  435. }
  436. func TestNarrowSingleRunes(t *testing.T) {
  437. for r := rune(0); r < 0x1FFFF; r++ {
  438. if loSurrogate <= r && r <= hiSurrogate {
  439. continue
  440. }
  441. alt, _ := narrowRune(r)
  442. want := string(alt)
  443. got := Narrow.String(string(r))
  444. if got != want {
  445. t.Errorf("Narrow().String(%U) = %+q; want %+q", r, got, want)
  446. }
  447. }
  448. }
  449. func TestNarrow(t *testing.T) {
  450. for _, tc := range []transformTest{{
  451. desc: "empty",
  452. src: "",
  453. nBuf: 10,
  454. dst: "",
  455. nDst: 0,
  456. nSrc: 0,
  457. atEOF: false,
  458. err: nil,
  459. nSpan: 0,
  460. errSpan: nil,
  461. }, {
  462. desc: "short source 1",
  463. src: "a\xc2",
  464. nBuf: 10,
  465. dst: "a\xc2",
  466. nDst: 1,
  467. nSrc: 1,
  468. atEOF: false,
  469. err: transform.ErrShortSrc,
  470. nSpan: 1,
  471. errSpan: transform.ErrShortSrc,
  472. }, {
  473. desc: "short source 2",
  474. src: "a\xe0\x80",
  475. nBuf: 10,
  476. dst: "a\xe0\x80",
  477. nDst: 1,
  478. nSrc: 3,
  479. atEOF: false,
  480. err: transform.ErrShortSrc,
  481. nSpan: 0,
  482. errSpan: transform.ErrEndOfSpan,
  483. }, {
  484. desc: "incomplete but terminated source 1",
  485. src: "a\xc2",
  486. nBuf: 10,
  487. dst: "a\xc2",
  488. nDst: 2,
  489. nSrc: 4,
  490. atEOF: true,
  491. err: nil,
  492. nSpan: 0,
  493. errSpan: transform.ErrEndOfSpan,
  494. }, {
  495. desc: "incomplete but terminated source 2",
  496. src: "a\xe0\x80",
  497. nBuf: 10,
  498. dst: "a\xe0\x80",
  499. nDst: 3,
  500. nSrc: 5,
  501. atEOF: true,
  502. err: nil,
  503. nSpan: 0,
  504. errSpan: transform.ErrEndOfSpan,
  505. }, {
  506. desc: "exact fit dst",
  507. src: "a\uff01",
  508. nBuf: 2,
  509. dst: "a!",
  510. nDst: 2,
  511. nSrc: 6,
  512. atEOF: false,
  513. err: nil,
  514. nSpan: 0,
  515. errSpan: transform.ErrEndOfSpan,
  516. }, {
  517. desc: "exact fit dst some span",
  518. src: "a\uff01",
  519. nBuf: 2,
  520. dst: "a!",
  521. nDst: 2,
  522. nSrc: 4,
  523. atEOF: false,
  524. err: nil,
  525. nSpan: 1,
  526. errSpan: transform.ErrEndOfSpan,
  527. }, {
  528. desc: "empty dst",
  529. src: "\u0300",
  530. nBuf: 0,
  531. dst: "\u0300",
  532. nDst: 0,
  533. nSrc: 0,
  534. atEOF: true,
  535. err: transform.ErrShortDst,
  536. nSpan: 2,
  537. errSpan: nil,
  538. }, {
  539. desc: "empty dst ascii",
  540. src: "a",
  541. nBuf: 0,
  542. dst: "a",
  543. nDst: 0,
  544. nSrc: 0,
  545. atEOF: true,
  546. err: transform.ErrShortDst,
  547. nSpan: 1,
  548. errSpan: nil,
  549. }, {
  550. desc: "short dst 1",
  551. src: "a\uffe0", // ¢
  552. nBuf: 2,
  553. dst: "a\u00a2", // ¢
  554. nDst: 1,
  555. nSrc: 3,
  556. atEOF: false,
  557. err: transform.ErrShortDst,
  558. nSpan: 0,
  559. errSpan: transform.ErrEndOfSpan,
  560. }, {
  561. desc: "short dst 2",
  562. src: "不夠",
  563. nBuf: 3,
  564. dst: "不夠",
  565. nDst: 3,
  566. nSrc: 3,
  567. atEOF: true,
  568. err: transform.ErrShortDst,
  569. nSpan: 6,
  570. errSpan: nil,
  571. }, {
  572. // Create a narrow variant of ambiguous runes, if they exist.
  573. desc: "ambiguous",
  574. src: "\u2190",
  575. nBuf: 4,
  576. dst: "\uffe9",
  577. nDst: 3,
  578. nSrc: 3,
  579. atEOF: false,
  580. err: nil,
  581. nSpan: 0,
  582. errSpan: transform.ErrEndOfSpan,
  583. }, {
  584. desc: "short dst fast path",
  585. src: "fast",
  586. nBuf: 3,
  587. dst: "fast",
  588. nDst: 3,
  589. nSrc: 3,
  590. atEOF: true,
  591. err: transform.ErrShortDst,
  592. nSpan: 4,
  593. errSpan: nil,
  594. }, {
  595. desc: "short dst larger buffer",
  596. src: "\uff21" + strings.Repeat("0", 127) + "B",
  597. nBuf: 128,
  598. dst: "A" + strings.Repeat("0", 127) + "B",
  599. nDst: 128,
  600. nSrc: 130,
  601. atEOF: true,
  602. err: transform.ErrShortDst,
  603. nSpan: 0,
  604. errSpan: transform.ErrEndOfSpan,
  605. }, {
  606. desc: "fast path alternation",
  607. src: "fast路徑fast路徑",
  608. nBuf: 20,
  609. dst: "fast路徑fast路徑",
  610. nDst: 20,
  611. nSrc: 20,
  612. atEOF: true,
  613. err: nil,
  614. nSpan: 20,
  615. errSpan: nil,
  616. }} {
  617. tc.doTest(t, Narrow)
  618. }
  619. }
  620. func bench(b *testing.B, t Transformer, s string) {
  621. dst := make([]byte, 1024)
  622. src := []byte(s)
  623. b.SetBytes(int64(len(src)))
  624. b.ResetTimer()
  625. for i := 0; i < b.N; i++ {
  626. t.Transform(dst, src, true)
  627. }
  628. }
  // changingRunes returns a string made of every rune in [0, 0xFFFF], in
  // ascending order, for which f reports ok == true. The rune returned by f is
  // ignored; only the boolean decides membership.
  //
  // The predicate f must actually be consulted here: callers pass foldRune,
  // widenRune or narrowRune to obtain the input set specific to each
  // transformer's "NonCanonical" benchmark.
  func changingRunes(f func(r rune) (rune, bool)) string {
  	var buf bytes.Buffer
  	for r := rune(0); r <= 0xFFFF; r++ {
  		if _, ok := f(r); ok {
  			buf.WriteRune(r)
  		}
  	}
  	return buf.String()
  }
  638. func BenchmarkFoldASCII(b *testing.B) {
  639. bench(b, Fold, testtext.ASCII)
  640. }
  641. func BenchmarkFoldCJK(b *testing.B) {
  642. bench(b, Fold, testtext.CJK)
  643. }
  644. func BenchmarkFoldNonCanonical(b *testing.B) {
  645. bench(b, Fold, changingRunes(foldRune))
  646. }
  647. func BenchmarkFoldOther(b *testing.B) {
  648. bench(b, Fold, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
  649. }
  650. func BenchmarkWideASCII(b *testing.B) {
  651. bench(b, Widen, testtext.ASCII)
  652. }
  653. func BenchmarkWideCJK(b *testing.B) {
  654. bench(b, Widen, testtext.CJK)
  655. }
  656. func BenchmarkWideNonCanonical(b *testing.B) {
  657. bench(b, Widen, changingRunes(widenRune))
  658. }
  659. func BenchmarkWideOther(b *testing.B) {
  660. bench(b, Widen, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
  661. }
  662. func BenchmarkNarrowASCII(b *testing.B) {
  663. bench(b, Narrow, testtext.ASCII)
  664. }
  665. func BenchmarkNarrowCJK(b *testing.B) {
  666. bench(b, Narrow, testtext.CJK)
  667. }
  668. func BenchmarkNarrowNonCanonical(b *testing.B) {
  669. bench(b, Narrow, changingRunes(narrowRune))
  670. }
  671. func BenchmarkNarrowOther(b *testing.B) {
  672. bench(b, Narrow, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
  673. }