runes_test.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package runes
  5. import (
  6. "strings"
  7. "testing"
  8. "unicode/utf8"
  9. "golang.org/x/text/internal/testtext"
  10. "golang.org/x/text/transform"
  11. )
  12. type transformTest struct {
  13. desc string
  14. szDst int
  15. atEOF bool
  16. repl string
  17. in string
  18. out string // result string of first call to Transform
  19. outFull string // transform of entire input string
  20. err error
  21. errSpan error
  22. nSpan int
  23. t transform.SpanningTransformer
  24. }
  25. const large = 10240
  26. func (tt *transformTest) check(t *testing.T, i int) {
  27. if tt.t == nil {
  28. return
  29. }
  30. dst := make([]byte, tt.szDst)
  31. src := []byte(tt.in)
  32. nDst, nSrc, err := tt.t.Transform(dst, src, tt.atEOF)
  33. if err != tt.err {
  34. t.Errorf("%d:%s:error: got %v; want %v", i, tt.desc, err, tt.err)
  35. }
  36. if got := string(dst[:nDst]); got != tt.out {
  37. t.Errorf("%d:%s:out: got %q; want %q", i, tt.desc, got, tt.out)
  38. }
  39. // Calls tt.t.Transform for the remainder of the input. We use this to test
  40. // the nSrc return value.
  41. out := make([]byte, large)
  42. n := copy(out, dst[:nDst])
  43. nDst, _, _ = tt.t.Transform(out[n:], src[nSrc:], true)
  44. if got, want := string(out[:n+nDst]), tt.outFull; got != want {
  45. t.Errorf("%d:%s:outFull: got %q; want %q", i, tt.desc, got, want)
  46. }
  47. tt.t.Reset()
  48. p := 0
  49. for ; p < len(tt.in) && p < len(tt.outFull) && tt.in[p] == tt.outFull[p]; p++ {
  50. }
  51. if tt.nSpan != 0 {
  52. p = tt.nSpan
  53. }
  54. if n, err = tt.t.Span([]byte(tt.in), tt.atEOF); n != p || err != tt.errSpan {
  55. t.Errorf("%d:%s:span: got %d, %v; want %d, %v", i, tt.desc, n, err, p, tt.errSpan)
  56. }
  57. }
  58. func idem(r rune) rune { return r }
  59. func TestMap(t *testing.T) {
  60. runes := []rune{'a', 'ç', '中', '\U00012345', 'a'}
  61. // Default mapper used for this test.
  62. rotate := Map(func(r rune) rune {
  63. for i, m := range runes {
  64. if m == r {
  65. return runes[i+1]
  66. }
  67. }
  68. return r
  69. })
  70. for i, tt := range []transformTest{{
  71. desc: "empty",
  72. szDst: large,
  73. atEOF: true,
  74. in: "",
  75. out: "",
  76. outFull: "",
  77. t: rotate,
  78. }, {
  79. desc: "no change",
  80. szDst: 1,
  81. atEOF: true,
  82. in: "b",
  83. out: "b",
  84. outFull: "b",
  85. t: rotate,
  86. }, {
  87. desc: "short dst",
  88. szDst: 2,
  89. atEOF: true,
  90. in: "aaaa",
  91. out: "ç",
  92. outFull: "çççç",
  93. err: transform.ErrShortDst,
  94. errSpan: transform.ErrEndOfSpan,
  95. t: rotate,
  96. }, {
  97. desc: "short dst ascii, no change",
  98. szDst: 2,
  99. atEOF: true,
  100. in: "bbb",
  101. out: "bb",
  102. outFull: "bbb",
  103. err: transform.ErrShortDst,
  104. t: rotate,
  105. }, {
  106. desc: "short dst writing error",
  107. szDst: 2,
  108. atEOF: false,
  109. in: "a\x80",
  110. out: "ç",
  111. outFull: "ç\ufffd",
  112. err: transform.ErrShortDst,
  113. errSpan: transform.ErrEndOfSpan,
  114. t: rotate,
  115. }, {
  116. desc: "short dst writing incomplete rune",
  117. szDst: 2,
  118. atEOF: true,
  119. in: "a\xc0",
  120. out: "ç",
  121. outFull: "ç\ufffd",
  122. err: transform.ErrShortDst,
  123. errSpan: transform.ErrEndOfSpan,
  124. t: rotate,
  125. }, {
  126. desc: "short dst, longer",
  127. szDst: 5,
  128. atEOF: true,
  129. in: "Hellø",
  130. out: "Hell",
  131. outFull: "Hellø",
  132. err: transform.ErrShortDst,
  133. t: rotate,
  134. }, {
  135. desc: "short dst, single",
  136. szDst: 1,
  137. atEOF: false,
  138. in: "ø",
  139. out: "",
  140. outFull: "ø",
  141. err: transform.ErrShortDst,
  142. t: Map(idem),
  143. }, {
  144. desc: "short dst, longer, writing error",
  145. szDst: 8,
  146. atEOF: false,
  147. in: "\x80Hello\x80",
  148. out: "\ufffdHello",
  149. outFull: "\ufffdHello\ufffd",
  150. err: transform.ErrShortDst,
  151. errSpan: transform.ErrEndOfSpan,
  152. t: rotate,
  153. }, {
  154. desc: "short src",
  155. szDst: 2,
  156. atEOF: false,
  157. in: "a\xc2",
  158. out: "ç",
  159. outFull: "ç\ufffd",
  160. err: transform.ErrShortSrc,
  161. errSpan: transform.ErrEndOfSpan,
  162. t: rotate,
  163. }, {
  164. desc: "invalid input, atEOF",
  165. szDst: large,
  166. atEOF: true,
  167. in: "\x80",
  168. out: "\ufffd",
  169. outFull: "\ufffd",
  170. errSpan: transform.ErrEndOfSpan,
  171. t: rotate,
  172. }, {
  173. desc: "invalid input, !atEOF",
  174. szDst: large,
  175. atEOF: false,
  176. in: "\x80",
  177. out: "\ufffd",
  178. outFull: "\ufffd",
  179. errSpan: transform.ErrEndOfSpan,
  180. t: rotate,
  181. }, {
  182. desc: "incomplete rune !atEOF",
  183. szDst: large,
  184. atEOF: false,
  185. in: "\xc2",
  186. out: "",
  187. outFull: "\ufffd",
  188. err: transform.ErrShortSrc,
  189. errSpan: transform.ErrShortSrc,
  190. t: rotate,
  191. }, {
  192. desc: "invalid input, incomplete rune atEOF",
  193. szDst: large,
  194. atEOF: true,
  195. in: "\xc2",
  196. out: "\ufffd",
  197. outFull: "\ufffd",
  198. errSpan: transform.ErrEndOfSpan,
  199. t: rotate,
  200. }, {
  201. desc: "misc correct",
  202. szDst: large,
  203. atEOF: true,
  204. in: "a\U00012345 ç!",
  205. out: "ça 中!",
  206. outFull: "ça 中!",
  207. errSpan: transform.ErrEndOfSpan,
  208. t: rotate,
  209. }, {
  210. desc: "misc correct and invalid",
  211. szDst: large,
  212. atEOF: true,
  213. in: "Hello\x80 w\x80orl\xc0d!\xc0",
  214. out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
  215. outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
  216. errSpan: transform.ErrEndOfSpan,
  217. t: rotate,
  218. }, {
  219. desc: "misc correct and invalid, short src",
  220. szDst: large,
  221. atEOF: false,
  222. in: "Hello\x80 w\x80orl\xc0d!\xc2",
  223. out: "Hello\ufffd w\ufffdorl\ufffdd!",
  224. outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
  225. err: transform.ErrShortSrc,
  226. errSpan: transform.ErrEndOfSpan,
  227. t: rotate,
  228. }, {
  229. desc: "misc correct and invalid, short src, replacing RuneError",
  230. szDst: large,
  231. atEOF: false,
  232. in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
  233. out: "Hel?lo? w?orl?d!",
  234. outFull: "Hel?lo? w?orl?d!?",
  235. errSpan: transform.ErrEndOfSpan,
  236. err: transform.ErrShortSrc,
  237. t: Map(func(r rune) rune {
  238. if r == utf8.RuneError {
  239. return '?'
  240. }
  241. return r
  242. }),
  243. }} {
  244. tt.check(t, i)
  245. }
  246. }
  247. func TestRemove(t *testing.T) {
  248. remove := Remove(Predicate(func(r rune) bool {
  249. return strings.ContainsRune("aeiou\u0300\uFF24\U00012345", r)
  250. }))
  251. for i, tt := range []transformTest{
  252. 0: {
  253. szDst: large,
  254. atEOF: true,
  255. in: "",
  256. out: "",
  257. outFull: "",
  258. t: remove,
  259. },
  260. 1: {
  261. szDst: 0,
  262. atEOF: true,
  263. in: "aaaa",
  264. out: "",
  265. outFull: "",
  266. errSpan: transform.ErrEndOfSpan,
  267. t: remove,
  268. },
  269. 2: {
  270. szDst: 1,
  271. atEOF: true,
  272. in: "aaaa",
  273. out: "",
  274. outFull: "",
  275. errSpan: transform.ErrEndOfSpan,
  276. t: remove,
  277. },
  278. 3: {
  279. szDst: 1,
  280. atEOF: true,
  281. in: "baaaa",
  282. out: "b",
  283. outFull: "b",
  284. errSpan: transform.ErrEndOfSpan,
  285. t: remove,
  286. },
  287. 4: {
  288. szDst: 2,
  289. atEOF: true,
  290. in: "açaaa",
  291. out: "ç",
  292. outFull: "ç",
  293. errSpan: transform.ErrEndOfSpan,
  294. t: remove,
  295. },
  296. 5: {
  297. szDst: 2,
  298. atEOF: true,
  299. in: "aaaç",
  300. out: "ç",
  301. outFull: "ç",
  302. errSpan: transform.ErrEndOfSpan,
  303. t: remove,
  304. },
  305. 6: {
  306. szDst: 2,
  307. atEOF: false,
  308. in: "a\x80",
  309. out: "",
  310. outFull: "\ufffd",
  311. err: transform.ErrShortDst,
  312. errSpan: transform.ErrEndOfSpan,
  313. t: remove,
  314. },
  315. 7: {
  316. szDst: 1,
  317. atEOF: true,
  318. in: "a\xc0",
  319. out: "",
  320. outFull: "\ufffd",
  321. err: transform.ErrShortDst,
  322. errSpan: transform.ErrEndOfSpan,
  323. t: remove,
  324. },
  325. 8: {
  326. szDst: 1,
  327. atEOF: false,
  328. in: "a\xc2",
  329. out: "",
  330. outFull: "\ufffd",
  331. err: transform.ErrShortSrc,
  332. errSpan: transform.ErrEndOfSpan,
  333. t: remove,
  334. },
  335. 9: {
  336. szDst: large,
  337. atEOF: true,
  338. in: "\x80",
  339. out: "\ufffd",
  340. outFull: "\ufffd",
  341. errSpan: transform.ErrEndOfSpan,
  342. t: remove,
  343. },
  344. 10: {
  345. szDst: large,
  346. atEOF: false,
  347. in: "\x80",
  348. out: "\ufffd",
  349. outFull: "\ufffd",
  350. errSpan: transform.ErrEndOfSpan,
  351. t: remove,
  352. },
  353. 11: {
  354. szDst: large,
  355. atEOF: true,
  356. in: "\xc2",
  357. out: "\ufffd",
  358. outFull: "\ufffd",
  359. errSpan: transform.ErrEndOfSpan,
  360. t: remove,
  361. },
  362. 12: {
  363. szDst: large,
  364. atEOF: false,
  365. in: "\xc2",
  366. out: "",
  367. outFull: "\ufffd",
  368. err: transform.ErrShortSrc,
  369. errSpan: transform.ErrShortSrc,
  370. t: remove,
  371. },
  372. 13: {
  373. szDst: large,
  374. atEOF: true,
  375. in: "Hello \U00012345world!",
  376. out: "Hll wrld!",
  377. outFull: "Hll wrld!",
  378. errSpan: transform.ErrEndOfSpan,
  379. t: remove,
  380. },
  381. 14: {
  382. szDst: large,
  383. atEOF: true,
  384. in: "Hello\x80 w\x80orl\xc0d!\xc0",
  385. out: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
  386. outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
  387. errSpan: transform.ErrEndOfSpan,
  388. t: remove,
  389. },
  390. 15: {
  391. szDst: large,
  392. atEOF: false,
  393. in: "Hello\x80 w\x80orl\xc0d!\xc2",
  394. out: "Hll\ufffd w\ufffdrl\ufffdd!",
  395. outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
  396. err: transform.ErrShortSrc,
  397. errSpan: transform.ErrEndOfSpan,
  398. t: remove,
  399. },
  400. 16: {
  401. szDst: large,
  402. atEOF: false,
  403. in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
  404. out: "Hello world!",
  405. outFull: "Hello world!",
  406. err: transform.ErrShortSrc,
  407. errSpan: transform.ErrEndOfSpan,
  408. t: Remove(Predicate(func(r rune) bool { return r == utf8.RuneError })),
  409. },
  410. 17: {
  411. szDst: 4,
  412. atEOF: true,
  413. in: "Hellø",
  414. out: "Hll",
  415. outFull: "Hllø",
  416. err: transform.ErrShortDst,
  417. errSpan: transform.ErrEndOfSpan,
  418. t: remove,
  419. },
  420. 18: {
  421. szDst: 4,
  422. atEOF: false,
  423. in: "Hellø",
  424. out: "Hll",
  425. outFull: "Hllø",
  426. err: transform.ErrShortDst,
  427. errSpan: transform.ErrEndOfSpan,
  428. t: remove,
  429. },
  430. 19: {
  431. szDst: 8,
  432. atEOF: false,
  433. in: "\x80Hello\uFF24\x80",
  434. out: "\ufffdHll",
  435. outFull: "\ufffdHll\ufffd",
  436. err: transform.ErrShortDst,
  437. errSpan: transform.ErrEndOfSpan,
  438. t: remove,
  439. },
  440. 20: {
  441. szDst: 8,
  442. atEOF: false,
  443. in: "Hllll",
  444. out: "Hllll",
  445. outFull: "Hllll",
  446. t: remove,
  447. }} {
  448. tt.check(t, i)
  449. }
  450. }
  451. func TestReplaceIllFormed(t *testing.T) {
  452. replace := ReplaceIllFormed()
  453. for i, tt := range []transformTest{
  454. 0: {
  455. szDst: large,
  456. atEOF: true,
  457. in: "",
  458. out: "",
  459. outFull: "",
  460. t: replace,
  461. },
  462. 1: {
  463. szDst: 1,
  464. atEOF: true,
  465. in: "aa",
  466. out: "a",
  467. outFull: "aa",
  468. err: transform.ErrShortDst,
  469. t: replace,
  470. },
  471. 2: {
  472. szDst: 1,
  473. atEOF: true,
  474. in: "a\x80",
  475. out: "a",
  476. outFull: "a\ufffd",
  477. err: transform.ErrShortDst,
  478. errSpan: transform.ErrEndOfSpan,
  479. t: replace,
  480. },
  481. 3: {
  482. szDst: 1,
  483. atEOF: true,
  484. in: "a\xc2",
  485. out: "a",
  486. outFull: "a\ufffd",
  487. err: transform.ErrShortDst,
  488. errSpan: transform.ErrEndOfSpan,
  489. t: replace,
  490. },
  491. 4: {
  492. szDst: large,
  493. atEOF: true,
  494. in: "\x80",
  495. out: "\ufffd",
  496. outFull: "\ufffd",
  497. errSpan: transform.ErrEndOfSpan,
  498. t: replace,
  499. },
  500. 5: {
  501. szDst: large,
  502. atEOF: false,
  503. in: "\x80",
  504. out: "\ufffd",
  505. outFull: "\ufffd",
  506. errSpan: transform.ErrEndOfSpan,
  507. t: replace,
  508. },
  509. 6: {
  510. szDst: large,
  511. atEOF: true,
  512. in: "\xc2",
  513. out: "\ufffd",
  514. outFull: "\ufffd",
  515. errSpan: transform.ErrEndOfSpan,
  516. t: replace,
  517. },
  518. 7: {
  519. szDst: large,
  520. atEOF: false,
  521. in: "\xc2",
  522. out: "",
  523. outFull: "\ufffd",
  524. err: transform.ErrShortSrc,
  525. errSpan: transform.ErrShortSrc,
  526. t: replace,
  527. },
  528. 8: {
  529. szDst: large,
  530. atEOF: true,
  531. in: "Hello world!",
  532. out: "Hello world!",
  533. outFull: "Hello world!",
  534. t: replace,
  535. },
  536. 9: {
  537. szDst: large,
  538. atEOF: true,
  539. in: "Hello\x80 w\x80orl\xc2d!\xc2",
  540. out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
  541. outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
  542. errSpan: transform.ErrEndOfSpan,
  543. t: replace,
  544. },
  545. 10: {
  546. szDst: large,
  547. atEOF: false,
  548. in: "Hello\x80 w\x80orl\xc2d!\xc2",
  549. out: "Hello\ufffd w\ufffdorl\ufffdd!",
  550. outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
  551. err: transform.ErrShortSrc,
  552. errSpan: transform.ErrEndOfSpan,
  553. t: replace,
  554. },
  555. 16: {
  556. szDst: 10,
  557. atEOF: false,
  558. in: "\x80Hello\x80",
  559. out: "\ufffdHello",
  560. outFull: "\ufffdHello\ufffd",
  561. err: transform.ErrShortDst,
  562. errSpan: transform.ErrEndOfSpan,
  563. t: replace,
  564. },
  565. 17: {
  566. szDst: 10,
  567. atEOF: false,
  568. in: "\ufffdHello\ufffd",
  569. out: "\ufffdHello",
  570. outFull: "\ufffdHello\ufffd",
  571. err: transform.ErrShortDst,
  572. t: replace,
  573. },
  574. } {
  575. tt.check(t, i)
  576. }
  577. }
  578. func TestMapAlloc(t *testing.T) {
  579. if n := testtext.AllocsPerRun(3, func() {
  580. Map(idem).Transform(nil, nil, false)
  581. }); n > 0 {
  582. t.Errorf("got %f; want 0", n)
  583. }
  584. }
  585. func rmNop(r rune) bool { return false }
  586. func TestRemoveAlloc(t *testing.T) {
  587. if n := testtext.AllocsPerRun(3, func() {
  588. Remove(Predicate(rmNop)).Transform(nil, nil, false)
  589. }); n > 0 {
  590. t.Errorf("got %f; want 0", n)
  591. }
  592. }
  593. func TestReplaceIllFormedAlloc(t *testing.T) {
  594. if n := testtext.AllocsPerRun(3, func() {
  595. ReplaceIllFormed().Transform(nil, nil, false)
  596. }); n > 0 {
  597. t.Errorf("got %f; want 0", n)
  598. }
  599. }
  600. func doBench(b *testing.B, t Transformer) {
  601. for _, bc := range []struct{ name, data string }{
  602. {"ascii", testtext.ASCII},
  603. {"3byte", testtext.ThreeByteUTF8},
  604. } {
  605. dst := make([]byte, 2*len(bc.data))
  606. src := []byte(bc.data)
  607. testtext.Bench(b, bc.name+"/transform", func(b *testing.B) {
  608. b.SetBytes(int64(len(src)))
  609. for i := 0; i < b.N; i++ {
  610. t.Transform(dst, src, true)
  611. }
  612. })
  613. src = t.Bytes(src)
  614. t.Reset()
  615. testtext.Bench(b, bc.name+"/span", func(b *testing.B) {
  616. b.SetBytes(int64(len(src)))
  617. for i := 0; i < b.N; i++ {
  618. t.Span(src, true)
  619. }
  620. })
  621. }
  622. }
  623. func BenchmarkRemove(b *testing.B) {
  624. doBench(b, Remove(Predicate(func(r rune) bool { return r == 'e' })))
  625. }
  626. func BenchmarkMapAll(b *testing.B) {
  627. doBench(b, Map(func(r rune) rune { return 'a' }))
  628. }
  629. func BenchmarkMapNone(b *testing.B) {
  630. doBench(b, Map(func(r rune) rune { return r }))
  631. }
  632. func BenchmarkReplaceIllFormed(b *testing.B) {
  633. doBench(b, ReplaceIllFormed())
  634. }
  635. var (
  636. input = strings.Repeat("Thé qüick brøwn føx jumps øver the lazy døg. ", 100)
  637. )