cond_test.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package runes
  5. import (
  6. "strings"
  7. "testing"
  8. "unicode"
  9. "golang.org/x/text/cases"
  10. "golang.org/x/text/language"
  11. "golang.org/x/text/transform"
  12. )
  13. var (
  14. toUpper = cases.Upper(language.Und)
  15. toLower = cases.Lower(language.Und)
  16. )
  17. type spanformer interface {
  18. transform.SpanningTransformer
  19. }
  20. func TestPredicate(t *testing.T) {
  21. testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
  22. return If(Predicate(func(r rune) bool {
  23. return unicode.Is(rt, r)
  24. }), t, f)
  25. })
  26. }
  27. func TestIn(t *testing.T) {
  28. testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
  29. return If(In(rt), t, f)
  30. })
  31. }
  32. func TestNotIn(t *testing.T) {
  33. testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
  34. return If(NotIn(rt), f, t)
  35. })
  36. }
  37. func testConditional(t *testing.T, f func(rt *unicode.RangeTable, t, f spanformer) spanformer) {
  38. lower := f(unicode.Latin, toLower, toLower)
  39. for i, tt := range []transformTest{{
  40. desc: "empty",
  41. szDst: large,
  42. atEOF: true,
  43. in: "",
  44. out: "",
  45. outFull: "",
  46. t: lower,
  47. }, {
  48. desc: "small",
  49. szDst: 1,
  50. atEOF: true,
  51. in: "B",
  52. out: "b",
  53. outFull: "b",
  54. errSpan: transform.ErrEndOfSpan,
  55. t: lower,
  56. }, {
  57. desc: "short dst",
  58. szDst: 2,
  59. atEOF: true,
  60. in: "AAA",
  61. out: "aa",
  62. outFull: "aaa",
  63. err: transform.ErrShortDst,
  64. errSpan: transform.ErrEndOfSpan,
  65. t: lower,
  66. }, {
  67. desc: "short dst writing error",
  68. szDst: 1,
  69. atEOF: false,
  70. in: "A\x80",
  71. out: "a",
  72. outFull: "a\x80",
  73. err: transform.ErrShortDst,
  74. errSpan: transform.ErrEndOfSpan,
  75. t: lower,
  76. }, {
  77. desc: "short dst writing incomplete rune",
  78. szDst: 2,
  79. atEOF: true,
  80. in: "Σ\xc2",
  81. out: "Σ",
  82. outFull: "Σ\xc2",
  83. err: transform.ErrShortDst,
  84. t: f(unicode.Latin, toLower, nil),
  85. }, {
  86. desc: "short dst, longer",
  87. szDst: 5,
  88. atEOF: true,
  89. in: "Hellø",
  90. out: "Hell",
  91. outFull: "Hellø",
  92. err: transform.ErrShortDst,
  93. // idem is used to test short buffers by forcing processing of full-rune increments.
  94. t: f(unicode.Latin, Map(idem), nil),
  95. }, {
  96. desc: "short dst, longer, writing error",
  97. szDst: 6,
  98. atEOF: false,
  99. in: "\x80Hello\x80",
  100. out: "\x80Hello",
  101. outFull: "\x80Hello\x80",
  102. err: transform.ErrShortDst,
  103. t: f(unicode.Latin, Map(idem), nil),
  104. }, {
  105. desc: "short src",
  106. szDst: 2,
  107. atEOF: false,
  108. in: "A\xc2",
  109. out: "a",
  110. outFull: "a\xc2",
  111. err: transform.ErrShortSrc,
  112. errSpan: transform.ErrEndOfSpan,
  113. t: lower,
  114. }, {
  115. desc: "short src no change",
  116. szDst: 2,
  117. atEOF: false,
  118. in: "a\xc2",
  119. out: "a",
  120. outFull: "a\xc2",
  121. err: transform.ErrShortSrc,
  122. errSpan: transform.ErrShortSrc,
  123. nSpan: 1,
  124. t: lower,
  125. }, {
  126. desc: "invalid input, atEOF",
  127. szDst: large,
  128. atEOF: true,
  129. in: "\x80",
  130. out: "\x80",
  131. outFull: "\x80",
  132. t: lower,
  133. }, {
  134. desc: "invalid input, !atEOF",
  135. szDst: large,
  136. atEOF: false,
  137. in: "\x80",
  138. out: "\x80",
  139. outFull: "\x80",
  140. t: lower,
  141. }, {
  142. desc: "invalid input, incomplete rune atEOF",
  143. szDst: large,
  144. atEOF: true,
  145. in: "\xc2",
  146. out: "\xc2",
  147. outFull: "\xc2",
  148. t: lower,
  149. }, {
  150. desc: "nop",
  151. szDst: large,
  152. atEOF: true,
  153. in: "Hello World!",
  154. out: "Hello World!",
  155. outFull: "Hello World!",
  156. t: f(unicode.Latin, nil, nil),
  157. }, {
  158. desc: "nop in",
  159. szDst: large,
  160. atEOF: true,
  161. in: "THIS IS α ΤΕΣΤ",
  162. out: "this is α ΤΕΣΤ",
  163. outFull: "this is α ΤΕΣΤ",
  164. errSpan: transform.ErrEndOfSpan,
  165. t: f(unicode.Greek, nil, toLower),
  166. }, {
  167. desc: "nop in latin",
  168. szDst: large,
  169. atEOF: true,
  170. in: "THIS IS α ΤΕΣΤ",
  171. out: "THIS IS α τεστ",
  172. outFull: "THIS IS α τεστ",
  173. errSpan: transform.ErrEndOfSpan,
  174. t: f(unicode.Latin, nil, toLower),
  175. }, {
  176. desc: "nop not in",
  177. szDst: large,
  178. atEOF: true,
  179. in: "THIS IS α ΤΕΣΤ",
  180. out: "this is α ΤΕΣΤ",
  181. outFull: "this is α ΤΕΣΤ",
  182. errSpan: transform.ErrEndOfSpan,
  183. t: f(unicode.Latin, toLower, nil),
  184. }, {
  185. desc: "pass atEOF is true when at end",
  186. szDst: large,
  187. atEOF: true,
  188. in: "hello",
  189. out: "HELLO",
  190. outFull: "HELLO",
  191. errSpan: transform.ErrEndOfSpan,
  192. t: f(unicode.Latin, upperAtEOF{}, nil),
  193. }, {
  194. desc: "pass atEOF is true when at end of segment",
  195. szDst: large,
  196. atEOF: true,
  197. in: "hello ",
  198. out: "HELLO ",
  199. outFull: "HELLO ",
  200. errSpan: transform.ErrEndOfSpan,
  201. t: f(unicode.Latin, upperAtEOF{}, nil),
  202. }, {
  203. desc: "don't pass atEOF is true when atEOF is false",
  204. szDst: large,
  205. atEOF: false,
  206. in: "hello",
  207. out: "",
  208. outFull: "HELLO",
  209. err: transform.ErrShortSrc,
  210. errSpan: transform.ErrShortSrc,
  211. t: f(unicode.Latin, upperAtEOF{}, nil),
  212. }, {
  213. desc: "pass atEOF is true when at end, no change",
  214. szDst: large,
  215. atEOF: true,
  216. in: "HELLO",
  217. out: "HELLO",
  218. outFull: "HELLO",
  219. t: f(unicode.Latin, upperAtEOF{}, nil),
  220. }, {
  221. desc: "pass atEOF is true when at end of segment, no change",
  222. szDst: large,
  223. atEOF: true,
  224. in: "HELLO ",
  225. out: "HELLO ",
  226. outFull: "HELLO ",
  227. t: f(unicode.Latin, upperAtEOF{}, nil),
  228. }, {
  229. desc: "large input ASCII",
  230. szDst: 12000,
  231. atEOF: false,
  232. in: strings.Repeat("HELLO", 2000),
  233. out: strings.Repeat("hello", 2000),
  234. outFull: strings.Repeat("hello", 2000),
  235. errSpan: transform.ErrEndOfSpan,
  236. err: nil,
  237. t: lower,
  238. }, {
  239. desc: "large input non-ASCII",
  240. szDst: 12000,
  241. atEOF: false,
  242. in: strings.Repeat("\u3333", 2000),
  243. out: strings.Repeat("\u3333", 2000),
  244. outFull: strings.Repeat("\u3333", 2000),
  245. err: nil,
  246. t: lower,
  247. }} {
  248. tt.check(t, i)
  249. }
  250. }
  251. // upperAtEOF is a strange Transformer that converts text to uppercase, but only
  252. // if atEOF is true.
  253. type upperAtEOF struct{ transform.NopResetter }
  254. func (upperAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  255. if !atEOF {
  256. return 0, 0, transform.ErrShortSrc
  257. }
  258. return toUpper.Transform(dst, src, atEOF)
  259. }
  260. func (upperAtEOF) Span(src []byte, atEOF bool) (n int, err error) {
  261. if !atEOF {
  262. return 0, transform.ErrShortSrc
  263. }
  264. return toUpper.Span(src, atEOF)
  265. }
  266. func BenchmarkConditional(b *testing.B) {
  267. doBench(b, If(In(unicode.Hangul), transform.Nop, transform.Nop))
  268. }