transform.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package width
  5. import (
  6. "unicode/utf8"
  7. "golang.org/x/text/transform"
  8. )
  9. type foldTransform struct {
  10. transform.NopResetter
  11. }
  12. func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) {
  13. for n < len(src) {
  14. if src[n] < utf8.RuneSelf {
  15. // ASCII fast path.
  16. for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
  17. }
  18. continue
  19. }
  20. v, size := trie.lookup(src[n:])
  21. if size == 0 { // incomplete UTF-8 encoding
  22. if !atEOF {
  23. err = transform.ErrShortSrc
  24. } else {
  25. n = len(src)
  26. }
  27. break
  28. }
  29. if elem(v)&tagNeedsFold != 0 {
  30. err = transform.ErrEndOfSpan
  31. break
  32. }
  33. n += size
  34. }
  35. return n, err
  36. }
  37. func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  38. for nSrc < len(src) {
  39. if src[nSrc] < utf8.RuneSelf {
  40. // ASCII fast path.
  41. start, end := nSrc, len(src)
  42. if d := len(dst) - nDst; d < end-start {
  43. end = nSrc + d
  44. }
  45. for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
  46. }
  47. n := copy(dst[nDst:], src[start:nSrc])
  48. if nDst += n; nDst == len(dst) {
  49. nSrc = start + n
  50. if nSrc == len(src) {
  51. return nDst, nSrc, nil
  52. }
  53. if src[nSrc] < utf8.RuneSelf {
  54. return nDst, nSrc, transform.ErrShortDst
  55. }
  56. }
  57. continue
  58. }
  59. v, size := trie.lookup(src[nSrc:])
  60. if size == 0 { // incomplete UTF-8 encoding
  61. if !atEOF {
  62. return nDst, nSrc, transform.ErrShortSrc
  63. }
  64. size = 1 // gobble 1 byte
  65. }
  66. if elem(v)&tagNeedsFold == 0 {
  67. if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
  68. return nDst, nSrc, transform.ErrShortDst
  69. }
  70. nDst += size
  71. } else {
  72. data := inverseData[byte(v)]
  73. if len(dst)-nDst < int(data[0]) {
  74. return nDst, nSrc, transform.ErrShortDst
  75. }
  76. i := 1
  77. for end := int(data[0]); i < end; i++ {
  78. dst[nDst] = data[i]
  79. nDst++
  80. }
  81. dst[nDst] = data[i] ^ src[nSrc+size-1]
  82. nDst++
  83. }
  84. nSrc += size
  85. }
  86. return nDst, nSrc, nil
  87. }
  88. type narrowTransform struct {
  89. transform.NopResetter
  90. }
  91. func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) {
  92. for n < len(src) {
  93. if src[n] < utf8.RuneSelf {
  94. // ASCII fast path.
  95. for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
  96. }
  97. continue
  98. }
  99. v, size := trie.lookup(src[n:])
  100. if size == 0 { // incomplete UTF-8 encoding
  101. if !atEOF {
  102. err = transform.ErrShortSrc
  103. } else {
  104. n = len(src)
  105. }
  106. break
  107. }
  108. if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
  109. } else {
  110. err = transform.ErrEndOfSpan
  111. break
  112. }
  113. n += size
  114. }
  115. return n, err
  116. }
  117. func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  118. for nSrc < len(src) {
  119. if src[nSrc] < utf8.RuneSelf {
  120. // ASCII fast path.
  121. start, end := nSrc, len(src)
  122. if d := len(dst) - nDst; d < end-start {
  123. end = nSrc + d
  124. }
  125. for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
  126. }
  127. n := copy(dst[nDst:], src[start:nSrc])
  128. if nDst += n; nDst == len(dst) {
  129. nSrc = start + n
  130. if nSrc == len(src) {
  131. return nDst, nSrc, nil
  132. }
  133. if src[nSrc] < utf8.RuneSelf {
  134. return nDst, nSrc, transform.ErrShortDst
  135. }
  136. }
  137. continue
  138. }
  139. v, size := trie.lookup(src[nSrc:])
  140. if size == 0 { // incomplete UTF-8 encoding
  141. if !atEOF {
  142. return nDst, nSrc, transform.ErrShortSrc
  143. }
  144. size = 1 // gobble 1 byte
  145. }
  146. if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
  147. if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
  148. return nDst, nSrc, transform.ErrShortDst
  149. }
  150. nDst += size
  151. } else {
  152. data := inverseData[byte(v)]
  153. if len(dst)-nDst < int(data[0]) {
  154. return nDst, nSrc, transform.ErrShortDst
  155. }
  156. i := 1
  157. for end := int(data[0]); i < end; i++ {
  158. dst[nDst] = data[i]
  159. nDst++
  160. }
  161. dst[nDst] = data[i] ^ src[nSrc+size-1]
  162. nDst++
  163. }
  164. nSrc += size
  165. }
  166. return nDst, nSrc, nil
  167. }
  168. type wideTransform struct {
  169. transform.NopResetter
  170. }
  171. func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) {
  172. for n < len(src) {
  173. // TODO: Consider ASCII fast path. Special-casing ASCII handling can
  174. // reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
  175. // not enough to warrant the extra code and complexity.
  176. v, size := trie.lookup(src[n:])
  177. if size == 0 { // incomplete UTF-8 encoding
  178. if !atEOF {
  179. err = transform.ErrShortSrc
  180. } else {
  181. n = len(src)
  182. }
  183. break
  184. }
  185. if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
  186. } else {
  187. err = transform.ErrEndOfSpan
  188. break
  189. }
  190. n += size
  191. }
  192. return n, err
  193. }
  194. func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  195. for nSrc < len(src) {
  196. // TODO: Consider ASCII fast path. Special-casing ASCII handling can
  197. // reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
  198. // not enough to warrant the extra code and complexity.
  199. v, size := trie.lookup(src[nSrc:])
  200. if size == 0 { // incomplete UTF-8 encoding
  201. if !atEOF {
  202. return nDst, nSrc, transform.ErrShortSrc
  203. }
  204. size = 1 // gobble 1 byte
  205. }
  206. if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
  207. if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
  208. return nDst, nSrc, transform.ErrShortDst
  209. }
  210. nDst += size
  211. } else {
  212. data := inverseData[byte(v)]
  213. if len(dst)-nDst < int(data[0]) {
  214. return nDst, nSrc, transform.ErrShortDst
  215. }
  216. i := 1
  217. for end := int(data[0]); i < end; i++ {
  218. dst[nDst] = data[i]
  219. nDst++
  220. }
  221. dst[nDst] = data[i] ^ src[nSrc+size-1]
  222. nDst++
  223. }
  224. nSrc += size
  225. }
  226. return nDst, nSrc, nil
  227. }