context.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package cases
  5. import "golang.org/x/text/transform"
  6. // A context is used for iterating over source bytes, fetching case info and
  7. // writing to a destination buffer.
  8. //
  9. // Casing operations may need more than one rune of context to decide how a rune
  10. // should be cased. Casing implementations should call checkpoint on context
  11. // whenever it is known to be safe to return the runes processed so far.
  12. //
  13. // It is recommended for implementations to not allow for more than 30 case
  14. // ignorables as lookahead (analogous to the limit in norm) and to use state if
  15. // unbounded lookahead is needed for cased runes.
  16. type context struct {
  17. dst, src []byte
  18. atEOF bool
  19. pDst int // pDst points past the last written rune in dst.
  20. pSrc int // pSrc points to the start of the currently scanned rune.
  21. // checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
  22. nDst, nSrc int
  23. err error
  24. sz int // size of current rune
  25. info info // case information of currently scanned rune
  26. // State preserved across calls to Transform.
  27. isMidWord bool // false if next cased letter needs to be title-cased.
  28. }
  29. func (c *context) Reset() {
  30. c.isMidWord = false
  31. }
  32. // ret returns the return values for the Transform method. It checks whether
  33. // there were insufficient bytes in src to complete and introduces an error
  34. // accordingly, if necessary.
  35. func (c *context) ret() (nDst, nSrc int, err error) {
  36. if c.err != nil || c.nSrc == len(c.src) {
  37. return c.nDst, c.nSrc, c.err
  38. }
  39. // This point is only reached by mappers if there was no short destination
  40. // buffer. This means that the source buffer was exhausted and that c.sz was
  41. // set to 0 by next.
  42. if c.atEOF && c.pSrc == len(c.src) {
  43. return c.pDst, c.pSrc, nil
  44. }
  45. return c.nDst, c.nSrc, transform.ErrShortSrc
  46. }
  47. // retSpan returns the return values for the Span method. It checks whether
  48. // there were insufficient bytes in src to complete and introduces an error
  49. // accordingly, if necessary.
  50. func (c *context) retSpan() (n int, err error) {
  51. _, nSrc, err := c.ret()
  52. return nSrc, err
  53. }
  54. // checkpoint sets the return value buffer points for Transform to the current
  55. // positions.
  56. func (c *context) checkpoint() {
  57. if c.err == nil {
  58. c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
  59. }
  60. }
  61. // unreadRune causes the last rune read by next to be reread on the next
  62. // invocation of next. Only one unreadRune may be called after a call to next.
  63. func (c *context) unreadRune() {
  64. c.sz = 0
  65. }
  66. func (c *context) next() bool {
  67. c.pSrc += c.sz
  68. if c.pSrc == len(c.src) || c.err != nil {
  69. c.info, c.sz = 0, 0
  70. return false
  71. }
  72. v, sz := trie.lookup(c.src[c.pSrc:])
  73. c.info, c.sz = info(v), sz
  74. if c.sz == 0 {
  75. if c.atEOF {
  76. // A zero size means we have an incomplete rune. If we are atEOF,
  77. // this means it is an illegal rune, which we will consume one
  78. // byte at a time.
  79. c.sz = 1
  80. } else {
  81. c.err = transform.ErrShortSrc
  82. return false
  83. }
  84. }
  85. return true
  86. }
  87. // writeBytes adds bytes to dst.
  88. func (c *context) writeBytes(b []byte) bool {
  89. if len(c.dst)-c.pDst < len(b) {
  90. c.err = transform.ErrShortDst
  91. return false
  92. }
  93. // This loop is faster than using copy.
  94. for _, ch := range b {
  95. c.dst[c.pDst] = ch
  96. c.pDst++
  97. }
  98. return true
  99. }
  100. // writeString writes the given string to dst.
  101. func (c *context) writeString(s string) bool {
  102. if len(c.dst)-c.pDst < len(s) {
  103. c.err = transform.ErrShortDst
  104. return false
  105. }
  106. // This loop is faster than using copy.
  107. for i := 0; i < len(s); i++ {
  108. c.dst[c.pDst] = s[i]
  109. c.pDst++
  110. }
  111. return true
  112. }
  113. // copy writes the current rune to dst.
  114. func (c *context) copy() bool {
  115. return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
  116. }
  117. // copyXOR copies the current rune to dst and modifies it by applying the XOR
  118. // pattern of the case info. It is the responsibility of the caller to ensure
  119. // that this is a rune with a XOR pattern defined.
  120. func (c *context) copyXOR() bool {
  121. if !c.copy() {
  122. return false
  123. }
  124. if c.info&xorIndexBit == 0 {
  125. // Fast path for 6-bit XOR pattern, which covers most cases.
  126. c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
  127. } else {
  128. // Interpret XOR bits as an index.
  129. // TODO: test performance for unrolling this loop. Verify that we have
  130. // at least two bytes and at most three.
  131. idx := c.info >> xorShift
  132. for p := c.pDst - 1; ; p-- {
  133. c.dst[p] ^= xorData[idx]
  134. idx--
  135. if xorData[idx] == 0 {
  136. break
  137. }
  138. }
  139. }
  140. return true
  141. }
  142. // hasPrefix returns true if src[pSrc:] starts with the given string.
  143. func (c *context) hasPrefix(s string) bool {
  144. b := c.src[c.pSrc:]
  145. if len(b) < len(s) {
  146. return false
  147. }
  148. for i, c := range b[:len(s)] {
  149. if c != s[i] {
  150. return false
  151. }
  152. }
  153. return true
  154. }
  155. // caseType returns an info with only the case bits, normalized to either
  156. // cLower, cUpper, cTitle or cUncased.
  157. func (c *context) caseType() info {
  158. cm := c.info & 0x7
  159. if cm < 4 {
  160. return cm
  161. }
  162. if cm >= cXORCase {
  163. // xor the last bit of the rune with the case type bits.
  164. b := c.src[c.pSrc+c.sz-1]
  165. return info(b&1) ^ cm&0x3
  166. }
  167. if cm == cIgnorableCased {
  168. return cLower
  169. }
  170. return cUncased
  171. }
  172. // lower writes the lowercase version of the current rune to dst.
  173. func lower(c *context) bool {
  174. ct := c.caseType()
  175. if c.info&hasMappingMask == 0 || ct == cLower {
  176. return c.copy()
  177. }
  178. if c.info&exceptionBit == 0 {
  179. return c.copyXOR()
  180. }
  181. e := exceptions[c.info>>exceptionShift:]
  182. offset := 2 + e[0]&lengthMask // size of header + fold string
  183. if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
  184. return c.writeString(e[offset : offset+nLower])
  185. }
  186. return c.copy()
  187. }
  188. func isLower(c *context) bool {
  189. ct := c.caseType()
  190. if c.info&hasMappingMask == 0 || ct == cLower {
  191. return true
  192. }
  193. if c.info&exceptionBit == 0 {
  194. c.err = transform.ErrEndOfSpan
  195. return false
  196. }
  197. e := exceptions[c.info>>exceptionShift:]
  198. if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
  199. c.err = transform.ErrEndOfSpan
  200. return false
  201. }
  202. return true
  203. }
  204. // upper writes the uppercase version of the current rune to dst.
  205. func upper(c *context) bool {
  206. ct := c.caseType()
  207. if c.info&hasMappingMask == 0 || ct == cUpper {
  208. return c.copy()
  209. }
  210. if c.info&exceptionBit == 0 {
  211. return c.copyXOR()
  212. }
  213. e := exceptions[c.info>>exceptionShift:]
  214. offset := 2 + e[0]&lengthMask // size of header + fold string
  215. // Get length of first special case mapping.
  216. n := (e[1] >> lengthBits) & lengthMask
  217. if ct == cTitle {
  218. // The first special case mapping is for lower. Set n to the second.
  219. if n == noChange {
  220. n = 0
  221. }
  222. n, e = e[1]&lengthMask, e[n:]
  223. }
  224. if n != noChange {
  225. return c.writeString(e[offset : offset+n])
  226. }
  227. return c.copy()
  228. }
  229. // isUpper writes the isUppercase version of the current rune to dst.
  230. func isUpper(c *context) bool {
  231. ct := c.caseType()
  232. if c.info&hasMappingMask == 0 || ct == cUpper {
  233. return true
  234. }
  235. if c.info&exceptionBit == 0 {
  236. c.err = transform.ErrEndOfSpan
  237. return false
  238. }
  239. e := exceptions[c.info>>exceptionShift:]
  240. // Get length of first special case mapping.
  241. n := (e[1] >> lengthBits) & lengthMask
  242. if ct == cTitle {
  243. n = e[1] & lengthMask
  244. }
  245. if n != noChange {
  246. c.err = transform.ErrEndOfSpan
  247. return false
  248. }
  249. return true
  250. }
  251. // title writes the title case version of the current rune to dst.
  252. func title(c *context) bool {
  253. ct := c.caseType()
  254. if c.info&hasMappingMask == 0 || ct == cTitle {
  255. return c.copy()
  256. }
  257. if c.info&exceptionBit == 0 {
  258. if ct == cLower {
  259. return c.copyXOR()
  260. }
  261. return c.copy()
  262. }
  263. // Get the exception data.
  264. e := exceptions[c.info>>exceptionShift:]
  265. offset := 2 + e[0]&lengthMask // size of header + fold string
  266. nFirst := (e[1] >> lengthBits) & lengthMask
  267. if nTitle := e[1] & lengthMask; nTitle != noChange {
  268. if nFirst != noChange {
  269. e = e[nFirst:]
  270. }
  271. return c.writeString(e[offset : offset+nTitle])
  272. }
  273. if ct == cLower && nFirst != noChange {
  274. // Use the uppercase version instead.
  275. return c.writeString(e[offset : offset+nFirst])
  276. }
  277. // Already in correct case.
  278. return c.copy()
  279. }
  280. // isTitle reports whether the current rune is in title case.
  281. func isTitle(c *context) bool {
  282. ct := c.caseType()
  283. if c.info&hasMappingMask == 0 || ct == cTitle {
  284. return true
  285. }
  286. if c.info&exceptionBit == 0 {
  287. if ct == cLower {
  288. c.err = transform.ErrEndOfSpan
  289. return false
  290. }
  291. return true
  292. }
  293. // Get the exception data.
  294. e := exceptions[c.info>>exceptionShift:]
  295. if nTitle := e[1] & lengthMask; nTitle != noChange {
  296. c.err = transform.ErrEndOfSpan
  297. return false
  298. }
  299. nFirst := (e[1] >> lengthBits) & lengthMask
  300. if ct == cLower && nFirst != noChange {
  301. c.err = transform.ErrEndOfSpan
  302. return false
  303. }
  304. return true
  305. }
  306. // foldFull writes the foldFull version of the current rune to dst.
  307. func foldFull(c *context) bool {
  308. if c.info&hasMappingMask == 0 {
  309. return c.copy()
  310. }
  311. ct := c.caseType()
  312. if c.info&exceptionBit == 0 {
  313. if ct != cLower || c.info&inverseFoldBit != 0 {
  314. return c.copyXOR()
  315. }
  316. return c.copy()
  317. }
  318. e := exceptions[c.info>>exceptionShift:]
  319. n := e[0] & lengthMask
  320. if n == 0 {
  321. if ct == cLower {
  322. return c.copy()
  323. }
  324. n = (e[1] >> lengthBits) & lengthMask
  325. }
  326. return c.writeString(e[2 : 2+n])
  327. }
  328. // isFoldFull reports whether the current run is mapped to foldFull
  329. func isFoldFull(c *context) bool {
  330. if c.info&hasMappingMask == 0 {
  331. return true
  332. }
  333. ct := c.caseType()
  334. if c.info&exceptionBit == 0 {
  335. if ct != cLower || c.info&inverseFoldBit != 0 {
  336. c.err = transform.ErrEndOfSpan
  337. return false
  338. }
  339. return true
  340. }
  341. e := exceptions[c.info>>exceptionShift:]
  342. n := e[0] & lengthMask
  343. if n == 0 && ct == cLower {
  344. return true
  345. }
  346. c.err = transform.ErrEndOfSpan
  347. return false
  348. }