data_test.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876
  1. // This file is generated with "go test -tags generate". DO NOT EDIT!
  2. //go:build !generate
  3. // +build !generate
  4. package triegen_test
  5. // lookup returns the trie value for the first UTF-8 encoding in s and
  6. // the width in bytes of this encoding. The size will be 0 if s does not
  7. // hold enough bytes to complete the encoding. len(s) must be greater than 0.
  8. func (t *randTrie) lookup(s []byte) (v uint8, sz int) {
  9. c0 := s[0]
  10. switch {
  11. case c0 < 0x80: // is ASCII
  12. return randValues[c0], 1
  13. case c0 < 0xC2:
  14. return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
  15. case c0 < 0xE0: // 2-byte UTF-8
  16. if len(s) < 2 {
  17. return 0, 0
  18. }
  19. i := randIndex[c0]
  20. c1 := s[1]
  21. if c1 < 0x80 || 0xC0 <= c1 {
  22. return 0, 1 // Illegal UTF-8: not a continuation byte.
  23. }
  24. return t.lookupValue(uint32(i), c1), 2
  25. case c0 < 0xF0: // 3-byte UTF-8
  26. if len(s) < 3 {
  27. return 0, 0
  28. }
  29. i := randIndex[c0]
  30. c1 := s[1]
  31. if c1 < 0x80 || 0xC0 <= c1 {
  32. return 0, 1 // Illegal UTF-8: not a continuation byte.
  33. }
  34. o := uint32(i)<<6 + uint32(c1)
  35. i = randIndex[o]
  36. c2 := s[2]
  37. if c2 < 0x80 || 0xC0 <= c2 {
  38. return 0, 2 // Illegal UTF-8: not a continuation byte.
  39. }
  40. return t.lookupValue(uint32(i), c2), 3
  41. case c0 < 0xF8: // 4-byte UTF-8
  42. if len(s) < 4 {
  43. return 0, 0
  44. }
  45. i := randIndex[c0]
  46. c1 := s[1]
  47. if c1 < 0x80 || 0xC0 <= c1 {
  48. return 0, 1 // Illegal UTF-8: not a continuation byte.
  49. }
  50. o := uint32(i)<<6 + uint32(c1)
  51. i = randIndex[o]
  52. c2 := s[2]
  53. if c2 < 0x80 || 0xC0 <= c2 {
  54. return 0, 2 // Illegal UTF-8: not a continuation byte.
  55. }
  56. o = uint32(i)<<6 + uint32(c2)
  57. i = randIndex[o]
  58. c3 := s[3]
  59. if c3 < 0x80 || 0xC0 <= c3 {
  60. return 0, 3 // Illegal UTF-8: not a continuation byte.
  61. }
  62. return t.lookupValue(uint32(i), c3), 4
  63. }
  64. // Illegal rune
  65. return 0, 1
  66. }
  67. // lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
  68. // s must start with a full and valid UTF-8 encoded rune.
  69. func (t *randTrie) lookupUnsafe(s []byte) uint8 {
  70. c0 := s[0]
  71. if c0 < 0x80 { // is ASCII
  72. return randValues[c0]
  73. }
  74. i := randIndex[c0]
  75. if c0 < 0xE0 { // 2-byte UTF-8
  76. return t.lookupValue(uint32(i), s[1])
  77. }
  78. i = randIndex[uint32(i)<<6+uint32(s[1])]
  79. if c0 < 0xF0 { // 3-byte UTF-8
  80. return t.lookupValue(uint32(i), s[2])
  81. }
  82. i = randIndex[uint32(i)<<6+uint32(s[2])]
  83. if c0 < 0xF8 { // 4-byte UTF-8
  84. return t.lookupValue(uint32(i), s[3])
  85. }
  86. return 0
  87. }
  88. // lookupString returns the trie value for the first UTF-8 encoding in s and
  89. // the width in bytes of this encoding. The size will be 0 if s does not
  90. // hold enough bytes to complete the encoding. len(s) must be greater than 0.
  91. func (t *randTrie) lookupString(s string) (v uint8, sz int) {
  92. c0 := s[0]
  93. switch {
  94. case c0 < 0x80: // is ASCII
  95. return randValues[c0], 1
  96. case c0 < 0xC2:
  97. return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
  98. case c0 < 0xE0: // 2-byte UTF-8
  99. if len(s) < 2 {
  100. return 0, 0
  101. }
  102. i := randIndex[c0]
  103. c1 := s[1]
  104. if c1 < 0x80 || 0xC0 <= c1 {
  105. return 0, 1 // Illegal UTF-8: not a continuation byte.
  106. }
  107. return t.lookupValue(uint32(i), c1), 2
  108. case c0 < 0xF0: // 3-byte UTF-8
  109. if len(s) < 3 {
  110. return 0, 0
  111. }
  112. i := randIndex[c0]
  113. c1 := s[1]
  114. if c1 < 0x80 || 0xC0 <= c1 {
  115. return 0, 1 // Illegal UTF-8: not a continuation byte.
  116. }
  117. o := uint32(i)<<6 + uint32(c1)
  118. i = randIndex[o]
  119. c2 := s[2]
  120. if c2 < 0x80 || 0xC0 <= c2 {
  121. return 0, 2 // Illegal UTF-8: not a continuation byte.
  122. }
  123. return t.lookupValue(uint32(i), c2), 3
  124. case c0 < 0xF8: // 4-byte UTF-8
  125. if len(s) < 4 {
  126. return 0, 0
  127. }
  128. i := randIndex[c0]
  129. c1 := s[1]
  130. if c1 < 0x80 || 0xC0 <= c1 {
  131. return 0, 1 // Illegal UTF-8: not a continuation byte.
  132. }
  133. o := uint32(i)<<6 + uint32(c1)
  134. i = randIndex[o]
  135. c2 := s[2]
  136. if c2 < 0x80 || 0xC0 <= c2 {
  137. return 0, 2 // Illegal UTF-8: not a continuation byte.
  138. }
  139. o = uint32(i)<<6 + uint32(c2)
  140. i = randIndex[o]
  141. c3 := s[3]
  142. if c3 < 0x80 || 0xC0 <= c3 {
  143. return 0, 3 // Illegal UTF-8: not a continuation byte.
  144. }
  145. return t.lookupValue(uint32(i), c3), 4
  146. }
  147. // Illegal rune
  148. return 0, 1
  149. }
  150. // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
  151. // s must start with a full and valid UTF-8 encoded rune.
  152. func (t *randTrie) lookupStringUnsafe(s string) uint8 {
  153. c0 := s[0]
  154. if c0 < 0x80 { // is ASCII
  155. return randValues[c0]
  156. }
  157. i := randIndex[c0]
  158. if c0 < 0xE0 { // 2-byte UTF-8
  159. return t.lookupValue(uint32(i), s[1])
  160. }
  161. i = randIndex[uint32(i)<<6+uint32(s[1])]
  162. if c0 < 0xF0 { // 3-byte UTF-8
  163. return t.lookupValue(uint32(i), s[2])
  164. }
  165. i = randIndex[uint32(i)<<6+uint32(s[2])]
  166. if c0 < 0xF8 { // 4-byte UTF-8
  167. return t.lookupValue(uint32(i), s[3])
  168. }
  169. return 0
  170. }
  // randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f.
  // The type carries no fields of its own.
  type randTrie struct{}

  // newRandTrie returns a pointer to a randTrie. The argument i is accepted
  // but not used.
  func newRandTrie(i int) *randTrie {
  	return new(randTrie)
  }
  176. // lookupValue determines the type of block n and looks up the value for b.
  177. func (t *randTrie) lookupValue(n uint32, b byte) uint8 {
  178. switch {
  179. default:
  180. return uint8(randValues[n<<6+uint32(b)])
  181. }
  182. }
  // randValues: 56 blocks, 3584 entries, 3584 bytes
  // Blocks are 0x40 entries wide. Blocks 0x00 through 0x02 hold no entries, so
  // the third block is the zero block. Every block from 0x03 through 0x37
  // holds exactly one entry, and every entry has the value 1.
  var randValues = [3584]uint8{
  	// Blocks 0x03-0x06
  	0x0c9: 1, 0x100: 1, 0x155: 1, 0x196: 1,
  	// Blocks 0x07-0x0a
  	0x1ef: 1, 0x206: 1, 0x258: 1, 0x288: 1,
  	// Blocks 0x0b-0x0e
  	0x2f2: 1, 0x304: 1, 0x34b: 1, 0x3ba: 1,
  	// Blocks 0x0f-0x12
  	0x3f5: 1, 0x41d: 1, 0x442: 1, 0x4bb: 1,
  	// Blocks 0x13-0x16
  	0x4e9: 1, 0x53e: 1, 0x55f: 1, 0x5b7: 1,
  	// Blocks 0x17-0x1a
  	0x5d9: 1, 0x60e: 1, 0x652: 1, 0x68f: 1,
  	// Blocks 0x1b-0x1e
  	0x6dc: 1, 0x703: 1, 0x741: 1, 0x79b: 1,
  	// Blocks 0x1f-0x22
  	0x7f1: 1, 0x833: 1, 0x853: 1, 0x8a2: 1,
  	// Blocks 0x23-0x26
  	0x8f8: 1, 0x917: 1, 0x945: 1, 0x99e: 1,
  	// Blocks 0x27-0x2a
  	0x9fd: 1, 0xa0d: 1, 0xa66: 1, 0xaab: 1,
  	// Blocks 0x2b-0x2e
  	0xaea: 1, 0xb2d: 1, 0xb54: 1, 0xb90: 1,
  	// Blocks 0x2f-0x32
  	0xbe5: 1, 0xc28: 1, 0xc7c: 1, 0xcbf: 1,
  	// Blocks 0x33-0x36
  	0xcc7: 1, 0xd34: 1, 0xd61: 1, 0xdb9: 1,
  	// Block 0x37
  	0xdda: 1,
  }
  // randIndex: 89 blocks, 5696 entries, 5696 bytes
  // Blocks are 0x40 entries wide. Block 0 is the zero block; blocks 0x01 and
  // 0x02 hold no entries either. Each entry is the number of the block that
  // the next byte of a sequence is looked up in.
  var randIndex = [5696]uint8{
  	// Block 0x03, offset 0xc0
  	0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04, 0xea: 0x05, 0xeb: 0x06, 0xec: 0x07,
  	0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56,
  	0x107: 0x01, // block 0x04
  	0x16c: 0x02, // block 0x05
  	0x19c: 0x03, 0x1ae: 0x04, // block 0x06
  	0x1d8: 0x05, 0x1f7: 0x06, // block 0x07
  	0x20c: 0x07, // block 0x08
  	0x24a: 0x08, // block 0x09
  	0x2b6: 0x09, // block 0x0a
  	0x2d5: 0x0a, // block 0x0b
  	0x31a: 0x0b, // block 0x0c
  	0x373: 0x0c, // block 0x0d
  	0x38b: 0x0d, // block 0x0e
  	0x3f0: 0x0e, // block 0x0f
  	0x433: 0x0f, // block 0x10
  	0x45d: 0x10, // block 0x11
  	// Block 0x12, offset 0x480
  	0x491: 0x08, 0x494: 0x09, 0x497: 0x0a, 0x49b: 0x0b,
  	0x49c: 0x0c, 0x4a1: 0x0d, 0x4ad: 0x0e, 0x4ba: 0x0f,
  	0x4c1: 0x11, // block 0x13
  	0x531: 0x12, // block 0x14
  	0x546: 0x13, // block 0x15
  	0x5ab: 0x14, // block 0x16
  	0x5d4: 0x11, 0x5fe: 0x11, // block 0x17
  	0x618: 0x0a, // block 0x18
  	0x65b: 0x15, // block 0x19
  	0x6a0: 0x16, // block 0x1a
  	0x6d2: 0x17, 0x6f6: 0x18, // block 0x1b
  	0x711: 0x19, // block 0x1c
  	0x768: 0x1a, // block 0x1d
  	0x783: 0x1b, // block 0x1e
  	0x7f9: 0x1c, // block 0x1f
  	0x831: 0x1d, // block 0x20
  	0x85e: 0x1e, // block 0x21
  	0x898: 0x1f, // block 0x22
  	0x8c7: 0x18, 0x8d5: 0x14, 0x8f7: 0x20, 0x8fe: 0x1f, // block 0x23
  	0x905: 0x21, // block 0x24
  	0x966: 0x03, // block 0x25
  	// Block 0x26, offset 0x980
  	0x981: 0x07, 0x983: 0x11, 0x989: 0x12, 0x98a: 0x13,
  	0x98e: 0x14, 0x98f: 0x15, 0x992: 0x16, 0x995: 0x17,
  	0x996: 0x18, 0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b,
  	0x99f: 0x1c, 0x9a3: 0x1d, 0x9ad: 0x1e, 0x9af: 0x1f,
  	0x9b0: 0x20, 0x9b1: 0x21, 0x9b8: 0x22, 0x9bd: 0x23,
  	0x9cd: 0x22, // block 0x27
  	0xa0c: 0x08, // block 0x28
  	0xa6f: 0x1c, // block 0x29
  	0xa90: 0x1a, 0xaaf: 0x23, // block 0x2a
  	0xae3: 0x19, 0xae8: 0x24, 0xafc: 0x25, // block 0x2b
  	0xb13: 0x26, // block 0x2c
  	0xb67: 0x1c, // block 0x2d
  	0xb8f: 0x0b, // block 0x2e
  	0xbcb: 0x27, 0xbe7: 0x26, // block 0x2f
  	0xc34: 0x16, // block 0x30
  	0xc62: 0x03, // block 0x31
  	0xcbb: 0x12, // block 0x32
  	0xcdf: 0x09, // block 0x33
  	0xd34: 0x0a, // block 0x34
  	0xd41: 0x1e, // block 0x35
  	0xd83: 0x28, // block 0x36
  	0xdc0: 0x15, // block 0x37
  	0xe1a: 0x15, // block 0x38
  	0xe65: 0x29, // block 0x39
  	0xe86: 0x1f, // block 0x3a
  	0xeec: 0x18, // block 0x3b
  	0xf28: 0x2a, // block 0x3c
  	0xf53: 0x08, // block 0x3d
  	0xfa2: 0x2b, 0xfaa: 0x17, // block 0x3e
  	// Block 0x3f, offset 0xfc0
  	0xfc0: 0x25, 0xfc2: 0x26, 0xfc9: 0x27, 0xfcd: 0x28,
  	0xfce: 0x29, 0xfd5: 0x2a, 0xfd8: 0x2b, 0xfd9: 0x2c,
  	0xfdf: 0x2d, 0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30,
  	0xfe6: 0x31, 0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34,
  	0xfef: 0x35, 0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38,
  	0xff4: 0x39, 0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c,
  	0x102c: 0x2c, // block 0x40
  	0x1074: 0x2c, // block 0x41
  	0x108c: 0x08, 0x10a0: 0x2d, // block 0x42
  	0x10e8: 0x10, // block 0x43
  	0x110f: 0x13, // block 0x44
  	0x114b: 0x2e, // block 0x45
  	0x118b: 0x23, 0x119d: 0x0c, // block 0x46
  	0x11c3: 0x12, 0x11f9: 0x0f, // block 0x47
  	0x121e: 0x1b, // block 0x48
  	0x1270: 0x2f, // block 0x49
  	0x128a: 0x1b, 0x12a7: 0x02, // block 0x4a
  	0x12fb: 0x14, // block 0x4b
  	0x1333: 0x30, // block 0x4c
  	0x134d: 0x31, // block 0x4d
  	0x138e: 0x15, // block 0x4e
  	0x13f4: 0x32, // block 0x4f
  	0x141b: 0x33, // block 0x50
  	// Block 0x51, offset 0x1440
  	0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41,
  	0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45,
  	0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49,
  	0x146e: 0x4a, 0x1472: 0x4b, 0x1473: 0x4c, 0x1479: 0x4d,
  	0x147b: 0x4e,
  	0x1480: 0x34, 0x1499: 0x11, 0x14b6: 0x2c, // block 0x52
  	0x14e4: 0x0d, // block 0x53
  	0x1527: 0x08, // block 0x54
  	0x1555: 0x2b, // block 0x55
  	0x15b2: 0x35, // block 0x56
  	0x15f2: 0x1c, 0x15f4: 0x29, // block 0x57
  	// Block 0x58, offset 0x1600
  	0x1600: 0x50, 0x1603: 0x51, 0x1608: 0x52,
  	0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55,
  }
  518. // lookup returns the trie value for the first UTF-8 encoding in s and
  519. // the width in bytes of this encoding. The size will be 0 if s does not
  520. // hold enough bytes to complete the encoding. len(s) must be greater than 0.
  521. func (t *multiTrie) lookup(s []byte) (v uint64, sz int) {
  522. c0 := s[0]
  523. switch {
  524. case c0 < 0x80: // is ASCII
  525. return t.ascii[c0], 1
  526. case c0 < 0xC2:
  527. return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
  528. case c0 < 0xE0: // 2-byte UTF-8
  529. if len(s) < 2 {
  530. return 0, 0
  531. }
  532. i := t.utf8Start[c0]
  533. c1 := s[1]
  534. if c1 < 0x80 || 0xC0 <= c1 {
  535. return 0, 1 // Illegal UTF-8: not a continuation byte.
  536. }
  537. return t.lookupValue(uint32(i), c1), 2
  538. case c0 < 0xF0: // 3-byte UTF-8
  539. if len(s) < 3 {
  540. return 0, 0
  541. }
  542. i := t.utf8Start[c0]
  543. c1 := s[1]
  544. if c1 < 0x80 || 0xC0 <= c1 {
  545. return 0, 1 // Illegal UTF-8: not a continuation byte.
  546. }
  547. o := uint32(i)<<6 + uint32(c1)
  548. i = multiIndex[o]
  549. c2 := s[2]
  550. if c2 < 0x80 || 0xC0 <= c2 {
  551. return 0, 2 // Illegal UTF-8: not a continuation byte.
  552. }
  553. return t.lookupValue(uint32(i), c2), 3
  554. case c0 < 0xF8: // 4-byte UTF-8
  555. if len(s) < 4 {
  556. return 0, 0
  557. }
  558. i := t.utf8Start[c0]
  559. c1 := s[1]
  560. if c1 < 0x80 || 0xC0 <= c1 {
  561. return 0, 1 // Illegal UTF-8: not a continuation byte.
  562. }
  563. o := uint32(i)<<6 + uint32(c1)
  564. i = multiIndex[o]
  565. c2 := s[2]
  566. if c2 < 0x80 || 0xC0 <= c2 {
  567. return 0, 2 // Illegal UTF-8: not a continuation byte.
  568. }
  569. o = uint32(i)<<6 + uint32(c2)
  570. i = multiIndex[o]
  571. c3 := s[3]
  572. if c3 < 0x80 || 0xC0 <= c3 {
  573. return 0, 3 // Illegal UTF-8: not a continuation byte.
  574. }
  575. return t.lookupValue(uint32(i), c3), 4
  576. }
  577. // Illegal rune
  578. return 0, 1
  579. }
  580. // lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
  581. // s must start with a full and valid UTF-8 encoded rune.
  582. func (t *multiTrie) lookupUnsafe(s []byte) uint64 {
  583. c0 := s[0]
  584. if c0 < 0x80 { // is ASCII
  585. return t.ascii[c0]
  586. }
  587. i := t.utf8Start[c0]
  588. if c0 < 0xE0 { // 2-byte UTF-8
  589. return t.lookupValue(uint32(i), s[1])
  590. }
  591. i = multiIndex[uint32(i)<<6+uint32(s[1])]
  592. if c0 < 0xF0 { // 3-byte UTF-8
  593. return t.lookupValue(uint32(i), s[2])
  594. }
  595. i = multiIndex[uint32(i)<<6+uint32(s[2])]
  596. if c0 < 0xF8 { // 4-byte UTF-8
  597. return t.lookupValue(uint32(i), s[3])
  598. }
  599. return 0
  600. }
  601. // lookupString returns the trie value for the first UTF-8 encoding in s and
  602. // the width in bytes of this encoding. The size will be 0 if s does not
  603. // hold enough bytes to complete the encoding. len(s) must be greater than 0.
  604. func (t *multiTrie) lookupString(s string) (v uint64, sz int) {
  605. c0 := s[0]
  606. switch {
  607. case c0 < 0x80: // is ASCII
  608. return t.ascii[c0], 1
  609. case c0 < 0xC2:
  610. return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
  611. case c0 < 0xE0: // 2-byte UTF-8
  612. if len(s) < 2 {
  613. return 0, 0
  614. }
  615. i := t.utf8Start[c0]
  616. c1 := s[1]
  617. if c1 < 0x80 || 0xC0 <= c1 {
  618. return 0, 1 // Illegal UTF-8: not a continuation byte.
  619. }
  620. return t.lookupValue(uint32(i), c1), 2
  621. case c0 < 0xF0: // 3-byte UTF-8
  622. if len(s) < 3 {
  623. return 0, 0
  624. }
  625. i := t.utf8Start[c0]
  626. c1 := s[1]
  627. if c1 < 0x80 || 0xC0 <= c1 {
  628. return 0, 1 // Illegal UTF-8: not a continuation byte.
  629. }
  630. o := uint32(i)<<6 + uint32(c1)
  631. i = multiIndex[o]
  632. c2 := s[2]
  633. if c2 < 0x80 || 0xC0 <= c2 {
  634. return 0, 2 // Illegal UTF-8: not a continuation byte.
  635. }
  636. return t.lookupValue(uint32(i), c2), 3
  637. case c0 < 0xF8: // 4-byte UTF-8
  638. if len(s) < 4 {
  639. return 0, 0
  640. }
  641. i := t.utf8Start[c0]
  642. c1 := s[1]
  643. if c1 < 0x80 || 0xC0 <= c1 {
  644. return 0, 1 // Illegal UTF-8: not a continuation byte.
  645. }
  646. o := uint32(i)<<6 + uint32(c1)
  647. i = multiIndex[o]
  648. c2 := s[2]
  649. if c2 < 0x80 || 0xC0 <= c2 {
  650. return 0, 2 // Illegal UTF-8: not a continuation byte.
  651. }
  652. o = uint32(i)<<6 + uint32(c2)
  653. i = multiIndex[o]
  654. c3 := s[3]
  655. if c3 < 0x80 || 0xC0 <= c3 {
  656. return 0, 3 // Illegal UTF-8: not a continuation byte.
  657. }
  658. return t.lookupValue(uint32(i), c3), 4
  659. }
  660. // Illegal rune
  661. return 0, 1
  662. }
  663. // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
  664. // s must start with a full and valid UTF-8 encoded rune.
  665. func (t *multiTrie) lookupStringUnsafe(s string) uint64 {
  666. c0 := s[0]
  667. if c0 < 0x80 { // is ASCII
  668. return t.ascii[c0]
  669. }
  670. i := t.utf8Start[c0]
  671. if c0 < 0xE0 { // 2-byte UTF-8
  672. return t.lookupValue(uint32(i), s[1])
  673. }
  674. i = multiIndex[uint32(i)<<6+uint32(s[1])]
  675. if c0 < 0xF0 { // 3-byte UTF-8
  676. return t.lookupValue(uint32(i), s[2])
  677. }
  678. i = multiIndex[uint32(i)<<6+uint32(s[2])]
  679. if c0 < 0xF8 { // 4-byte UTF-8
  680. return t.lookupValue(uint32(i), s[3])
  681. }
  682. return 0
  683. }
  684. // multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e.
  685. type multiTrie struct {
  686. ascii []uint64 // index for ASCII bytes
  687. utf8Start []uint8 // index for UTF-8 bytes >= 0xC0
  688. }
  689. func newMultiTrie(i int) *multiTrie {
  690. h := multiTrieHandles[i]
  691. return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]}
  692. }
  // multiTrieHandle names the starting blocks of one trie inside the shared
  // tables: ascii is a block number in multiValues and multi is a block number
  // in multiIndex.
  type multiTrieHandle struct {
  	ascii uint8
  	multi uint8
  }

  // multiTrieHandles: 5 handles, 10 bytes
  var multiTrieHandles = [5]multiTrieHandle{
  	{ascii: 0, multi: 0},   // 8c1e77823143d35c: all
  	{ascii: 0, multi: 23},  // 8fb58ff8243b45b0: ASCII only
  	{ascii: 0, multi: 23},  // 8fb58ff8243b45b0: ASCII only 2
  	{ascii: 0, multi: 24},  // 2ccc43994f11046f: BMP only
  	{ascii: 30, multi: 25}, // ce448591bdcb4733: No BMP
  }
  704. // lookupValue determines the type of block n and looks up the value for b.
  705. func (t *multiTrie) lookupValue(n uint32, b byte) uint64 {
  706. switch {
  707. default:
  708. return uint64(multiValues[n<<6+uint32(b)])
  709. }
  710. }
  // multiValues: 32 blocks, 2048 entries, 16384 bytes
  // Blocks are 0x40 entries wide. The third block (0x02) is the zero block;
  // blocks 0x1e and 0x1f hold no entries either.
  var multiValues = [2048]uint64{
  	// Block 0x00, offset 0x0
  	0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b,
  	0x05: 0x2948319393e7ef10, 0x07: 0x20f03b006704f663,
  	0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551,
  	0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92,
  	0x11: 0x6a81c9c0ce86e884, 0x1f: 0xa03e77aac8be79b,
  	0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8,
  	0x3f: 0x4fd3bcfa72bce8b0,
  	// Block 0x01, offset 0x40
  	0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357,
  	0x7f: 0x782caa2d25a418a9,
  	// Blocks 0x03-0x1d: odd blocks hold their first two entries, even
  	// blocks hold only their last entry.
  	0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4, // block 0x03
  	0x13f: 0x56f8c4c82f5962dc, // block 0x04
  	0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d, // block 0x05
  	0x1bf: 0x7bf4d0ebf302a088, // block 0x06
  	0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7, // block 0x07
  	0x23f: 0x5de81c1dff6bf29d, // block 0x08
  	0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3, // block 0x09
  	0x2bf: 0x6a28f01979cbf059, // block 0x0a
  	0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c, // block 0x0b
  	0x33f: 0x5a10ffa9e29184fb, // block 0x0c
  	0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79, // block 0x0d
  	0x3bf: 0x74071288fff39c76, // block 0x0e
  	0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849, // block 0x0f
  	0x43f: 0x5676a62fd49c6bec, // block 0x10
  	0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f, // block 0x11
  	0x4bf: 0x69d6f0fe711fafc9, // block 0x12
  	0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02, // block 0x13
  	0x53f: 0xe03b31814c95f8b, // block 0x14
  	0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc, // block 0x15
  	0x5bf: 0x3c02ea92fb168559, // block 0x16
  	0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645, // block 0x17
  	0x63f: 0x3bb2ed2a72748f4b, // block 0x18
  	0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6, // block 0x19
  	0x6bf: 0x352711cfb7236418, // block 0x1a
  	0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1, // block 0x1b
  	0x73f: 0x7191a77b28d23110, // block 0x1c
  	0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de, // block 0x1d
  }
  // multiIndex: 29 blocks, 1856 entries, 1856 bytes
  // Blocks are 0x40 entries wide. Block 0 is the zero block; blocks 0x01, 0x02
  // and 0x1a hold no entries either.
  var multiIndex = [1856]uint8{
  	// Block 0x03, offset 0xc0
  	0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04, 0xc8: 0x05, 0xcf: 0x06,
  	0xd0: 0x07, 0xdf: 0x08,
  	0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07,
  	0xe8: 0x08, 0xef: 0x09,
  	0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17,
  	0x120: 0x09, 0x13f: 0x0a, // block 0x04
  	0x140: 0x0b, 0x17f: 0x0c, // block 0x05
  	0x180: 0x0d, // block 0x06
  	0x1ff: 0x0e, // block 0x07
  	0x200: 0x0f, // block 0x08
  	0x27f: 0x10, // block 0x09
  	0x280: 0x11, // block 0x0a
  	0x2ff: 0x12, // block 0x0b
  	0x300: 0x13, // block 0x0c
  	0x37f: 0x14, // block 0x0d
  	0x380: 0x15, // block 0x0e
  	0x3ff: 0x16, // block 0x0f
  	0x410: 0x0a, 0x41f: 0x0b, 0x420: 0x0c, 0x43f: 0x0d, // block 0x10
  	0x440: 0x17, // block 0x11
  	0x4bf: 0x18, // block 0x12
  	0x4c0: 0x0f, 0x4ff: 0x10, // block 0x13
  	0x500: 0x19, // block 0x14
  	0x540: 0x12, // block 0x15
  	0x5bf: 0x1a, // block 0x16
  	0x5ff: 0x14, // block 0x17
  	0x600: 0x1b, // block 0x18
  	0x640: 0x16, // block 0x19
  	// Block 0x1b, offset 0x6c0
  	0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04, 0x6c8: 0x05, 0x6cf: 0x06,
  	0x6d0: 0x07, 0x6df: 0x08,
  	0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07,
  	0x6e8: 0x08, 0x6ef: 0x09,
  	// Block 0x1c, offset 0x700
  	0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17,
  }