gen.go 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. package main
  6. // This program generates table.go and table_test.go.
  7. // Invoke as
  8. //
  9. // go run gen.go |gofmt >table.go
  10. // go run gen.go -test |gofmt >table_test.go
  11. import (
  12. "flag"
  13. "fmt"
  14. "math/rand"
  15. "os"
  16. "sort"
  17. "strings"
  18. )
  19. // identifier converts s to a Go exported identifier.
  20. // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
  21. func identifier(s string) string {
  22. b := make([]byte, 0, len(s))
  23. cap := true
  24. for _, c := range s {
  25. if c == '-' {
  26. cap = true
  27. continue
  28. }
  29. if cap && 'a' <= c && c <= 'z' {
  30. c -= 'a' - 'A'
  31. }
  32. cap = false
  33. b = append(b, byte(c))
  34. }
  35. return string(b)
  36. }
  37. var test = flag.Bool("test", false, "generate table_test.go")
  38. func main() {
  39. flag.Parse()
  40. var all []string
  41. all = append(all, elements...)
  42. all = append(all, attributes...)
  43. all = append(all, eventHandlers...)
  44. all = append(all, extra...)
  45. sort.Strings(all)
  46. if *test {
  47. fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
  48. fmt.Printf("package atom\n\n")
  49. fmt.Printf("var testAtomList = []string{\n")
  50. for _, s := range all {
  51. fmt.Printf("\t%q,\n", s)
  52. }
  53. fmt.Printf("}\n")
  54. return
  55. }
  56. // uniq - lists have dups
  57. // compute max len too
  58. maxLen := 0
  59. w := 0
  60. for _, s := range all {
  61. if w == 0 || all[w-1] != s {
  62. if maxLen < len(s) {
  63. maxLen = len(s)
  64. }
  65. all[w] = s
  66. w++
  67. }
  68. }
  69. all = all[:w]
  70. // Find hash that minimizes table size.
  71. var best *table
  72. for i := 0; i < 1000000; i++ {
  73. if best != nil && 1<<(best.k-1) < len(all) {
  74. break
  75. }
  76. h := rand.Uint32()
  77. for k := uint(0); k <= 16; k++ {
  78. if best != nil && k >= best.k {
  79. break
  80. }
  81. var t table
  82. if t.init(h, k, all) {
  83. best = &t
  84. break
  85. }
  86. }
  87. }
  88. if best == nil {
  89. fmt.Fprintf(os.Stderr, "failed to construct string table\n")
  90. os.Exit(1)
  91. }
  92. // Lay out strings, using overlaps when possible.
  93. layout := append([]string{}, all...)
  94. // Remove strings that are substrings of other strings
  95. for changed := true; changed; {
  96. changed = false
  97. for i, s := range layout {
  98. if s == "" {
  99. continue
  100. }
  101. for j, t := range layout {
  102. if i != j && t != "" && strings.Contains(s, t) {
  103. changed = true
  104. layout[j] = ""
  105. }
  106. }
  107. }
  108. }
  109. // Join strings where one suffix matches another prefix.
  110. for {
  111. // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
  112. // maximizing overlap length k.
  113. besti := -1
  114. bestj := -1
  115. bestk := 0
  116. for i, s := range layout {
  117. if s == "" {
  118. continue
  119. }
  120. for j, t := range layout {
  121. if i == j {
  122. continue
  123. }
  124. for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
  125. if s[len(s)-k:] == t[:k] {
  126. besti = i
  127. bestj = j
  128. bestk = k
  129. }
  130. }
  131. }
  132. }
  133. if bestk > 0 {
  134. layout[besti] += layout[bestj][bestk:]
  135. layout[bestj] = ""
  136. continue
  137. }
  138. break
  139. }
  140. text := strings.Join(layout, "")
  141. atom := map[string]uint32{}
  142. for _, s := range all {
  143. off := strings.Index(text, s)
  144. if off < 0 {
  145. panic("lost string " + s)
  146. }
  147. atom[s] = uint32(off<<8 | len(s))
  148. }
  149. // Generate the Go code.
  150. fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
  151. fmt.Printf("package atom\n\nconst (\n")
  152. for _, s := range all {
  153. fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
  154. }
  155. fmt.Printf(")\n\n")
  156. fmt.Printf("const hash0 = %#x\n\n", best.h0)
  157. fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
  158. fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
  159. for i, s := range best.tab {
  160. if s == "" {
  161. continue
  162. }
  163. fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
  164. }
  165. fmt.Printf("}\n")
  166. datasize := (1 << best.k) * 4
  167. fmt.Printf("const atomText =\n")
  168. textsize := len(text)
  169. for len(text) > 60 {
  170. fmt.Printf("\t%q +\n", text[:60])
  171. text = text[60:]
  172. }
  173. fmt.Printf("\t%q\n\n", text)
  174. fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
  175. }
  176. type byLen []string
  177. func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
  178. func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  179. func (x byLen) Len() int { return len(x) }
  180. // fnv computes the FNV hash with an arbitrary starting value h.
  181. func fnv(h uint32, s string) uint32 {
  182. for i := 0; i < len(s); i++ {
  183. h ^= uint32(s[i])
  184. h *= 16777619
  185. }
  186. return h
  187. }
  188. // A table represents an attempt at constructing the lookup table.
  189. // The lookup table uses cuckoo hashing, meaning that each string
  190. // can be found in one of two positions.
  191. type table struct {
  192. h0 uint32
  193. k uint
  194. mask uint32
  195. tab []string
  196. }
  197. // hash returns the two hashes for s.
  198. func (t *table) hash(s string) (h1, h2 uint32) {
  199. h := fnv(t.h0, s)
  200. h1 = h & t.mask
  201. h2 = (h >> 16) & t.mask
  202. return
  203. }
  204. // init initializes the table with the given parameters.
  205. // h0 is the initial hash value,
  206. // k is the number of bits of hash value to use, and
  207. // x is the list of strings to store in the table.
  208. // init returns false if the table cannot be constructed.
  209. func (t *table) init(h0 uint32, k uint, x []string) bool {
  210. t.h0 = h0
  211. t.k = k
  212. t.tab = make([]string, 1<<k)
  213. t.mask = 1<<k - 1
  214. for _, s := range x {
  215. if !t.insert(s) {
  216. return false
  217. }
  218. }
  219. return true
  220. }
  221. // insert inserts s in the table.
  222. func (t *table) insert(s string) bool {
  223. h1, h2 := t.hash(s)
  224. if t.tab[h1] == "" {
  225. t.tab[h1] = s
  226. return true
  227. }
  228. if t.tab[h2] == "" {
  229. t.tab[h2] = s
  230. return true
  231. }
  232. if t.push(h1, 0) {
  233. t.tab[h1] = s
  234. return true
  235. }
  236. if t.push(h2, 0) {
  237. t.tab[h2] = s
  238. return true
  239. }
  240. return false
  241. }
  242. // push attempts to push aside the entry in slot i.
  243. func (t *table) push(i uint32, depth int) bool {
  244. if depth > len(t.tab) {
  245. return false
  246. }
  247. s := t.tab[i]
  248. h1, h2 := t.hash(s)
  249. j := h1 + h2 - i
  250. if t.tab[j] != "" && !t.push(j, depth+1) {
  251. return false
  252. }
  253. t.tab[j] = s
  254. return true
  255. }
  256. // The lists of element names and attribute keys were taken from
  257. // http://www.whatwg.org/specs/web-apps/current-work/multipage/section-index.html
  258. // as of the "HTML Living Standard - Last Updated 30 May 2012" version.
  259. var elements = []string{
  260. "a",
  261. "abbr",
  262. "address",
  263. "area",
  264. "article",
  265. "aside",
  266. "audio",
  267. "b",
  268. "base",
  269. "bdi",
  270. "bdo",
  271. "blockquote",
  272. "body",
  273. "br",
  274. "button",
  275. "canvas",
  276. "caption",
  277. "cite",
  278. "code",
  279. "col",
  280. "colgroup",
  281. "command",
  282. "data",
  283. "datalist",
  284. "dd",
  285. "del",
  286. "details",
  287. "dfn",
  288. "dialog",
  289. "div",
  290. "dl",
  291. "dt",
  292. "em",
  293. "embed",
  294. "fieldset",
  295. "figcaption",
  296. "figure",
  297. "footer",
  298. "form",
  299. "h1",
  300. "h2",
  301. "h3",
  302. "h4",
  303. "h5",
  304. "h6",
  305. "head",
  306. "header",
  307. "hgroup",
  308. "hr",
  309. "html",
  310. "i",
  311. "iframe",
  312. "img",
  313. "input",
  314. "ins",
  315. "kbd",
  316. "keygen",
  317. "label",
  318. "legend",
  319. "li",
  320. "link",
  321. "map",
  322. "mark",
  323. "menu",
  324. "meta",
  325. "meter",
  326. "nav",
  327. "noscript",
  328. "object",
  329. "ol",
  330. "optgroup",
  331. "option",
  332. "output",
  333. "p",
  334. "param",
  335. "pre",
  336. "progress",
  337. "q",
  338. "rp",
  339. "rt",
  340. "ruby",
  341. "s",
  342. "samp",
  343. "script",
  344. "section",
  345. "select",
  346. "small",
  347. "source",
  348. "span",
  349. "strong",
  350. "style",
  351. "sub",
  352. "summary",
  353. "sup",
  354. "table",
  355. "tbody",
  356. "td",
  357. "textarea",
  358. "tfoot",
  359. "th",
  360. "thead",
  361. "time",
  362. "title",
  363. "tr",
  364. "track",
  365. "u",
  366. "ul",
  367. "var",
  368. "video",
  369. "wbr",
  370. }
  371. var attributes = []string{
  372. "accept",
  373. "accept-charset",
  374. "accesskey",
  375. "action",
  376. "alt",
  377. "async",
  378. "autocomplete",
  379. "autofocus",
  380. "autoplay",
  381. "border",
  382. "challenge",
  383. "charset",
  384. "checked",
  385. "cite",
  386. "class",
  387. "cols",
  388. "colspan",
  389. "command",
  390. "content",
  391. "contenteditable",
  392. "contextmenu",
  393. "controls",
  394. "coords",
  395. "crossorigin",
  396. "data",
  397. "datetime",
  398. "default",
  399. "defer",
  400. "dir",
  401. "dirname",
  402. "disabled",
  403. "download",
  404. "draggable",
  405. "dropzone",
  406. "enctype",
  407. "for",
  408. "form",
  409. "formaction",
  410. "formenctype",
  411. "formmethod",
  412. "formnovalidate",
  413. "formtarget",
  414. "headers",
  415. "height",
  416. "hidden",
  417. "high",
  418. "href",
  419. "hreflang",
  420. "http-equiv",
  421. "icon",
  422. "id",
  423. "inert",
  424. "ismap",
  425. "itemid",
  426. "itemprop",
  427. "itemref",
  428. "itemscope",
  429. "itemtype",
  430. "keytype",
  431. "kind",
  432. "label",
  433. "lang",
  434. "list",
  435. "loop",
  436. "low",
  437. "manifest",
  438. "max",
  439. "maxlength",
  440. "media",
  441. "mediagroup",
  442. "method",
  443. "min",
  444. "multiple",
  445. "muted",
  446. "name",
  447. "novalidate",
  448. "open",
  449. "optimum",
  450. "pattern",
  451. "ping",
  452. "placeholder",
  453. "poster",
  454. "preload",
  455. "radiogroup",
  456. "readonly",
  457. "rel",
  458. "required",
  459. "reversed",
  460. "rows",
  461. "rowspan",
  462. "sandbox",
  463. "spellcheck",
  464. "scope",
  465. "scoped",
  466. "seamless",
  467. "selected",
  468. "shape",
  469. "size",
  470. "sizes",
  471. "span",
  472. "src",
  473. "srcdoc",
  474. "srclang",
  475. "start",
  476. "step",
  477. "style",
  478. "tabindex",
  479. "target",
  480. "title",
  481. "translate",
  482. "type",
  483. "typemustmatch",
  484. "usemap",
  485. "value",
  486. "width",
  487. "wrap",
  488. }
  489. var eventHandlers = []string{
  490. "onabort",
  491. "onafterprint",
  492. "onbeforeprint",
  493. "onbeforeunload",
  494. "onblur",
  495. "oncancel",
  496. "oncanplay",
  497. "oncanplaythrough",
  498. "onchange",
  499. "onclick",
  500. "onclose",
  501. "oncontextmenu",
  502. "oncuechange",
  503. "ondblclick",
  504. "ondrag",
  505. "ondragend",
  506. "ondragenter",
  507. "ondragleave",
  508. "ondragover",
  509. "ondragstart",
  510. "ondrop",
  511. "ondurationchange",
  512. "onemptied",
  513. "onended",
  514. "onerror",
  515. "onfocus",
  516. "onhashchange",
  517. "oninput",
  518. "oninvalid",
  519. "onkeydown",
  520. "onkeypress",
  521. "onkeyup",
  522. "onload",
  523. "onloadeddata",
  524. "onloadedmetadata",
  525. "onloadstart",
  526. "onmessage",
  527. "onmousedown",
  528. "onmousemove",
  529. "onmouseout",
  530. "onmouseover",
  531. "onmouseup",
  532. "onmousewheel",
  533. "onoffline",
  534. "ononline",
  535. "onpagehide",
  536. "onpageshow",
  537. "onpause",
  538. "onplay",
  539. "onplaying",
  540. "onpopstate",
  541. "onprogress",
  542. "onratechange",
  543. "onreset",
  544. "onresize",
  545. "onscroll",
  546. "onseeked",
  547. "onseeking",
  548. "onselect",
  549. "onshow",
  550. "onstalled",
  551. "onstorage",
  552. "onsubmit",
  553. "onsuspend",
  554. "ontimeupdate",
  555. "onunload",
  556. "onvolumechange",
  557. "onwaiting",
  558. }
  559. // extra are ad-hoc values not covered by any of the lists above.
  560. var extra = []string{
  561. "align",
  562. "annotation",
  563. "annotation-xml",
  564. "applet",
  565. "basefont",
  566. "bgsound",
  567. "big",
  568. "blink",
  569. "center",
  570. "color",
  571. "desc",
  572. "face",
  573. "font",
  574. "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
  575. "foreignobject",
  576. "frame",
  577. "frameset",
  578. "image",
  579. "isindex",
  580. "listing",
  581. "malignmark",
  582. "marquee",
  583. "math",
  584. "mglyph",
  585. "mi",
  586. "mn",
  587. "mo",
  588. "ms",
  589. "mtext",
  590. "nobr",
  591. "noembed",
  592. "noframes",
  593. "plaintext",
  594. "prompt",
  595. "public",
  596. "spacer",
  597. "strike",
  598. "svg",
  599. "system",
  600. "tt",
  601. "xmp",
  602. }