123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- //go:build ignore
- // +build ignore
- package main
- import (
- "bytes"
- "encoding/json"
- "fmt"
- "log"
- "strings"
- "golang.org/x/text/internal/gen"
- )
- type group struct {
- Encodings []struct {
- Labels []string
- Name string
- }
- }
- func main() {
- gen.Init()
- r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
- var groups []group
- if err := json.NewDecoder(r).Decode(&groups); err != nil {
- log.Fatalf("Error reading encodings.json: %v", err)
- }
- w := &bytes.Buffer{}
- fmt.Fprintln(w, "type htmlEncoding byte")
- fmt.Fprintln(w, "const (")
- for i, g := range groups {
- for _, e := range g.Encodings {
- key := strings.ToLower(e.Name)
- name := consts[key]
- if name == "" {
- log.Fatalf("No const defined for %s.", key)
- }
- if i == 0 {
- fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
- } else {
- fmt.Fprintf(w, "%s\n", name)
- }
- }
- }
- fmt.Fprintln(w, "numEncodings")
- fmt.Fprint(w, ")\n\n")
- fmt.Fprintln(w, "var canonical = [numEncodings]string{")
- for _, g := range groups {
- for _, e := range g.Encodings {
- fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
- }
- }
- fmt.Fprint(w, "}\n\n")
- fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
- for _, g := range groups {
- for _, e := range g.Encodings {
- for _, l := range e.Labels {
- key := strings.ToLower(e.Name)
- name := consts[key]
- fmt.Fprintf(w, "%q: %s,\n", l, name)
- }
- }
- }
- fmt.Fprint(w, "}\n\n")
- var tags []string
- fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
- for _, loc := range locales {
- tags = append(tags, loc.tag)
- fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
- }
- fmt.Fprint(w, "}\n\n")
- fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
- gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
- }
- // consts maps canonical encoding name to internal constant.
- var consts = map[string]string{
- "utf-8": "utf8",
- "ibm866": "ibm866",
- "iso-8859-2": "iso8859_2",
- "iso-8859-3": "iso8859_3",
- "iso-8859-4": "iso8859_4",
- "iso-8859-5": "iso8859_5",
- "iso-8859-6": "iso8859_6",
- "iso-8859-7": "iso8859_7",
- "iso-8859-8": "iso8859_8",
- "iso-8859-8-i": "iso8859_8I",
- "iso-8859-10": "iso8859_10",
- "iso-8859-13": "iso8859_13",
- "iso-8859-14": "iso8859_14",
- "iso-8859-15": "iso8859_15",
- "iso-8859-16": "iso8859_16",
- "koi8-r": "koi8r",
- "koi8-u": "koi8u",
- "macintosh": "macintosh",
- "windows-874": "windows874",
- "windows-1250": "windows1250",
- "windows-1251": "windows1251",
- "windows-1252": "windows1252",
- "windows-1253": "windows1253",
- "windows-1254": "windows1254",
- "windows-1255": "windows1255",
- "windows-1256": "windows1256",
- "windows-1257": "windows1257",
- "windows-1258": "windows1258",
- "x-mac-cyrillic": "macintoshCyrillic",
- "gbk": "gbk",
- "gb18030": "gb18030",
- // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
- "big5": "big5",
- "euc-jp": "eucjp",
- "iso-2022-jp": "iso2022jp",
- "shift_jis": "shiftJIS",
- "euc-kr": "euckr",
- "replacement": "replacement",
- "utf-16be": "utf16be",
- "utf-16le": "utf16le",
- "x-user-defined": "xUserDefined",
- }
- // locales is taken from
- // https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
- var locales = []struct{ tag, name string }{
- // The default value. Explicitly state latin to benefit from the exact
- // script option, while still making 1252 the default encoding for languages
- // written in Latin script.
- {"und_Latn", "windows-1252"},
- {"ar", "windows-1256"},
- {"ba", "windows-1251"},
- {"be", "windows-1251"},
- {"bg", "windows-1251"},
- {"cs", "windows-1250"},
- {"el", "iso-8859-7"},
- {"et", "windows-1257"},
- {"fa", "windows-1256"},
- {"he", "windows-1255"},
- {"hr", "windows-1250"},
- {"hu", "iso-8859-2"},
- {"ja", "shift_jis"},
- {"kk", "windows-1251"},
- {"ko", "euc-kr"},
- {"ku", "windows-1254"},
- {"ky", "windows-1251"},
- {"lt", "windows-1257"},
- {"lv", "windows-1257"},
- {"mk", "windows-1251"},
- {"pl", "iso-8859-2"},
- {"ru", "windows-1251"},
- {"sah", "windows-1251"},
- {"sk", "windows-1250"},
- {"sl", "iso-8859-2"},
- {"sr", "windows-1251"},
- {"tg", "windows-1251"},
- {"th", "windows-874"},
- {"tr", "windows-1254"},
- {"tt", "windows-1251"},
- {"uk", "windows-1251"},
- {"vi", "windows-1258"},
- {"zh-hans", "gb18030"},
- {"zh-hant", "big5"},
- }
|