Browse Source

go.net/publicsuffix: use IDNA.

R=dr.volker.dobler
CC=golang-dev
https://golang.org/cl/6930054
Nigel Tao 13 years ago
parent
commit
cbecf2f725
4 changed files with 175 additions and 94 deletions
  1. 10 12
      publicsuffix/gen.go
  2. 34 0
      publicsuffix/list_test.go
  3. 99 82
      publicsuffix/table.go
  4. 32 0
      publicsuffix/table_test.go

+ 10 - 12
publicsuffix/gen.go

@@ -19,8 +19,6 @@ package main
 // To fetch a particular hg revision, such as 05b11a8d1ace, pass
 // To fetch a particular hg revision, such as 05b11a8d1ace, pass
 // -url "http://hg.mozilla.org/mozilla-central/raw-file/05b11a8d1ace/netwerk/dns/effective_tld_names.dat"
 // -url "http://hg.mozilla.org/mozilla-central/raw-file/05b11a8d1ace/netwerk/dns/effective_tld_names.dat"
 
 
-// TODO(nigeltao): decide what to do with non-ASCII entries.
-
 import (
 import (
 	"bufio"
 	"bufio"
 	"bytes"
 	"bytes"
@@ -32,6 +30,8 @@ import (
 	"os"
 	"os"
 	"sort"
 	"sort"
 	"strings"
 	"strings"
+
+	"code.google.com/p/go.net/idna"
 )
 )
 
 
 const (
 const (
@@ -105,9 +105,13 @@ func main1() error {
 			return err
 			return err
 		}
 		}
 		s = strings.TrimSpace(s)
 		s = strings.TrimSpace(s)
-		if s == "" || strings.HasPrefix(s, "//") || !isASCII(s) {
+		if s == "" || strings.HasPrefix(s, "//") {
 			continue
 			continue
 		}
 		}
+		s, err = idna.ToASCII(s)
+		if err != nil {
+			return err
+		}
 
 
 		if *subset {
 		if *subset {
 			switch {
 			switch {
@@ -118,7 +122,10 @@ func main1() error {
 			case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
 			case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
 			case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
 			case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
 			case s == "uk" || strings.HasSuffix(s, ".uk"):
 			case s == "uk" || strings.HasSuffix(s, ".uk"):
+			case s == "tw" || strings.HasSuffix(s, ".tw"):
 			case s == "zw" || strings.HasSuffix(s, ".zw"):
 			case s == "zw" || strings.HasSuffix(s, ".zw"):
+			case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
+				// xn--p1ai is Russian-Cyrillic "рф".
 			default:
 			default:
 				continue
 				continue
 			}
 			}
@@ -169,15 +176,6 @@ func main1() error {
 	return err
 	return err
 }
 }
 
 
-func isASCII(s string) bool {
-	for i := 0; i < len(s); i++ {
-		if s[i] < 32 || 127 < s[i] {
-			return false
-		}
-	}
-	return true
-}
-
 func printTest(w io.Writer, n *node) error {
 func printTest(w io.Writer, n *node) error {
 	fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
 	fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
 	fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n")
 	fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n")

+ 34 - 0
publicsuffix/list_test.go

@@ -138,6 +138,34 @@ var publicSuffixTestCases = []struct {
 	{"b.ide.kyoto.jp", "ide.kyoto.jp"},
 	{"b.ide.kyoto.jp", "ide.kyoto.jp"},
 	{"a.b.ide.kyoto.jp", "ide.kyoto.jp"},
 	{"a.b.ide.kyoto.jp", "ide.kyoto.jp"},
 
 
+	// The .tw rules are:
+	// tw
+	// edu.tw
+	// gov.tw
+	// mil.tw
+	// com.tw
+	// net.tw
+	// org.tw
+	// idv.tw
+	// game.tw
+	// ebiz.tw
+	// club.tw
+	// 網路.tw (xn--zf0ao64a.tw)
+	// 組織.tw (xn--uc0atv.tw)
+	// 商業.tw (xn--czrw28b.tw)
+	// blogspot.tw
+	{"tw", "tw"},
+	{"aaa.tw", "tw"},
+	{"www.aaa.tw", "tw"},
+	{"xn--czrw28b.aaa.tw", "tw"},
+	{"edu.tw", "edu.tw"},
+	{"www.edu.tw", "edu.tw"},
+	{"xn--czrw28b.edu.tw", "edu.tw"},
+	{"xn--czrw28b.tw", "xn--czrw28b.tw"},
+	{"www.xn--czrw28b.tw", "xn--czrw28b.tw"},
+	{"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw"},
+	{"xn--kpry57d.tw", "tw"},
+
 	// The .uk rules are:
 	// The .uk rules are:
 	// *.uk
 	// *.uk
 	// *.sch.uk
 	// *.sch.uk
@@ -163,6 +191,12 @@ var publicSuffixTestCases = []struct {
 	{"blogspot.nic.uk", "uk"},
 	{"blogspot.nic.uk", "uk"},
 	{"blogspot.sch.uk", "blogspot.sch.uk"},
 	{"blogspot.sch.uk", "blogspot.sch.uk"},
 
 
+	// The .рф rules are:
+	// рф (xn--p1ai)
+	{"xn--p1ai", "xn--p1ai"},
+	{"aaa.xn--p1ai", "xn--p1ai"},
+	{"www.xxx.yyy.xn--p1ai", "xn--p1ai"},
+
 	// The .zw rules are:
 	// The .zw rules are:
 	// *.zw
 	// *.zw
 	{"zw", "zw"},
 	{"zw", "zw"},

+ 99 - 82
publicsuffix/table.go

@@ -11,15 +11,16 @@ const (
 )
 )
 
 
 // numTLD is the number of top level domains.
 // numTLD is the number of top level domains.
-const numTLD = 6
+const numTLD = 8
 
 
 // Text is the combined text of all labels.
 // Text is the combined text of all labels.
-const text = "british-libraryawatarparliamentfukuchiyamashinacionakagyoyamazak" +
-	"itayabe164gvin-addretinagaokakyotambainelip6irisakyotanabejetjoy" +
-	"ojpblogspotkizujitawarakumiyamakyotangobiernoelectronicomecongre" +
-	"sodelalengua3kyotominamiyamashiromiyazurideducitymukobepromocion" +
-	"antanational-library-scotlandseikameokamodurnlschigashiyamaizuru" +
-	"bazwazuka"
+const text = "clubafukuchiyamashinacionakagyorgamecongresodelalengua3govgvin-a" +
+	"ddretinagaokakyotambainelip6irisakyotanabeducityjetjoyoyamazakit" +
+	"ajpblogspotkizuridebizwkumiyamakyotangobiernoelectronicomilkyoto" +
+	"minamiyamashiromiyazurnantanational-library-scotlandmukobenlschi" +
+	"gashiyamaizurujitawarapromocionetseikameokamodxn--czrw28british-" +
+	"libraryawatarparliamentwazukayabe164xn--p1aidvxn--uc0atvxn--zf0a" +
+	"o64a"
 
 
 // Nodes is the list of nodes. Each node is encoded as two uint32 values.
 // Nodes is the list of nodes. Each node is encoded as two uint32 values.
 //
 //
@@ -41,79 +42,95 @@ const text = "british-libraryawatarparliamentfukuchiyamashinacionakagyoyamazak"
 // envelope calculation suggests that at 6000 rows (of which 90% are leaves),
 // envelope calculation suggests that at 6000 rows (of which 90% are leaves),
 // this could save an extra 20KiB of data.
 // this could save an extra 20KiB of data.
 var nodes = [...][2]uint32{
 var nodes = [...][2]uint32{
-	{0x00060006, 0x00005902}, // 0x0000 (0x0006-0x000c) +  ao
-	{0xa00a000c, 0x00000c02}, // 0x0001 (0x000c-0x0016) o* ar
-	{0x80060017, 0x00001304}, // 0x0002 (0x0017-0x001d) o  arpa
-	{0x0002001d, 0x00008102}, // 0x0003 (0x001d-0x001f) +  jp
-	{0xa00b003f, 0x00002002}, // 0x0004 (0x003f-0x004a) o* uk
-	{0xa0000000, 0x00014202}, // 0x0005 (-------------) o* zw
-	{0x00000000, 0x0000b602}, // 0x0006 (-------------) +  co
-	{0x00000000, 0x0000ea02}, // 0x0007 (-------------) +  ed
-	{0x00000000, 0x00004a02}, // 0x0008 (-------------) +  gv
-	{0x00000000, 0x00000202}, // 0x0009 (-------------) +  it
-	{0x00000000, 0x00008502}, // 0x000a (-------------) +  og
-	{0x00000000, 0x00008202}, // 0x000b (-------------) +  pb
-	{0x80010016, 0x0000b603}, // 0x000c (0x0016-0x0017) o  com
-	{0x40000000, 0x0000ba13}, // 0x000d (-------------) !  congresodelalengua3
-	{0x40000000, 0x0000ea04}, // 0x000e (-------------) !  educ
-	{0x40000000, 0x0000a513}, // 0x000f (-------------) !  gobiernoelectronico
-	{0x40000000, 0x0000b805}, // 0x0010 (-------------) !  mecon
-	{0x40000000, 0x00002d06}, // 0x0011 (-------------) !  nacion
-	{0x40000000, 0x0000b403}, // 0x0012 (-------------) !  nic
-	{0x40000000, 0x0000f709}, // 0x0013 (-------------) !  promocion
-	{0x40000000, 0x00005206}, // 0x0014 (-------------) !  retina
-	{0x40000000, 0x00013f03}, // 0x0015 (-------------) !  uba
-	{0x00000000, 0x00008308}, // 0x0016 (-------------) +  blogspot
-	{0x00000000, 0x00004604}, // 0x0017 (-------------) +  e164
-	{0x00000000, 0x00004c07}, // 0x0018 (-------------) +  in-addr
-	{0x00000000, 0x00006903}, // 0x0019 (-------------) +  ip6
-	{0x00000000, 0x00006c04}, // 0x001a (-------------) +  iris
-	{0x00000000, 0x0000e603}, // 0x001b (-------------) +  uri
-	{0x00000000, 0x00012a03}, // 0x001c (-------------) +  urn
-	{0xa001001f, 0x0000f304}, // 0x001d (0x001f-0x0020) o* kobe
-	{0x001f0020, 0x0000cd05}, // 0x001e (0x0020-0x003f) +  kyoto
-	{0x40000000, 0x0000ed04}, // 0x001f (-------------) !  city
-	{0x00000000, 0x00004205}, // 0x0020 (-------------) +  ayabe
-	{0x00000000, 0x00001f0b}, // 0x0021 (-------------) +  fukuchiyama
-	{0x00000000, 0x0001300b}, // 0x0022 (-------------) +  higashiyama
-	{0x00000000, 0x0000e803}, // 0x0023 (-------------) +  ide
-	{0x00000000, 0x00006503}, // 0x0024 (-------------) +  ine
-	{0x00000000, 0x00007d04}, // 0x0025 (-------------) +  joyo
-	{0x00000000, 0x00012007}, // 0x0026 (-------------) +  kameoka
-	{0x00000000, 0x00012504}, // 0x0027 (-------------) +  kamo
-	{0x00000000, 0x00003f04}, // 0x0028 (-------------) +  kita
-	{0x00000000, 0x00008b04}, // 0x0029 (-------------) +  kizu
-	{0x00000000, 0x00009708}, // 0x002a (-------------) +  kumiyama
-	{0x00000000, 0x00005d08}, // 0x002b (-------------) +  kyotamba
-	{0x00000000, 0x00007109}, // 0x002c (-------------) +  kyotanabe
-	{0x00000000, 0x00009f08}, // 0x002d (-------------) +  kyotango
-	{0x00000000, 0x00013907}, // 0x002e (-------------) +  maizuru
-	{0x00000000, 0x0000d206}, // 0x002f (-------------) +  minami
-	{0x00000000, 0x0000d20f}, // 0x0030 (-------------) +  minamiyamashiro
-	{0x00000000, 0x0000e106}, // 0x0031 (-------------) +  miyazu
-	{0x00000000, 0x0000f104}, // 0x0032 (-------------) +  muko
-	{0x00000000, 0x0000560a}, // 0x0033 (-------------) +  nagaokakyo
-	{0x00000000, 0x00003207}, // 0x0034 (-------------) +  nakagyo
-	{0x00000000, 0x0000ff06}, // 0x0035 (-------------) +  nantan
-	{0x00000000, 0x00003809}, // 0x0036 (-------------) +  oyamazaki
-	{0x00000000, 0x00006f05}, // 0x0037 (-------------) +  sakyo
-	{0x00000000, 0x00011d05}, // 0x0038 (-------------) +  seika
-	{0x00000000, 0x00007406}, // 0x0039 (-------------) +  tanabe
-	{0x00000000, 0x00008e03}, // 0x003a (-------------) +  uji
-	{0x00000000, 0x00008e09}, // 0x003b (-------------) +  ujitawara
-	{0x00000000, 0x00014306}, // 0x003c (-------------) +  wazuka
-	{0x00000000, 0x00002609}, // 0x003d (-------------) +  yamashina
-	{0x00000000, 0x00000e06}, // 0x003e (-------------) +  yawata
-	{0x40000000, 0x00008302}, // 0x003f (-------------) !  bl
-	{0x40000000, 0x0000000f}, // 0x0040 (-------------) !  british-library
-	{0x8001004a, 0x0000b602}, // 0x0041 (0x004a-0x004b) o  co
-	{0x40000000, 0x00007a03}, // 0x0042 (-------------) !  jet
-	{0x40000000, 0x00012703}, // 0x0043 (-------------) !  mod
-	{0x40000000, 0x00010419}, // 0x0044 (-------------) !  national-library-scotland
-	{0x40000000, 0x00006603}, // 0x0045 (-------------) !  nel
-	{0x40000000, 0x0000b403}, // 0x0046 (-------------) !  nic
-	{0x40000000, 0x00012c03}, // 0x0047 (-------------) !  nls
-	{0x40000000, 0x0000150a}, // 0x0048 (-------------) !  parliament
-	{0xa0000000, 0x00012e03}, // 0x0049 (-------------) o* sch
-	{0x00000000, 0x00008308}, // 0x004a (-------------) +  blogspot
+	{0x00060008, 0x00004902}, // 0x0000 (0x0008-0x000e) +  ao
+	{0xa00a000e, 0x0000e802}, // 0x0001 (0x000e-0x0018) o* ar
+	{0x80060019, 0x00014b04}, // 0x0002 (0x0019-0x001f) o  arpa
+	{0x0002001f, 0x00008102}, // 0x0003 (0x001f-0x0021) +  jp
+	{0x000e0041, 0x00015602}, // 0x0004 (0x0041-0x004f) +  tw
+	{0xa00b004f, 0x00000602}, // 0x0005 (0x004f-0x005a) o* uk
+	{0x00000000, 0x00016408}, // 0x0006 (-------------) +  xn--p1ai
+	{0xa0000000, 0x00009502}, // 0x0007 (-------------) o* zw
+	{0x00000000, 0x00002402}, // 0x0008 (-------------) +  co
+	{0x00000000, 0x00006902}, // 0x0009 (-------------) +  ed
+	{0x00000000, 0x00003a02}, // 0x000a (-------------) +  gv
+	{0x00000000, 0x00006d02}, // 0x000b (-------------) +  it
+	{0x00000000, 0x00008502}, // 0x000c (-------------) +  og
+	{0x00000000, 0x00008202}, // 0x000d (-------------) +  pb
+	{0x80010018, 0x0000b603}, // 0x000e (0x0018-0x0019) o  com
+	{0x40000000, 0x00002413}, // 0x000f (-------------) !  congresodelalengua3
+	{0x40000000, 0x00006904}, // 0x0010 (-------------) !  educ
+	{0x40000000, 0x0000a513}, // 0x0011 (-------------) !  gobiernoelectronico
+	{0x40000000, 0x00002205}, // 0x0012 (-------------) !  mecon
+	{0x40000000, 0x00001306}, // 0x0013 (-------------) !  nacion
+	{0x40000000, 0x0000b403}, // 0x0014 (-------------) !  nic
+	{0x40000000, 0x00011609}, // 0x0015 (-------------) !  promocion
+	{0x40000000, 0x00004206}, // 0x0016 (-------------) !  retina
+	{0x40000000, 0x00000203}, // 0x0017 (-------------) !  uba
+	{0x00000000, 0x00008308}, // 0x0018 (-------------) +  blogspot
+	{0x00000000, 0x00016004}, // 0x0019 (-------------) +  e164
+	{0x00000000, 0x00003c07}, // 0x001a (-------------) +  in-addr
+	{0x00000000, 0x00005903}, // 0x001b (-------------) +  ip6
+	{0x00000000, 0x00005c04}, // 0x001c (-------------) +  iris
+	{0x00000000, 0x00008e03}, // 0x001d (-------------) +  uri
+	{0x00000000, 0x0000d403}, // 0x001e (-------------) +  urn
+	{0xa0010021, 0x0000f604}, // 0x001f (0x0021-0x0022) o* kobe
+	{0x001f0022, 0x0000bb05}, // 0x0020 (0x0022-0x0041) +  kyoto
+	{0x40000000, 0x00006c04}, // 0x0021 (-------------) !  city
+	{0x00000000, 0x00015c05}, // 0x0022 (-------------) +  ayabe
+	{0x00000000, 0x0000050b}, // 0x0023 (-------------) +  fukuchiyama
+	{0x00000000, 0x0000fe0b}, // 0x0024 (-------------) +  higashiyama
+	{0x00000000, 0x00009003}, // 0x0025 (-------------) +  ide
+	{0x00000000, 0x00005503}, // 0x0026 (-------------) +  ine
+	{0x00000000, 0x00007304}, // 0x0027 (-------------) +  joyo
+	{0x00000000, 0x00012407}, // 0x0028 (-------------) +  kameoka
+	{0x00000000, 0x00012904}, // 0x0029 (-------------) +  kamo
+	{0x00000000, 0x00007d04}, // 0x002a (-------------) +  kita
+	{0x00000000, 0x00008b04}, // 0x002b (-------------) +  kizu
+	{0x00000000, 0x00009708}, // 0x002c (-------------) +  kumiyama
+	{0x00000000, 0x00004d08}, // 0x002d (-------------) +  kyotamba
+	{0x00000000, 0x00006109}, // 0x002e (-------------) +  kyotanabe
+	{0x00000000, 0x00009f08}, // 0x002f (-------------) +  kyotango
+	{0x00000000, 0x00010707}, // 0x0030 (-------------) +  maizuru
+	{0x00000000, 0x0000c006}, // 0x0031 (-------------) +  minami
+	{0x00000000, 0x0000c00f}, // 0x0032 (-------------) +  minamiyamashiro
+	{0x00000000, 0x0000cf06}, // 0x0033 (-------------) +  miyazu
+	{0x00000000, 0x0000f404}, // 0x0034 (-------------) +  muko
+	{0x00000000, 0x0000460a}, // 0x0035 (-------------) +  nagaokakyo
+	{0x00000000, 0x00001807}, // 0x0036 (-------------) +  nakagyo
+	{0x00000000, 0x0000d606}, // 0x0037 (-------------) +  nantan
+	{0x00000000, 0x00007609}, // 0x0038 (-------------) +  oyamazaki
+	{0x00000000, 0x00005f05}, // 0x0039 (-------------) +  sakyo
+	{0x00000000, 0x00012105}, // 0x003a (-------------) +  seika
+	{0x00000000, 0x00006406}, // 0x003b (-------------) +  tanabe
+	{0x00000000, 0x00010d03}, // 0x003c (-------------) +  uji
+	{0x00000000, 0x00010d09}, // 0x003d (-------------) +  ujitawara
+	{0x00000000, 0x00015706}, // 0x003e (-------------) +  wazuka
+	{0x00000000, 0x00000c09}, // 0x003f (-------------) +  yamashina
+	{0x00000000, 0x00014606}, // 0x0040 (-------------) +  yawata
+	{0x00000000, 0x00008308}, // 0x0041 (-------------) +  blogspot
+	{0x00000000, 0x00000004}, // 0x0042 (-------------) +  club
+	{0x00000000, 0x0000b603}, // 0x0043 (-------------) +  com
+	{0x00000000, 0x00009204}, // 0x0044 (-------------) +  ebiz
+	{0x00000000, 0x00006903}, // 0x0045 (-------------) +  edu
+	{0x00000000, 0x00002004}, // 0x0046 (-------------) +  game
+	{0x00000000, 0x00003703}, // 0x0047 (-------------) +  gov
+	{0x00000000, 0x00016b03}, // 0x0048 (-------------) +  idv
+	{0x00000000, 0x0000b803}, // 0x0049 (-------------) +  mil
+	{0x00000000, 0x00011e03}, // 0x004a (-------------) +  net
+	{0x00000000, 0x00001e03}, // 0x004b (-------------) +  org
+	{0x00000000, 0x00012e0b}, // 0x004c (-------------) +  xn--czrw28b
+	{0x00000000, 0x00016e0a}, // 0x004d (-------------) +  xn--uc0atv
+	{0x00000000, 0x0001780c}, // 0x004e (-------------) +  xn--zf0ao64a
+	{0x40000000, 0x00008302}, // 0x004f (-------------) !  bl
+	{0x40000000, 0x0001380f}, // 0x0050 (-------------) !  british-library
+	{0x8001005a, 0x00002402}, // 0x0051 (0x005a-0x005b) o  co
+	{0x40000000, 0x00007003}, // 0x0052 (-------------) !  jet
+	{0x40000000, 0x00012b03}, // 0x0053 (-------------) !  mod
+	{0x40000000, 0x0000db19}, // 0x0054 (-------------) !  national-library-scotland
+	{0x40000000, 0x00005603}, // 0x0055 (-------------) !  nel
+	{0x40000000, 0x0000b403}, // 0x0056 (-------------) !  nic
+	{0x40000000, 0x0000fa03}, // 0x0057 (-------------) !  nls
+	{0x40000000, 0x00014d0a}, // 0x0058 (-------------) !  parliament
+	{0xa0000000, 0x0000fc03}, // 0x0059 (-------------) o* sch
+	{0x00000000, 0x00008308}, // 0x005a (-------------) +  blogspot
 }
 }

+ 32 - 0
publicsuffix/table_test.go

@@ -61,6 +61,20 @@ var rules = [...]string{
 	"wazuka.kyoto.jp",
 	"wazuka.kyoto.jp",
 	"yamashina.kyoto.jp",
 	"yamashina.kyoto.jp",
 	"yawata.kyoto.jp",
 	"yawata.kyoto.jp",
+	"tw",
+	"edu.tw",
+	"gov.tw",
+	"mil.tw",
+	"com.tw",
+	"net.tw",
+	"org.tw",
+	"idv.tw",
+	"game.tw",
+	"ebiz.tw",
+	"club.tw",
+	"xn--zf0ao64a.tw",
+	"xn--uc0atv.tw",
+	"xn--czrw28b.tw",
 	"*.uk",
 	"*.uk",
 	"*.sch.uk",
 	"*.sch.uk",
 	"!bl.uk",
 	"!bl.uk",
@@ -72,9 +86,11 @@ var rules = [...]string{
 	"!nic.uk",
 	"!nic.uk",
 	"!nls.uk",
 	"!nls.uk",
 	"!parliament.uk",
 	"!parliament.uk",
+	"xn--p1ai",
 	"*.zw",
 	"*.zw",
 	"blogspot.co.uk",
 	"blogspot.co.uk",
 	"blogspot.com.ar",
 	"blogspot.com.ar",
+	"blogspot.tw",
 }
 }
 
 
 var nodeLabels = [...]string{
 var nodeLabels = [...]string{
@@ -82,7 +98,9 @@ var nodeLabels = [...]string{
 	"ar",
 	"ar",
 	"arpa",
 	"arpa",
 	"jp",
 	"jp",
+	"tw",
 	"uk",
 	"uk",
+	"xn--p1ai",
 	"zw",
 	"zw",
 	"co",
 	"co",
 	"ed",
 	"ed",
@@ -141,6 +159,20 @@ var nodeLabels = [...]string{
 	"wazuka",
 	"wazuka",
 	"yamashina",
 	"yamashina",
 	"yawata",
 	"yawata",
+	"blogspot",
+	"club",
+	"com",
+	"ebiz",
+	"edu",
+	"game",
+	"gov",
+	"idv",
+	"mil",
+	"net",
+	"org",
+	"xn--czrw28b",
+	"xn--uc0atv",
+	"xn--zf0ao64a",
 	"bl",
 	"bl",
 	"british-library",
 	"british-library",
 	"co",
 	"co",