list_test.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package publicsuffix
  5. import (
  6. "sort"
  7. "strings"
  8. "testing"
  9. )
  10. func TestNodeLabel(t *testing.T) {
  11. for i, want := range nodeLabels {
  12. got := nodeLabel(uint32(i))
  13. if got != want {
  14. t.Errorf("%d: got %q, want %q", i, got, want)
  15. }
  16. }
  17. }
  18. func TestFind(t *testing.T) {
  19. testCases := []string{
  20. "",
  21. "a",
  22. "a0",
  23. "aaaa",
  24. "ao",
  25. "ap",
  26. "ar",
  27. "aro",
  28. "arp",
  29. "arpa",
  30. "arpaa",
  31. "arpb",
  32. "az",
  33. "b",
  34. "b0",
  35. "ba",
  36. "z",
  37. "zu",
  38. "zv",
  39. "zw",
  40. "zx",
  41. "zy",
  42. "zz",
  43. "zzzz",
  44. }
  45. for _, tc := range testCases {
  46. got := find(tc, 0, numTLD)
  47. want := notFound
  48. for i := uint32(0); i < numTLD; i++ {
  49. if tc == nodeLabel(i) {
  50. want = i
  51. break
  52. }
  53. }
  54. if got != want {
  55. t.Errorf("%q: got %d, want %d", tc, got, want)
  56. }
  57. }
  58. }
  59. var publicSuffixTestCases = []struct {
  60. domain, want string
  61. }{
  62. // Empty string.
  63. {"", ""},
  64. // The .ao rules are:
  65. // ao
  66. // ed.ao
  67. // gv.ao
  68. // og.ao
  69. // co.ao
  70. // pb.ao
  71. // it.ao
  72. {"ao", "ao"},
  73. {"www.ao", "ao"},
  74. {"pb.ao", "pb.ao"},
  75. {"www.pb.ao", "pb.ao"},
  76. {"www.xxx.yyy.zzz.pb.ao", "pb.ao"},
  77. // The .ar rules are:
  78. // *.ar
  79. // !congresodelalengua3.ar
  80. // !educ.ar
  81. // !gobiernoelectronico.ar
  82. // !mecon.ar
  83. // !nacion.ar
  84. // !nic.ar
  85. // !promocion.ar
  86. // !retina.ar
  87. // !uba.ar
  88. // blogspot.com.ar
  89. {"ar", "ar"},
  90. {"www.ar", "www.ar"},
  91. {"nic.ar", "ar"},
  92. {"www.nic.ar", "ar"},
  93. {"com.ar", "com.ar"},
  94. {"www.com.ar", "com.ar"},
  95. {"blogspot.com.ar", "blogspot.com.ar"},
  96. {"www.blogspot.com.ar", "blogspot.com.ar"},
  97. {"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar"},
  98. {"logspot.com.ar", "com.ar"},
  99. {"zlogspot.com.ar", "com.ar"},
  100. {"zblogspot.com.ar", "com.ar"},
  101. // The .arpa rules are:
  102. // e164.arpa
  103. // in-addr.arpa
  104. // ip6.arpa
  105. // iris.arpa
  106. // uri.arpa
  107. // urn.arpa
  108. {"arpa", "arpa"},
  109. {"www.arpa", "arpa"},
  110. {"urn.arpa", "urn.arpa"},
  111. {"www.urn.arpa", "urn.arpa"},
  112. {"www.xxx.yyy.zzz.urn.arpa", "urn.arpa"},
  113. // The relevant {kobe,kyoto}.jp rules are:
  114. // jp
  115. // *.kobe.jp
  116. // !city.kobe.jp
  117. // kyoto.jp
  118. // ide.kyoto.jp
  119. {"jp", "jp"},
  120. {"kobe.jp", "jp"},
  121. {"c.kobe.jp", "c.kobe.jp"},
  122. {"b.c.kobe.jp", "c.kobe.jp"},
  123. {"a.b.c.kobe.jp", "c.kobe.jp"},
  124. {"city.kobe.jp", "kobe.jp"},
  125. {"www.city.kobe.jp", "kobe.jp"},
  126. {"kyoto.jp", "kyoto.jp"},
  127. {"test.kyoto.jp", "kyoto.jp"},
  128. {"ide.kyoto.jp", "ide.kyoto.jp"},
  129. {"b.ide.kyoto.jp", "ide.kyoto.jp"},
  130. {"a.b.ide.kyoto.jp", "ide.kyoto.jp"},
  131. // The .tw rules are:
  132. // tw
  133. // edu.tw
  134. // gov.tw
  135. // mil.tw
  136. // com.tw
  137. // net.tw
  138. // org.tw
  139. // idv.tw
  140. // game.tw
  141. // ebiz.tw
  142. // club.tw
  143. // 網路.tw (xn--zf0ao64a.tw)
  144. // 組織.tw (xn--uc0atv.tw)
  145. // 商業.tw (xn--czrw28b.tw)
  146. // blogspot.tw
  147. {"tw", "tw"},
  148. {"aaa.tw", "tw"},
  149. {"www.aaa.tw", "tw"},
  150. {"xn--czrw28b.aaa.tw", "tw"},
  151. {"edu.tw", "edu.tw"},
  152. {"www.edu.tw", "edu.tw"},
  153. {"xn--czrw28b.edu.tw", "edu.tw"},
  154. {"xn--czrw28b.tw", "xn--czrw28b.tw"},
  155. {"www.xn--czrw28b.tw", "xn--czrw28b.tw"},
  156. {"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw"},
  157. {"xn--kpry57d.tw", "tw"},
  158. // The .uk rules are:
  159. // *.uk
  160. // *.sch.uk
  161. // !bl.uk
  162. // !british-library.uk
  163. // !jet.uk
  164. // !mod.uk
  165. // !national-library-scotland.uk
  166. // !nel.uk
  167. // !nic.uk
  168. // !nls.uk
  169. // !parliament.uk
  170. // blogspot.co.uk
  171. {"uk", "uk"},
  172. {"aaa.uk", "aaa.uk"},
  173. {"www.aaa.uk", "aaa.uk"},
  174. {"mod.uk", "uk"},
  175. {"www.mod.uk", "uk"},
  176. {"sch.uk", "sch.uk"},
  177. {"mod.sch.uk", "mod.sch.uk"},
  178. {"www.sch.uk", "www.sch.uk"},
  179. {"blogspot.co.uk", "blogspot.co.uk"},
  180. {"blogspot.nic.uk", "uk"},
  181. {"blogspot.sch.uk", "blogspot.sch.uk"},
  182. // The .рф rules are
  183. // рф (xn--p1ai)
  184. {"xn--p1ai", "xn--p1ai"},
  185. {"aaa.xn--p1ai", "xn--p1ai"},
  186. {"www.xxx.yyy.xn--p1ai", "xn--p1ai"},
  187. // The .zw rules are:
  188. // *.zw
  189. {"zw", "zw"},
  190. {"www.zw", "www.zw"},
  191. {"zzz.zw", "zzz.zw"},
  192. {"www.zzz.zw", "zzz.zw"},
  193. {"www.xxx.yyy.zzz.zw", "zzz.zw"},
  194. // There are no .nosuchtld rules.
  195. {"nosuchtld", "nosuchtld"},
  196. {"foo.nosuchtld", "nosuchtld"},
  197. {"bar.foo.nosuchtld", "nosuchtld"},
  198. }
  199. func TestPublicSuffix(t *testing.T) {
  200. for _, tc := range publicSuffixTestCases {
  201. got := List.PublicSuffix(tc.domain)
  202. if got != tc.want {
  203. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  204. }
  205. }
  206. }
  207. func TestSlowPublicSuffix(t *testing.T) {
  208. for _, tc := range publicSuffixTestCases {
  209. got := slowPublicSuffix(tc.domain)
  210. if got != tc.want {
  211. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  212. }
  213. }
  214. }
  215. // slowPublicSuffix implements the canonical (but O(number of rules)) public
  216. // suffix algorithm described at http://publicsuffix.org/list/.
  217. //
  218. // 1. Match domain against all rules and take note of the matching ones.
  219. // 2. If no rules match, the prevailing rule is "*".
  220. // 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
  221. // 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
  222. // 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
  223. // 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
  224. // 7. The registered or registrable domain is the public suffix plus one additional label.
  225. //
  226. // This function returns the public suffix, not the registrable domain, and so
  227. // it stops after step 6.
  228. func slowPublicSuffix(domain string) string {
  229. match := func(rulePart, domainPart string) bool {
  230. switch rulePart[0] {
  231. case '*':
  232. return true
  233. case '!':
  234. return rulePart[1:] == domainPart
  235. }
  236. return rulePart == domainPart
  237. }
  238. domainParts := strings.Split(domain, ".")
  239. var matchingRules [][]string
  240. loop:
  241. for _, rule := range rules {
  242. ruleParts := strings.Split(rule, ".")
  243. if len(domainParts) < len(ruleParts) {
  244. continue
  245. }
  246. for i := range ruleParts {
  247. rulePart := ruleParts[len(ruleParts)-1-i]
  248. domainPart := domainParts[len(domainParts)-1-i]
  249. if !match(rulePart, domainPart) {
  250. continue loop
  251. }
  252. }
  253. matchingRules = append(matchingRules, ruleParts)
  254. }
  255. if len(matchingRules) == 0 {
  256. matchingRules = append(matchingRules, []string{"*"})
  257. } else {
  258. sort.Sort(byPriority(matchingRules))
  259. }
  260. prevailing := matchingRules[0]
  261. if prevailing[0][0] == '!' {
  262. prevailing = prevailing[1:]
  263. }
  264. if prevailing[0][0] == '*' {
  265. replaced := domainParts[len(domainParts)-len(prevailing)]
  266. prevailing = append([]string{replaced}, prevailing[1:]...)
  267. }
  268. return strings.Join(prevailing, ".")
  269. }
  270. type byPriority [][]string
  271. func (b byPriority) Len() int { return len(b) }
  272. func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
  273. func (b byPriority) Less(i, j int) bool {
  274. if b[i][0][0] == '!' {
  275. return true
  276. }
  277. if b[j][0][0] == '!' {
  278. return false
  279. }
  280. return len(b[i]) > len(b[j])
  281. }
  282. // TODO(nigeltao): add the "Effective Top Level Domain Plus 1" tests from
  283. // http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt