list_test.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package publicsuffix
  5. import (
  6. "sort"
  7. "strings"
  8. "testing"
  9. )
  10. func TestNodeLabel(t *testing.T) {
  11. for i, want := range nodeLabels {
  12. got := nodeLabel(uint32(i))
  13. if got != want {
  14. t.Errorf("%d: got %q, want %q", i, got, want)
  15. }
  16. }
  17. }
  18. func TestFind(t *testing.T) {
  19. testCases := []string{
  20. "",
  21. "a",
  22. "a0",
  23. "aaaa",
  24. "ao",
  25. "ap",
  26. "ar",
  27. "aro",
  28. "arp",
  29. "arpa",
  30. "arpaa",
  31. "arpb",
  32. "az",
  33. "b",
  34. "b0",
  35. "ba",
  36. "z",
  37. "zu",
  38. "zv",
  39. "zw",
  40. "zx",
  41. "zy",
  42. "zz",
  43. "zzzz",
  44. }
  45. for _, tc := range testCases {
  46. got := find(tc, 0, numTLD)
  47. want := notFound
  48. for i := uint32(0); i < numTLD; i++ {
  49. if tc == nodeLabel(i) {
  50. want = i
  51. break
  52. }
  53. }
  54. if got != want {
  55. t.Errorf("%q: got %d, want %d", tc, got, want)
  56. }
  57. }
  58. }
  59. func TestICANN(t *testing.T) {
  60. testCases := map[string]bool{
  61. "foo.org": true,
  62. "foo.co.uk": true,
  63. "foo.dyndns.org": false,
  64. "foo.go.dyndns.org": false,
  65. "foo.blogspot.co.uk": false,
  66. "foo.intranet": false,
  67. }
  68. for domain, want := range testCases {
  69. _, got := PublicSuffix(domain)
  70. if got != want {
  71. t.Errorf("%q: got %v, want %v", domain, got, want)
  72. }
  73. }
  74. }
  75. var publicSuffixTestCases = []struct {
  76. domain, want string
  77. }{
  78. // Empty string.
  79. {"", ""},
  80. // The .ao rules are:
  81. // ao
  82. // ed.ao
  83. // gv.ao
  84. // og.ao
  85. // co.ao
  86. // pb.ao
  87. // it.ao
  88. {"ao", "ao"},
  89. {"www.ao", "ao"},
  90. {"pb.ao", "pb.ao"},
  91. {"www.pb.ao", "pb.ao"},
  92. {"www.xxx.yyy.zzz.pb.ao", "pb.ao"},
  93. // The .ar rules are:
  94. // ar
  95. // com.ar
  96. // edu.ar
  97. // gob.ar
  98. // int.ar
  99. // mil.ar
  100. // net.ar
  101. // org.ar
  102. // tur.ar
  103. // blogspot.com.ar
  104. {"ar", "ar"},
  105. {"www.ar", "ar"},
  106. {"nic.ar", "ar"},
  107. {"www.nic.ar", "ar"},
  108. {"com.ar", "com.ar"},
  109. {"www.com.ar", "com.ar"},
  110. {"blogspot.com.ar", "blogspot.com.ar"},
  111. {"www.blogspot.com.ar", "blogspot.com.ar"},
  112. {"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar"},
  113. {"logspot.com.ar", "com.ar"},
  114. {"zlogspot.com.ar", "com.ar"},
  115. {"zblogspot.com.ar", "com.ar"},
  116. // The .arpa rules are:
  117. // e164.arpa
  118. // in-addr.arpa
  119. // ip6.arpa
  120. // iris.arpa
  121. // uri.arpa
  122. // urn.arpa
  123. {"arpa", "arpa"},
  124. {"www.arpa", "arpa"},
  125. {"urn.arpa", "urn.arpa"},
  126. {"www.urn.arpa", "urn.arpa"},
  127. {"www.xxx.yyy.zzz.urn.arpa", "urn.arpa"},
  128. // The relevant {kobe,kyoto}.jp rules are:
  129. // jp
  130. // *.kobe.jp
  131. // !city.kobe.jp
  132. // kyoto.jp
  133. // ide.kyoto.jp
  134. {"jp", "jp"},
  135. {"kobe.jp", "jp"},
  136. {"c.kobe.jp", "c.kobe.jp"},
  137. {"b.c.kobe.jp", "c.kobe.jp"},
  138. {"a.b.c.kobe.jp", "c.kobe.jp"},
  139. {"city.kobe.jp", "kobe.jp"},
  140. {"www.city.kobe.jp", "kobe.jp"},
  141. {"kyoto.jp", "kyoto.jp"},
  142. {"test.kyoto.jp", "kyoto.jp"},
  143. {"ide.kyoto.jp", "ide.kyoto.jp"},
  144. {"b.ide.kyoto.jp", "ide.kyoto.jp"},
  145. {"a.b.ide.kyoto.jp", "ide.kyoto.jp"},
  146. // The .tw rules are:
  147. // tw
  148. // edu.tw
  149. // gov.tw
  150. // mil.tw
  151. // com.tw
  152. // net.tw
  153. // org.tw
  154. // idv.tw
  155. // game.tw
  156. // ebiz.tw
  157. // club.tw
  158. // 網路.tw (xn--zf0ao64a.tw)
  159. // 組織.tw (xn--uc0atv.tw)
  160. // 商業.tw (xn--czrw28b.tw)
  161. // blogspot.tw
  162. {"tw", "tw"},
  163. {"aaa.tw", "tw"},
  164. {"www.aaa.tw", "tw"},
  165. {"xn--czrw28b.aaa.tw", "tw"},
  166. {"edu.tw", "edu.tw"},
  167. {"www.edu.tw", "edu.tw"},
  168. {"xn--czrw28b.edu.tw", "edu.tw"},
  169. {"xn--czrw28b.tw", "xn--czrw28b.tw"},
  170. {"www.xn--czrw28b.tw", "xn--czrw28b.tw"},
  171. {"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw"},
  172. {"xn--kpry57d.tw", "tw"},
  173. // The .uk rules are:
  174. // *.uk
  175. // *.sch.uk
  176. // !bl.uk
  177. // !british-library.uk
  178. // !jet.uk
  179. // !mod.uk
  180. // !national-library-scotland.uk
  181. // !nel.uk
  182. // !nic.uk
  183. // !nls.uk
  184. // !parliament.uk
  185. // blogspot.co.uk
  186. {"uk", "uk"},
  187. {"aaa.uk", "aaa.uk"},
  188. {"www.aaa.uk", "aaa.uk"},
  189. {"mod.uk", "uk"},
  190. {"www.mod.uk", "uk"},
  191. {"sch.uk", "sch.uk"},
  192. {"mod.sch.uk", "mod.sch.uk"},
  193. {"www.sch.uk", "www.sch.uk"},
  194. {"blogspot.co.uk", "blogspot.co.uk"},
  195. {"blogspot.nic.uk", "uk"},
  196. {"blogspot.sch.uk", "blogspot.sch.uk"},
  197. // The .рф rules are
  198. // рф (xn--p1ai)
  199. {"xn--p1ai", "xn--p1ai"},
  200. {"aaa.xn--p1ai", "xn--p1ai"},
  201. {"www.xxx.yyy.xn--p1ai", "xn--p1ai"},
  202. // The .zw rules are:
  203. // *.zw
  204. {"zw", "zw"},
  205. {"www.zw", "www.zw"},
  206. {"zzz.zw", "zzz.zw"},
  207. {"www.zzz.zw", "zzz.zw"},
  208. {"www.xxx.yyy.zzz.zw", "zzz.zw"},
  209. // There are no .nosuchtld rules.
  210. {"nosuchtld", "nosuchtld"},
  211. {"foo.nosuchtld", "nosuchtld"},
  212. {"bar.foo.nosuchtld", "nosuchtld"},
  213. }
  214. func BenchmarkPublicSuffix(b *testing.B) {
  215. for i := 0; i < b.N; i++ {
  216. for _, tc := range publicSuffixTestCases {
  217. List.PublicSuffix(tc.domain)
  218. }
  219. }
  220. }
  221. func TestPublicSuffix(t *testing.T) {
  222. for _, tc := range publicSuffixTestCases {
  223. got := List.PublicSuffix(tc.domain)
  224. if got != tc.want {
  225. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  226. }
  227. }
  228. }
  229. func TestSlowPublicSuffix(t *testing.T) {
  230. for _, tc := range publicSuffixTestCases {
  231. got := slowPublicSuffix(tc.domain)
  232. if got != tc.want {
  233. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  234. }
  235. }
  236. }
  237. // slowPublicSuffix implements the canonical (but O(number of rules)) public
  238. // suffix algorithm described at http://publicsuffix.org/list/.
  239. //
  240. // 1. Match domain against all rules and take note of the matching ones.
  241. // 2. If no rules match, the prevailing rule is "*".
  242. // 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
  243. // 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
  244. // 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
  245. // 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
  246. // 7. The registered or registrable domain is the public suffix plus one additional label.
  247. //
  248. // This function returns the public suffix, not the registrable domain, and so
  249. // it stops after step 6.
  250. func slowPublicSuffix(domain string) string {
  251. match := func(rulePart, domainPart string) bool {
  252. switch rulePart[0] {
  253. case '*':
  254. return true
  255. case '!':
  256. return rulePart[1:] == domainPart
  257. }
  258. return rulePart == domainPart
  259. }
  260. domainParts := strings.Split(domain, ".")
  261. var matchingRules [][]string
  262. loop:
  263. for _, rule := range rules {
  264. ruleParts := strings.Split(rule, ".")
  265. if len(domainParts) < len(ruleParts) {
  266. continue
  267. }
  268. for i := range ruleParts {
  269. rulePart := ruleParts[len(ruleParts)-1-i]
  270. domainPart := domainParts[len(domainParts)-1-i]
  271. if !match(rulePart, domainPart) {
  272. continue loop
  273. }
  274. }
  275. matchingRules = append(matchingRules, ruleParts)
  276. }
  277. if len(matchingRules) == 0 {
  278. matchingRules = append(matchingRules, []string{"*"})
  279. } else {
  280. sort.Sort(byPriority(matchingRules))
  281. }
  282. prevailing := matchingRules[0]
  283. if prevailing[0][0] == '!' {
  284. prevailing = prevailing[1:]
  285. }
  286. if prevailing[0][0] == '*' {
  287. replaced := domainParts[len(domainParts)-len(prevailing)]
  288. prevailing = append([]string{replaced}, prevailing[1:]...)
  289. }
  290. return strings.Join(prevailing, ".")
  291. }
  292. type byPriority [][]string
  293. func (b byPriority) Len() int { return len(b) }
  294. func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
  295. func (b byPriority) Less(i, j int) bool {
  296. if b[i][0][0] == '!' {
  297. return true
  298. }
  299. if b[j][0][0] == '!' {
  300. return false
  301. }
  302. return len(b[i]) > len(b[j])
  303. }
  304. // eTLDPlusOneTestCases come from
  305. // http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt
  306. var eTLDPlusOneTestCases = []struct {
  307. domain, want string
  308. }{
  309. // Empty input.
  310. {"", ""},
  311. // Unlisted TLD.
  312. {"example", ""},
  313. {"example.example", "example.example"},
  314. {"b.example.example", "example.example"},
  315. {"a.b.example.example", "example.example"},
  316. // TLD with only 1 rule.
  317. {"biz", ""},
  318. {"domain.biz", "domain.biz"},
  319. {"b.domain.biz", "domain.biz"},
  320. {"a.b.domain.biz", "domain.biz"},
  321. // TLD with some 2-level rules.
  322. {"com", ""},
  323. {"example.com", "example.com"},
  324. {"b.example.com", "example.com"},
  325. {"a.b.example.com", "example.com"},
  326. {"uk.com", ""},
  327. {"example.uk.com", "example.uk.com"},
  328. {"b.example.uk.com", "example.uk.com"},
  329. {"a.b.example.uk.com", "example.uk.com"},
  330. {"test.ac", "test.ac"},
  331. // TLD with only 1 (wildcard) rule.
  332. {"cy", ""},
  333. {"c.cy", ""},
  334. {"b.c.cy", "b.c.cy"},
  335. {"a.b.c.cy", "b.c.cy"},
  336. // More complex TLD.
  337. {"jp", ""},
  338. {"test.jp", "test.jp"},
  339. {"www.test.jp", "test.jp"},
  340. {"ac.jp", ""},
  341. {"test.ac.jp", "test.ac.jp"},
  342. {"www.test.ac.jp", "test.ac.jp"},
  343. {"kyoto.jp", ""},
  344. {"test.kyoto.jp", "test.kyoto.jp"},
  345. {"ide.kyoto.jp", ""},
  346. {"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
  347. {"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
  348. {"c.kobe.jp", ""},
  349. {"b.c.kobe.jp", "b.c.kobe.jp"},
  350. {"a.b.c.kobe.jp", "b.c.kobe.jp"},
  351. {"city.kobe.jp", "city.kobe.jp"},
  352. {"www.city.kobe.jp", "city.kobe.jp"},
  353. // TLD with a wildcard rule and exceptions.
  354. {"ck", ""},
  355. {"test.ck", ""},
  356. {"b.test.ck", "b.test.ck"},
  357. {"a.b.test.ck", "b.test.ck"},
  358. {"www.ck", "www.ck"},
  359. {"www.www.ck", "www.ck"},
  360. // US K12.
  361. {"us", ""},
  362. {"test.us", "test.us"},
  363. {"www.test.us", "test.us"},
  364. {"ak.us", ""},
  365. {"test.ak.us", "test.ak.us"},
  366. {"www.test.ak.us", "test.ak.us"},
  367. {"k12.ak.us", ""},
  368. {"test.k12.ak.us", "test.k12.ak.us"},
  369. {"www.test.k12.ak.us", "test.k12.ak.us"},
  370. }
  371. func TestEffectiveTLDPlusOne(t *testing.T) {
  372. for _, tc := range eTLDPlusOneTestCases {
  373. got, _ := EffectiveTLDPlusOne(tc.domain)
  374. if got != tc.want {
  375. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  376. }
  377. }
  378. }