list_test.go 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package publicsuffix
  5. import (
  6. "sort"
  7. "strings"
  8. "testing"
  9. )
  10. func TestNodeLabel(t *testing.T) {
  11. for i, want := range nodeLabels {
  12. got := nodeLabel(uint32(i))
  13. if got != want {
  14. t.Errorf("%d: got %q, want %q", i, got, want)
  15. }
  16. }
  17. }
  18. func TestFind(t *testing.T) {
  19. testCases := []string{
  20. "",
  21. "a",
  22. "a0",
  23. "aaaa",
  24. "ao",
  25. "ap",
  26. "ar",
  27. "aro",
  28. "arp",
  29. "arpa",
  30. "arpaa",
  31. "arpb",
  32. "az",
  33. "b",
  34. "b0",
  35. "ba",
  36. "z",
  37. "zu",
  38. "zv",
  39. "zw",
  40. "zx",
  41. "zy",
  42. "zz",
  43. "zzzz",
  44. }
  45. for _, tc := range testCases {
  46. got := find(tc, 0, numTLD)
  47. want := notFound
  48. for i := uint32(0); i < numTLD; i++ {
  49. if tc == nodeLabel(i) {
  50. want = i
  51. break
  52. }
  53. }
  54. if got != want {
  55. t.Errorf("%q: got %d, want %d", tc, got, want)
  56. }
  57. }
  58. }
  59. func TestICANN(t *testing.T) {
  60. testCases := map[string]bool{
  61. "foo.org": true,
  62. "foo.co.uk": true,
  63. "foo.dyndns.org": false,
  64. "foo.go.dyndns.org": false,
  65. "foo.blogspot.co.uk": false,
  66. "foo.intranet": false,
  67. }
  68. for domain, want := range testCases {
  69. _, got := PublicSuffix(domain)
  70. if got != want {
  71. t.Errorf("%q: got %v, want %v", domain, got, want)
  72. }
  73. }
  74. }
  75. var publicSuffixTestCases = []struct {
  76. domain, want string
  77. }{
  78. // Empty string.
  79. {"", ""},
  80. // The .ao rules are:
  81. // ao
  82. // ed.ao
  83. // gv.ao
  84. // og.ao
  85. // co.ao
  86. // pb.ao
  87. // it.ao
  88. {"ao", "ao"},
  89. {"www.ao", "ao"},
  90. {"pb.ao", "pb.ao"},
  91. {"www.pb.ao", "pb.ao"},
  92. {"www.xxx.yyy.zzz.pb.ao", "pb.ao"},
  93. // The .ar rules are:
  94. // *.ar
  95. // !congresodelalengua3.ar
  96. // !educ.ar
  97. // !gobiernoelectronico.ar
  98. // !mecon.ar
  99. // !nacion.ar
  100. // !nic.ar
  101. // !promocion.ar
  102. // !retina.ar
  103. // !uba.ar
  104. // blogspot.com.ar
  105. {"ar", "ar"},
  106. {"www.ar", "www.ar"},
  107. {"nic.ar", "ar"},
  108. {"www.nic.ar", "ar"},
  109. {"com.ar", "com.ar"},
  110. {"www.com.ar", "com.ar"},
  111. {"blogspot.com.ar", "blogspot.com.ar"},
  112. {"www.blogspot.com.ar", "blogspot.com.ar"},
  113. {"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar"},
  114. {"logspot.com.ar", "com.ar"},
  115. {"zlogspot.com.ar", "com.ar"},
  116. {"zblogspot.com.ar", "com.ar"},
  117. // The .arpa rules are:
  118. // e164.arpa
  119. // in-addr.arpa
  120. // ip6.arpa
  121. // iris.arpa
  122. // uri.arpa
  123. // urn.arpa
  124. {"arpa", "arpa"},
  125. {"www.arpa", "arpa"},
  126. {"urn.arpa", "urn.arpa"},
  127. {"www.urn.arpa", "urn.arpa"},
  128. {"www.xxx.yyy.zzz.urn.arpa", "urn.arpa"},
  129. // The relevant {kobe,kyoto}.jp rules are:
  130. // jp
  131. // *.kobe.jp
  132. // !city.kobe.jp
  133. // kyoto.jp
  134. // ide.kyoto.jp
  135. {"jp", "jp"},
  136. {"kobe.jp", "jp"},
  137. {"c.kobe.jp", "c.kobe.jp"},
  138. {"b.c.kobe.jp", "c.kobe.jp"},
  139. {"a.b.c.kobe.jp", "c.kobe.jp"},
  140. {"city.kobe.jp", "kobe.jp"},
  141. {"www.city.kobe.jp", "kobe.jp"},
  142. {"kyoto.jp", "kyoto.jp"},
  143. {"test.kyoto.jp", "kyoto.jp"},
  144. {"ide.kyoto.jp", "ide.kyoto.jp"},
  145. {"b.ide.kyoto.jp", "ide.kyoto.jp"},
  146. {"a.b.ide.kyoto.jp", "ide.kyoto.jp"},
  147. // The .tw rules are:
  148. // tw
  149. // edu.tw
  150. // gov.tw
  151. // mil.tw
  152. // com.tw
  153. // net.tw
  154. // org.tw
  155. // idv.tw
  156. // game.tw
  157. // ebiz.tw
  158. // club.tw
  159. // 網路.tw (xn--zf0ao64a.tw)
  160. // 組織.tw (xn--uc0atv.tw)
  161. // 商業.tw (xn--czrw28b.tw)
  162. // blogspot.tw
  163. {"tw", "tw"},
  164. {"aaa.tw", "tw"},
  165. {"www.aaa.tw", "tw"},
  166. {"xn--czrw28b.aaa.tw", "tw"},
  167. {"edu.tw", "edu.tw"},
  168. {"www.edu.tw", "edu.tw"},
  169. {"xn--czrw28b.edu.tw", "edu.tw"},
  170. {"xn--czrw28b.tw", "xn--czrw28b.tw"},
  171. {"www.xn--czrw28b.tw", "xn--czrw28b.tw"},
  172. {"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw"},
  173. {"xn--kpry57d.tw", "tw"},
  174. // The .uk rules are:
  175. // *.uk
  176. // *.sch.uk
  177. // !bl.uk
  178. // !british-library.uk
  179. // !jet.uk
  180. // !mod.uk
  181. // !national-library-scotland.uk
  182. // !nel.uk
  183. // !nic.uk
  184. // !nls.uk
  185. // !parliament.uk
  186. // blogspot.co.uk
  187. {"uk", "uk"},
  188. {"aaa.uk", "aaa.uk"},
  189. {"www.aaa.uk", "aaa.uk"},
  190. {"mod.uk", "uk"},
  191. {"www.mod.uk", "uk"},
  192. {"sch.uk", "sch.uk"},
  193. {"mod.sch.uk", "mod.sch.uk"},
  194. {"www.sch.uk", "www.sch.uk"},
  195. {"blogspot.co.uk", "blogspot.co.uk"},
  196. {"blogspot.nic.uk", "uk"},
  197. {"blogspot.sch.uk", "blogspot.sch.uk"},
  198. // The .рф rules are
  199. // рф (xn--p1ai)
  200. {"xn--p1ai", "xn--p1ai"},
  201. {"aaa.xn--p1ai", "xn--p1ai"},
  202. {"www.xxx.yyy.xn--p1ai", "xn--p1ai"},
  203. // The .zw rules are:
  204. // *.zw
  205. {"zw", "zw"},
  206. {"www.zw", "www.zw"},
  207. {"zzz.zw", "zzz.zw"},
  208. {"www.zzz.zw", "zzz.zw"},
  209. {"www.xxx.yyy.zzz.zw", "zzz.zw"},
  210. // There are no .nosuchtld rules.
  211. {"nosuchtld", "nosuchtld"},
  212. {"foo.nosuchtld", "nosuchtld"},
  213. {"bar.foo.nosuchtld", "nosuchtld"},
  214. }
  215. func BenchmarkPublicSuffix(b *testing.B) {
  216. for i := 0; i < b.N; i++ {
  217. for _, tc := range publicSuffixTestCases {
  218. List.PublicSuffix(tc.domain)
  219. }
  220. }
  221. }
  222. func TestPublicSuffix(t *testing.T) {
  223. for _, tc := range publicSuffixTestCases {
  224. got := List.PublicSuffix(tc.domain)
  225. if got != tc.want {
  226. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  227. }
  228. }
  229. }
  230. func TestSlowPublicSuffix(t *testing.T) {
  231. for _, tc := range publicSuffixTestCases {
  232. got := slowPublicSuffix(tc.domain)
  233. if got != tc.want {
  234. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  235. }
  236. }
  237. }
  238. // slowPublicSuffix implements the canonical (but O(number of rules)) public
  239. // suffix algorithm described at http://publicsuffix.org/list/.
  240. //
  241. // 1. Match domain against all rules and take note of the matching ones.
  242. // 2. If no rules match, the prevailing rule is "*".
  243. // 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
  244. // 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
  245. // 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
  246. // 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
  247. // 7. The registered or registrable domain is the public suffix plus one additional label.
  248. //
  249. // This function returns the public suffix, not the registrable domain, and so
  250. // it stops after step 6.
  251. func slowPublicSuffix(domain string) string {
  252. match := func(rulePart, domainPart string) bool {
  253. switch rulePart[0] {
  254. case '*':
  255. return true
  256. case '!':
  257. return rulePart[1:] == domainPart
  258. }
  259. return rulePart == domainPart
  260. }
  261. domainParts := strings.Split(domain, ".")
  262. var matchingRules [][]string
  263. loop:
  264. for _, rule := range rules {
  265. ruleParts := strings.Split(rule, ".")
  266. if len(domainParts) < len(ruleParts) {
  267. continue
  268. }
  269. for i := range ruleParts {
  270. rulePart := ruleParts[len(ruleParts)-1-i]
  271. domainPart := domainParts[len(domainParts)-1-i]
  272. if !match(rulePart, domainPart) {
  273. continue loop
  274. }
  275. }
  276. matchingRules = append(matchingRules, ruleParts)
  277. }
  278. if len(matchingRules) == 0 {
  279. matchingRules = append(matchingRules, []string{"*"})
  280. } else {
  281. sort.Sort(byPriority(matchingRules))
  282. }
  283. prevailing := matchingRules[0]
  284. if prevailing[0][0] == '!' {
  285. prevailing = prevailing[1:]
  286. }
  287. if prevailing[0][0] == '*' {
  288. replaced := domainParts[len(domainParts)-len(prevailing)]
  289. prevailing = append([]string{replaced}, prevailing[1:]...)
  290. }
  291. return strings.Join(prevailing, ".")
  292. }
  293. type byPriority [][]string
  294. func (b byPriority) Len() int { return len(b) }
  295. func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
  296. func (b byPriority) Less(i, j int) bool {
  297. if b[i][0][0] == '!' {
  298. return true
  299. }
  300. if b[j][0][0] == '!' {
  301. return false
  302. }
  303. return len(b[i]) > len(b[j])
  304. }
  305. // eTLDPlusOneTestCases come from
  306. // http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt
  307. var eTLDPlusOneTestCases = []struct {
  308. domain, want string
  309. }{
  310. // Empty input.
  311. {"", ""},
  312. // Unlisted TLD.
  313. {"example", ""},
  314. {"example.example", "example.example"},
  315. {"b.example.example", "example.example"},
  316. {"a.b.example.example", "example.example"},
  317. // TLD with only 1 rule.
  318. {"biz", ""},
  319. {"domain.biz", "domain.biz"},
  320. {"b.domain.biz", "domain.biz"},
  321. {"a.b.domain.biz", "domain.biz"},
  322. // TLD with some 2-level rules.
  323. {"com", ""},
  324. {"example.com", "example.com"},
  325. {"b.example.com", "example.com"},
  326. {"a.b.example.com", "example.com"},
  327. {"uk.com", ""},
  328. {"example.uk.com", "example.uk.com"},
  329. {"b.example.uk.com", "example.uk.com"},
  330. {"a.b.example.uk.com", "example.uk.com"},
  331. {"test.ac", "test.ac"},
  332. // TLD with only 1 (wildcard) rule.
  333. {"cy", ""},
  334. {"c.cy", ""},
  335. {"b.c.cy", "b.c.cy"},
  336. {"a.b.c.cy", "b.c.cy"},
  337. // More complex TLD.
  338. {"jp", ""},
  339. {"test.jp", "test.jp"},
  340. {"www.test.jp", "test.jp"},
  341. {"ac.jp", ""},
  342. {"test.ac.jp", "test.ac.jp"},
  343. {"www.test.ac.jp", "test.ac.jp"},
  344. {"kyoto.jp", ""},
  345. {"test.kyoto.jp", "test.kyoto.jp"},
  346. {"ide.kyoto.jp", ""},
  347. {"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
  348. {"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
  349. {"c.kobe.jp", ""},
  350. {"b.c.kobe.jp", "b.c.kobe.jp"},
  351. {"a.b.c.kobe.jp", "b.c.kobe.jp"},
  352. {"city.kobe.jp", "city.kobe.jp"},
  353. {"www.city.kobe.jp", "city.kobe.jp"},
  354. // TLD with a wildcard rule and exceptions.
  355. {"om", ""},
  356. {"test.om", ""},
  357. {"b.test.om", "b.test.om"},
  358. {"a.b.test.om", "b.test.om"},
  359. {"songfest.om", "songfest.om"},
  360. {"www.songfest.om", "songfest.om"},
  361. // US K12.
  362. {"us", ""},
  363. {"test.us", "test.us"},
  364. {"www.test.us", "test.us"},
  365. {"ak.us", ""},
  366. {"test.ak.us", "test.ak.us"},
  367. {"www.test.ak.us", "test.ak.us"},
  368. {"k12.ak.us", ""},
  369. {"test.k12.ak.us", "test.k12.ak.us"},
  370. {"www.test.k12.ak.us", "test.k12.ak.us"},
  371. }
  372. func TestEffectiveTLDPlusOne(t *testing.T) {
  373. for _, tc := range eTLDPlusOneTestCases {
  374. got, _ := EffectiveTLDPlusOne(tc.domain)
  375. if got != tc.want {
  376. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  377. }
  378. }
  379. }