list_test.go 14 KB


  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package publicsuffix
  5. import (
  6. "sort"
  7. "strings"
  8. "testing"
  9. )
  10. func TestNodeLabel(t *testing.T) {
  11. for i, want := range nodeLabels {
  12. got := nodeLabel(uint32(i))
  13. if got != want {
  14. t.Errorf("%d: got %q, want %q", i, got, want)
  15. }
  16. }
  17. }
  18. func TestFind(t *testing.T) {
  19. testCases := []string{
  20. "",
  21. "a",
  22. "a0",
  23. "aaaa",
  24. "ao",
  25. "ap",
  26. "ar",
  27. "aro",
  28. "arp",
  29. "arpa",
  30. "arpaa",
  31. "arpb",
  32. "az",
  33. "b",
  34. "b0",
  35. "ba",
  36. "z",
  37. "zu",
  38. "zv",
  39. "zw",
  40. "zx",
  41. "zy",
  42. "zz",
  43. "zzzz",
  44. }
  45. for _, tc := range testCases {
  46. got := find(tc, 0, numTLD)
  47. want := notFound
  48. for i := uint32(0); i < numTLD; i++ {
  49. if tc == nodeLabel(i) {
  50. want = i
  51. break
  52. }
  53. }
  54. if got != want {
  55. t.Errorf("%q: got %d, want %d", tc, got, want)
  56. }
  57. }
  58. }
  59. func TestICANN(t *testing.T) {
  60. testCases := map[string]bool{
  61. "foo.org": true,
  62. "foo.co.uk": true,
  63. "foo.dyndns.org": false,
  64. "foo.go.dyndns.org": false,
  65. "foo.blogspot.co.uk": false,
  66. "foo.intranet": false,
  67. }
  68. for domain, want := range testCases {
  69. _, got := PublicSuffix(domain)
  70. if got != want {
  71. t.Errorf("%q: got %v, want %v", domain, got, want)
  72. }
  73. }
  74. }
  // publicSuffixTestCases is the golden table consumed by TestPublicSuffix,
  // TestSlowPublicSuffix and BenchmarkPublicSuffix. Each row gives an input
  // domain, the public suffix expected for it, and the expected value of the
  // boolean result. Every group of rows is preceded by the list rules that are
  // relevant to it.
  var publicSuffixTestCases = []struct {
  	domain, wantPS string
  	wantICANN      bool
  }{
  	// The empty string.
  	{"", "", false},

  	// Rules under .ao:
  	//	ao
  	//	ed.ao
  	//	gv.ao
  	//	og.ao
  	//	co.ao
  	//	pb.ao
  	//	it.ao
  	{"ao", "ao", true},
  	{"www.ao", "ao", true},
  	{"pb.ao", "pb.ao", true},
  	{"www.pb.ao", "pb.ao", true},
  	{"www.xxx.yyy.zzz.pb.ao", "pb.ao", true},

  	// Rules under .ar:
  	//	ar
  	//	com.ar
  	//	edu.ar
  	//	gob.ar
  	//	gov.ar
  	//	int.ar
  	//	mil.ar
  	//	net.ar
  	//	org.ar
  	//	tur.ar
  	//	blogspot.com.ar (in the PRIVATE DOMAIN section).
  	{"ar", "ar", true},
  	{"www.ar", "ar", true},
  	{"nic.ar", "ar", true},
  	{"www.nic.ar", "ar", true},
  	{"com.ar", "com.ar", true},
  	{"www.com.ar", "com.ar", true},
  	{"blogspot.com.ar", "blogspot.com.ar", false},                 // PRIVATE DOMAIN.
  	{"www.blogspot.com.ar", "blogspot.com.ar", false},             // PRIVATE DOMAIN.
  	{"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar", false}, // PRIVATE DOMAIN.
  	{"logspot.com.ar", "com.ar", true},
  	{"zlogspot.com.ar", "com.ar", true},
  	{"zblogspot.com.ar", "com.ar", true},

  	// Rules under .arpa:
  	//	arpa
  	//	e164.arpa
  	//	in-addr.arpa
  	//	ip6.arpa
  	//	iris.arpa
  	//	uri.arpa
  	//	urn.arpa
  	{"arpa", "arpa", true},
  	{"www.arpa", "arpa", true},
  	{"urn.arpa", "urn.arpa", true},
  	{"www.urn.arpa", "urn.arpa", true},
  	{"www.xxx.yyy.zzz.urn.arpa", "urn.arpa", true},

  	// Relevant rules under {kobe,kyoto}.jp:
  	//	jp
  	//	*.kobe.jp
  	//	!city.kobe.jp
  	//	kyoto.jp
  	//	ide.kyoto.jp
  	{"jp", "jp", true},
  	{"kobe.jp", "jp", true},
  	{"c.kobe.jp", "c.kobe.jp", true},
  	{"b.c.kobe.jp", "c.kobe.jp", true},
  	{"a.b.c.kobe.jp", "c.kobe.jp", true},
  	{"city.kobe.jp", "kobe.jp", true},
  	{"www.city.kobe.jp", "kobe.jp", true},
  	{"kyoto.jp", "kyoto.jp", true},
  	{"test.kyoto.jp", "kyoto.jp", true},
  	{"ide.kyoto.jp", "ide.kyoto.jp", true},
  	{"b.ide.kyoto.jp", "ide.kyoto.jp", true},
  	{"a.b.ide.kyoto.jp", "ide.kyoto.jp", true},

  	// Rules under .tw:
  	//	tw
  	//	edu.tw
  	//	gov.tw
  	//	mil.tw
  	//	com.tw
  	//	net.tw
  	//	org.tw
  	//	idv.tw
  	//	game.tw
  	//	ebiz.tw
  	//	club.tw
  	//	網路.tw (xn--zf0ao64a.tw)
  	//	組織.tw (xn--uc0atv.tw)
  	//	商業.tw (xn--czrw28b.tw)
  	//	blogspot.tw
  	{"tw", "tw", true},
  	{"aaa.tw", "tw", true},
  	{"www.aaa.tw", "tw", true},
  	{"xn--czrw28b.aaa.tw", "tw", true},
  	{"edu.tw", "edu.tw", true},
  	{"www.edu.tw", "edu.tw", true},
  	{"xn--czrw28b.edu.tw", "edu.tw", true},
  	{"xn--czrw28b.tw", "xn--czrw28b.tw", true},
  	{"www.xn--czrw28b.tw", "xn--czrw28b.tw", true},
  	{"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw", true},
  	{"xn--kpry57d.tw", "tw", true},

  	// Rules under .uk:
  	//	uk
  	//	ac.uk
  	//	co.uk
  	//	gov.uk
  	//	ltd.uk
  	//	me.uk
  	//	net.uk
  	//	nhs.uk
  	//	org.uk
  	//	plc.uk
  	//	police.uk
  	//	*.sch.uk
  	//	blogspot.co.uk (in the PRIVATE DOMAIN section).
  	{"uk", "uk", true},
  	{"aaa.uk", "uk", true},
  	{"www.aaa.uk", "uk", true},
  	{"mod.uk", "uk", true},
  	{"www.mod.uk", "uk", true},
  	{"sch.uk", "uk", true},
  	{"mod.sch.uk", "mod.sch.uk", true},
  	{"www.sch.uk", "www.sch.uk", true},
  	{"co.uk", "co.uk", true},
  	{"www.co.uk", "co.uk", true},
  	{"blogspot.co.uk", "blogspot.co.uk", false}, // PRIVATE DOMAIN.
  	{"blogspot.nic.uk", "uk", true},
  	{"blogspot.sch.uk", "blogspot.sch.uk", true},

  	// Rules under .рф:
  	//	рф (xn--p1ai)
  	{"xn--p1ai", "xn--p1ai", true},
  	{"aaa.xn--p1ai", "xn--p1ai", true},
  	{"www.xxx.yyy.xn--p1ai", "xn--p1ai", true},

  	// Rules under .bd:
  	//	*.bd
  	{"bd", "bd", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
  	{"www.bd", "www.bd", true},
  	{"xxx.www.bd", "www.bd", true},
  	{"zzz.bd", "zzz.bd", true},
  	{"www.zzz.bd", "zzz.bd", true},
  	{"www.xxx.yyy.zzz.bd", "zzz.bd", true},

  	// Rules under .ck:
  	//	*.ck
  	//	!www.ck
  	{"ck", "ck", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
  	{"www.ck", "ck", true},
  	{"xxx.www.ck", "ck", true},
  	{"zzz.ck", "zzz.ck", true},
  	{"www.zzz.ck", "zzz.ck", true},
  	{"www.xxx.yyy.zzz.ck", "zzz.ck", true},

  	// Rules under .myjino.ru (in the PRIVATE DOMAIN section):
  	//	myjino.ru
  	//	*.hosting.myjino.ru
  	//	*.landing.myjino.ru
  	//	*.spectrum.myjino.ru
  	//	*.vps.myjino.ru
  	{"myjino.ru", "myjino.ru", false},
  	{"aaa.myjino.ru", "myjino.ru", false},
  	{"bbb.ccc.myjino.ru", "myjino.ru", false},
  	{"hosting.ddd.myjino.ru", "myjino.ru", false},
  	{"landing.myjino.ru", "myjino.ru", false},
  	{"www.landing.myjino.ru", "www.landing.myjino.ru", false},
  	{"spectrum.vps.myjino.ru", "spectrum.vps.myjino.ru", false},

  	// Rules under .uberspace.de (in the PRIVATE DOMAIN section):
  	//	*.uberspace.de
  	{"uberspace.de", "de", true}, // "de" is in the ICANN DOMAIN section. See footnote (†).
  	{"aaa.uberspace.de", "aaa.uberspace.de", false},
  	{"bbb.ccc.uberspace.de", "ccc.uberspace.de", false},

  	// No rules exist for .nosuchtld.
  	{"nosuchtld", "nosuchtld", false},
  	{"foo.nosuchtld", "nosuchtld", false},
  	{"bar.foo.nosuchtld", "nosuchtld", false},

  	// (†) Wildcard behavior is disputed: what should the public suffix of
  	// "platform.sh" be when both "*.platform.sh" and "sh" are in the PSL but
  	// "platform.sh" itself is not? "platform.sh" and "sh" are both defensible
  	// answers, and different browsers have implemented different behaviors.
  	//
  	// This implementation, Go's golang.org/x/net/publicsuffix, returns "sh",
  	// matching a literal reading of the "Formal Algorithm" section of
  	// https://publicsuffix.org/list/
  	//
  	// Together, the TestPublicSuffix and TestSlowPublicSuffix tests check that
  	// the Go implementation (func PublicSuffix in list.go) and the literal
  	// interpretation (func slowPublicSuffix in list_test.go) produce the same
  	// (golden) results for every row of this publicSuffixTestCases slice,
  	// including some "platform.sh" style rows.
  	//
  	// Further discussion of "the platform.sh problem":
  	//   - https://github.com/publicsuffix/list/issues/694
  	//   - https://bugzilla.mozilla.org/show_bug.cgi?id=1124625#c6
  	//   - https://wiki.mozilla.org/Public_Suffix_List/platform.sh_Problem
  }
  269. func BenchmarkPublicSuffix(b *testing.B) {
  270. for i := 0; i < b.N; i++ {
  271. for _, tc := range publicSuffixTestCases {
  272. List.PublicSuffix(tc.domain)
  273. }
  274. }
  275. }
  276. func TestPublicSuffix(t *testing.T) {
  277. for _, tc := range publicSuffixTestCases {
  278. gotPS, gotICANN := PublicSuffix(tc.domain)
  279. if gotPS != tc.wantPS || gotICANN != tc.wantICANN {
  280. t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN)
  281. }
  282. }
  283. }
  284. func TestSlowPublicSuffix(t *testing.T) {
  285. for _, tc := range publicSuffixTestCases {
  286. gotPS, gotICANN := slowPublicSuffix(tc.domain)
  287. if gotPS != tc.wantPS || gotICANN != tc.wantICANN {
  288. t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN)
  289. }
  290. }
  291. }
  292. func TestNumICANNRules(t *testing.T) {
  293. if numICANNRules <= 0 {
  294. t.Fatal("no ICANN rules")
  295. }
  296. if numICANNRules >= len(rules) {
  297. t.Fatal("no Private rules")
  298. }
  299. // Check the last ICANN and first Private rules. If the underlying public
  300. // suffix list changes, we may need to update these hard-coded checks.
  301. if got, want := rules[numICANNRules-1], "zuerich"; got != want {
  302. t.Errorf("last ICANN rule: got %q, wawnt %q", got, want)
  303. }
  304. if got, want := rules[numICANNRules], "cc.ua"; got != want {
  305. t.Errorf("first Private rule: got %q, wawnt %q", got, want)
  306. }
  307. }
  // slowPublicSuffixRule is one rule that slowPublicSuffix found to match the
  // domain being examined.
  type slowPublicSuffixRule struct {
  	// ruleParts holds the rule split on ".", leftmost label first. A label
  	// may be "*" or carry a leading "!".
  	ruleParts []string
  	// icann is set from the rule's position: true when its index in rules
  	// is below numICANNRules.
  	icann bool
  }
  312. // slowPublicSuffix implements the canonical (but O(number of rules)) public
  313. // suffix algorithm described at http://publicsuffix.org/list/.
  314. //
  315. // 1. Match domain against all rules and take note of the matching ones.
  316. // 2. If no rules match, the prevailing rule is "*".
  317. // 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
  318. // 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
  319. // 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
  320. // 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
  321. // 7. The registered or registrable domain is the public suffix plus one additional label.
  322. //
  323. // This function returns the public suffix, not the registrable domain, and so
  324. // it stops after step 6.
  325. func slowPublicSuffix(domain string) (string, bool) {
  326. match := func(rulePart, domainPart string) bool {
  327. switch rulePart[0] {
  328. case '*':
  329. return true
  330. case '!':
  331. return rulePart[1:] == domainPart
  332. }
  333. return rulePart == domainPart
  334. }
  335. domainParts := strings.Split(domain, ".")
  336. var matchingRules []slowPublicSuffixRule
  337. loop:
  338. for i, rule := range rules {
  339. ruleParts := strings.Split(rule, ".")
  340. if len(domainParts) < len(ruleParts) {
  341. continue
  342. }
  343. for i := range ruleParts {
  344. rulePart := ruleParts[len(ruleParts)-1-i]
  345. domainPart := domainParts[len(domainParts)-1-i]
  346. if !match(rulePart, domainPart) {
  347. continue loop
  348. }
  349. }
  350. matchingRules = append(matchingRules, slowPublicSuffixRule{
  351. ruleParts: ruleParts,
  352. icann: i < numICANNRules,
  353. })
  354. }
  355. if len(matchingRules) == 0 {
  356. matchingRules = append(matchingRules, slowPublicSuffixRule{
  357. ruleParts: []string{"*"},
  358. icann: false,
  359. })
  360. } else {
  361. sort.Sort(byPriority(matchingRules))
  362. }
  363. prevailing := matchingRules[0]
  364. if prevailing.ruleParts[0][0] == '!' {
  365. prevailing.ruleParts = prevailing.ruleParts[1:]
  366. }
  367. if prevailing.ruleParts[0][0] == '*' {
  368. replaced := domainParts[len(domainParts)-len(prevailing.ruleParts)]
  369. prevailing.ruleParts = append([]string{replaced}, prevailing.ruleParts[1:]...)
  370. }
  371. return strings.Join(prevailing.ruleParts, "."), prevailing.icann
  372. }
  373. type byPriority []slowPublicSuffixRule
  374. func (b byPriority) Len() int { return len(b) }
  375. func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
  376. func (b byPriority) Less(i, j int) bool {
  377. if b[i].ruleParts[0][0] == '!' {
  378. return true
  379. }
  380. if b[j].ruleParts[0][0] == '!' {
  381. return false
  382. }
  383. return len(b[i].ruleParts) > len(b[j].ruleParts)
  384. }
  // eTLDPlusOneTestCases is the table driving TestEffectiveTLDPlusOne. The
  // rows come from
  // https://github.com/publicsuffix/list/blob/master/tests/test_psl.txt
  // An empty want means that no non-empty result is expected for that domain.
  var eTLDPlusOneTestCases = []struct {
  	domain string
  	want   string
  }{
  	// Empty input.
  	{"", ""},

  	// Unlisted TLD.
  	{"example", ""},
  	{"example.example", "example.example"},
  	{"b.example.example", "example.example"},
  	{"a.b.example.example", "example.example"},

  	// TLD with only 1 rule.
  	{"biz", ""},
  	{"domain.biz", "domain.biz"},
  	{"b.domain.biz", "domain.biz"},
  	{"a.b.domain.biz", "domain.biz"},

  	// TLD with some 2-level rules.
  	{"com", ""},
  	{"example.com", "example.com"},
  	{"b.example.com", "example.com"},
  	{"a.b.example.com", "example.com"},
  	{"uk.com", ""},
  	{"example.uk.com", "example.uk.com"},
  	{"b.example.uk.com", "example.uk.com"},
  	{"a.b.example.uk.com", "example.uk.com"},
  	{"test.ac", "test.ac"},

  	// TLD with only 1 (wildcard) rule.
  	{"mm", ""},
  	{"c.mm", ""},
  	{"b.c.mm", "b.c.mm"},
  	{"a.b.c.mm", "b.c.mm"},

  	// More complex TLD.
  	{"jp", ""},
  	{"test.jp", "test.jp"},
  	{"www.test.jp", "test.jp"},
  	{"ac.jp", ""},
  	{"test.ac.jp", "test.ac.jp"},
  	{"www.test.ac.jp", "test.ac.jp"},
  	{"kyoto.jp", ""},
  	{"test.kyoto.jp", "test.kyoto.jp"},
  	{"ide.kyoto.jp", ""},
  	{"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
  	{"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
  	{"c.kobe.jp", ""},
  	{"b.c.kobe.jp", "b.c.kobe.jp"},
  	{"a.b.c.kobe.jp", "b.c.kobe.jp"},
  	{"city.kobe.jp", "city.kobe.jp"},
  	{"www.city.kobe.jp", "city.kobe.jp"},

  	// TLD with a wildcard rule and exceptions.
  	{"ck", ""},
  	{"test.ck", ""},
  	{"b.test.ck", "b.test.ck"},
  	{"a.b.test.ck", "b.test.ck"},
  	{"www.ck", "www.ck"},
  	{"www.www.ck", "www.ck"},

  	// US K12.
  	{"us", ""},
  	{"test.us", "test.us"},
  	{"www.test.us", "test.us"},
  	{"ak.us", ""},
  	{"test.ak.us", "test.ak.us"},
  	{"www.test.ak.us", "test.ak.us"},
  	{"k12.ak.us", ""},
  	{"test.k12.ak.us", "test.k12.ak.us"},
  	{"www.test.k12.ak.us", "test.k12.ak.us"},

  	// Punycoded IDN labels.
  	{"xn--85x722f.com.cn", "xn--85x722f.com.cn"},
  	{"xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
  	{"www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
  	{"shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn"},
  	{"xn--55qx5d.cn", ""},
  	{"xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
  	{"www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
  	{"shishi.xn--fiqs8s", "shishi.xn--fiqs8s"},
  	{"xn--fiqs8s", ""},

  	// Invalid input.
  	{".", ""},
  	{"de.", ""},
  	{".de", ""},
  	{".com.au", ""},
  	{"com.au.", ""},
  	{"com..au", ""},
  }
  469. func TestEffectiveTLDPlusOne(t *testing.T) {
  470. for _, tc := range eTLDPlusOneTestCases {
  471. got, _ := EffectiveTLDPlusOne(tc.domain)
  472. if got != tc.want {
  473. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  474. }
  475. }
  476. }