idna9.0.0_test.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:build !go1.10
  5. // +build !go1.10
  6. package idna
  7. import "testing"
  8. // TestLabelErrors tests strings returned in case of error. All results should
  9. // be identical to the reference implementation and can be verified at
  10. // https://unicode.org/cldr/utility/idna.jsp. The reference implementation,
  11. // however, seems to not display Bidi and ContextJ errors.
  12. //
  13. // In some cases the behavior of browsers is added as a comment. In all cases,
  14. // whenever a resolve search returns an error here, Chrome will treat the input
  15. // string as a search string (including those for Bidi and Context J errors),
  16. // unless noted otherwise.
  17. func TestLabelErrors(t *testing.T) {
  18. encode := func(s string) string { s, _ = encode(acePrefix, s); return s }
  19. type kind struct {
  20. name string
  21. f func(string) (string, error)
  22. }
  23. punyA := kind{"PunycodeA", punycode.ToASCII}
  24. resolve := kind{"ResolveA", Lookup.ToASCII}
  25. display := kind{"ToUnicode", Display.ToUnicode}
  26. p := New(VerifyDNSLength(true), MapForLookup(), BidiRule())
  27. lengthU := kind{"CheckLengthU", p.ToUnicode}
  28. lengthA := kind{"CheckLengthA", p.ToASCII}
  29. p = New(MapForLookup(), StrictDomainName(false))
  30. std3 := kind{"STD3", p.ToASCII}
  31. p = New(MapForLookup(), CheckHyphens(false))
  32. hyphens := kind{"CheckHyphens", p.ToASCII}
  33. testCases := []struct {
  34. kind
  35. input string
  36. want string
  37. wantErr string
  38. }{
  39. {lengthU, "", "", "A4"}, // From UTS 46 conformance test.
  40. {lengthA, "", "", "A4"},
  41. {lengthU, "xn--", "", "A4"},
  42. {lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
  43. {lengthU, "xn--.foo", ".foo", "A4"},
  44. {lengthU, "foo.xn--.bar", "foo..bar", "A4"},
  45. {display, "xn--", "", ""},
  46. {display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
  47. {display, "xn--.foo", ".foo", ""},
  48. {display, "foo.xn--.bar", "foo..bar", ""},
  49. {lengthA, "a..b", "a..b", "A4"},
  50. {punyA, ".b", ".b", ""},
  51. // For backwards compatibility, the Punycode profile does not map runes.
  52. {punyA, "\u3002b", "xn--b-83t", ""},
  53. {punyA, "..b", "..b", ""},
  54. // Only strip leading empty labels for certain profiles. Stripping
  55. // leading empty labels here but not for "empty" punycode above seems
  56. // inconsistent, but seems to be applied by both the conformance test
  57. // and Chrome. So we turn it off by default, support it as an option,
  58. // and enable it in profiles where it seems commonplace.
  59. {lengthA, ".b", "b", ""},
  60. {lengthA, "\u3002b", "b", ""},
  61. {lengthA, "..b", "b", ""},
  62. {lengthA, "b..", "b..", ""},
  63. {resolve, "a..b", "a..b", ""},
  64. {resolve, ".b", "b", ""},
  65. {resolve, "\u3002b", "b", ""},
  66. {resolve, "..b", "b", ""},
  67. {resolve, "b..", "b..", ""},
  68. // Raw punycode
  69. {punyA, "", "", ""},
  70. {punyA, "*.foo.com", "*.foo.com", ""},
  71. {punyA, "Foo.com", "Foo.com", ""},
  72. // STD3 rules
  73. {display, "*.foo.com", "*.foo.com", "P1"},
  74. {std3, "*.foo.com", "*.foo.com", ""},
  75. // Hyphens
  76. {display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"},
  77. {hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""},
  78. {display, "-label-.com", "-label-.com", "V3"},
  79. {hyphens, "-label-.com", "-label-.com", ""},
  80. // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
  81. // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
  82. // lab9.be.
  83. {resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
  84. {display, "lab⒐be", "lab⒐be", "P1"},
  85. {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
  86. {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},
  87. // Chrome 54.0 recognizes the error and treats this input verbatim as a
  88. // search string.
  89. // Safari 10.0 (non-conform spec) decomposes "⒈" and computes the
  90. // punycode on the result using transitional mapping.
  91. // Firefox 49.0.1 goes haywire on this string and prints a bunch of what
  92. // seems to be nested punycode encodings.
  93. {resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
  94. {display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},
  95. {resolve, "a\u200Cb", "ab", ""},
  96. {display, "a\u200Cb", "a\u200Cb", "C"},
  97. {resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
  98. {display, "a\u200Cb", "a\u200Cb", "C"},
  99. {resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
  100. {
  101. // Notice how the string gets transformed, even with an error.
  102. // Chrome will use the original string if it finds an error, so not
  103. // the transformed one.
  104. display,
  105. "gr\ufecb\ufeae\ufe91\ufef2.de",
  106. "gr\u0639\u0631\u0628\u064a.de",
  107. "B",
  108. },
  109. {resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
  110. {display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},
  111. // normalize input
  112. {resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
  113. {display, "a\u0323\u0322", "\u1ea1\u0322", ""},
  114. // Non-normalized strings are not normalized when they originate from
  115. // punycode. Despite the error, Chrome, Safari and Firefox will attempt
  116. // to look up the input punycode.
  117. {resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
  118. {display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
  119. }
  120. for _, tc := range testCases {
  121. doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr)
  122. }
  123. }