|
@@ -21,6 +21,7 @@ import (
|
|
|
"unicode/utf8"
|
|
"unicode/utf8"
|
|
|
|
|
|
|
|
"golang.org/x/text/secure/bidirule"
|
|
"golang.org/x/text/secure/bidirule"
|
|
|
|
|
+ "golang.org/x/text/unicode/bidi"
|
|
|
"golang.org/x/text/unicode/norm"
|
|
"golang.org/x/text/unicode/norm"
|
|
|
)
|
|
)
|
|
|
|
|
|
|
@@ -68,7 +69,7 @@ func VerifyDNSLength(verify bool) Option {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// RemoveLeadingDots removes leading label separators. Leading runes that map to
|
|
// RemoveLeadingDots removes leading label separators. Leading runes that map to
|
|
|
-// dots, such as U+3002, are removed as well.
|
|
|
|
|
|
|
+// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
|
|
|
//
|
|
//
|
|
|
// This is the behavior suggested by the UTS #46 and is adopted by some
|
|
// This is the behavior suggested by the UTS #46 and is adopted by some
|
|
|
// browsers.
|
|
// browsers.
|
|
@@ -92,7 +93,7 @@ func ValidateLabels(enable bool) Option {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// StrictDomainName limits the set of permissable ASCII characters to those
|
|
|
|
|
|
|
+// StrictDomainName limits the set of permissible ASCII characters to those
|
|
|
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
|
|
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
|
|
|
// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
|
|
// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
|
|
|
//
|
|
//
|
|
@@ -142,7 +143,6 @@ func MapForLookup() Option {
|
|
|
o.mapping = validateAndMap
|
|
o.mapping = validateAndMap
|
|
|
StrictDomainName(true)(o)
|
|
StrictDomainName(true)(o)
|
|
|
ValidateLabels(true)(o)
|
|
ValidateLabels(true)(o)
|
|
|
- RemoveLeadingDots(true)(o)
|
|
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -160,7 +160,7 @@ type options struct {
|
|
|
|
|
|
|
|
// mapping implements a validation and mapping step as defined in RFC 5895
|
|
// mapping implements a validation and mapping step as defined in RFC 5895
|
|
|
// or UTS 46, tailored to, for example, domain registration or lookup.
|
|
// or UTS 46, tailored to, for example, domain registration or lookup.
|
|
|
- mapping func(p *Profile, s string) (string, error)
|
|
|
|
|
|
|
+ mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
|
|
|
|
|
|
|
|
// bidirule, if specified, checks whether s conforms to the Bidi Rule
|
|
// bidirule, if specified, checks whether s conforms to the Bidi Rule
|
|
|
// defined in RFC 5893.
|
|
// defined in RFC 5893.
|
|
@@ -251,23 +251,21 @@ var (
|
|
|
|
|
|
|
|
punycode = &Profile{}
|
|
punycode = &Profile{}
|
|
|
lookup = &Profile{options{
|
|
lookup = &Profile{options{
|
|
|
- transitional: true,
|
|
|
|
|
- useSTD3Rules: true,
|
|
|
|
|
- validateLabels: true,
|
|
|
|
|
- removeLeadingDots: true,
|
|
|
|
|
- trie: trie,
|
|
|
|
|
- fromPuny: validateFromPunycode,
|
|
|
|
|
- mapping: validateAndMap,
|
|
|
|
|
- bidirule: bidirule.ValidString,
|
|
|
|
|
|
|
+ transitional: true,
|
|
|
|
|
+ useSTD3Rules: true,
|
|
|
|
|
+ validateLabels: true,
|
|
|
|
|
+ trie: trie,
|
|
|
|
|
+ fromPuny: validateFromPunycode,
|
|
|
|
|
+ mapping: validateAndMap,
|
|
|
|
|
+ bidirule: bidirule.ValidString,
|
|
|
}}
|
|
}}
|
|
|
display = &Profile{options{
|
|
display = &Profile{options{
|
|
|
- useSTD3Rules: true,
|
|
|
|
|
- validateLabels: true,
|
|
|
|
|
- removeLeadingDots: true,
|
|
|
|
|
- trie: trie,
|
|
|
|
|
- fromPuny: validateFromPunycode,
|
|
|
|
|
- mapping: validateAndMap,
|
|
|
|
|
- bidirule: bidirule.ValidString,
|
|
|
|
|
|
|
+ useSTD3Rules: true,
|
|
|
|
|
+ validateLabels: true,
|
|
|
|
|
+ trie: trie,
|
|
|
|
|
+ fromPuny: validateFromPunycode,
|
|
|
|
|
+ mapping: validateAndMap,
|
|
|
|
|
+ bidirule: bidirule.ValidString,
|
|
|
}}
|
|
}}
|
|
|
registration = &Profile{options{
|
|
registration = &Profile{options{
|
|
|
useSTD3Rules: true,
|
|
useSTD3Rules: true,
|
|
@@ -302,14 +300,16 @@ func (e runeError) Error() string {
|
|
|
// see http://www.unicode.org/reports/tr46.
|
|
// see http://www.unicode.org/reports/tr46.
|
|
|
func (p *Profile) process(s string, toASCII bool) (string, error) {
|
|
func (p *Profile) process(s string, toASCII bool) (string, error) {
|
|
|
var err error
|
|
var err error
|
|
|
|
|
+ var isBidi bool
|
|
|
if p.mapping != nil {
|
|
if p.mapping != nil {
|
|
|
- s, err = p.mapping(p, s)
|
|
|
|
|
|
|
+ s, isBidi, err = p.mapping(p, s)
|
|
|
}
|
|
}
|
|
|
// Remove leading empty labels.
|
|
// Remove leading empty labels.
|
|
|
if p.removeLeadingDots {
|
|
if p.removeLeadingDots {
|
|
|
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
|
|
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+ // TODO: allow for a quick check of the tables data.
|
|
|
// It seems like we should only create this error on ToASCII, but the
|
|
// It seems like we should only create this error on ToASCII, but the
|
|
|
// UTS 46 conformance tests suggest we should always check this.
|
|
// UTS 46 conformance tests suggest we should always check this.
|
|
|
if err == nil && p.verifyDNSLength && s == "" {
|
|
if err == nil && p.verifyDNSLength && s == "" {
|
|
@@ -335,6 +335,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
|
|
|
// Spec says keep the old label.
|
|
// Spec says keep the old label.
|
|
|
continue
|
|
continue
|
|
|
}
|
|
}
|
|
|
|
|
+ isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
|
|
|
labels.set(u)
|
|
labels.set(u)
|
|
|
if err == nil && p.validateLabels {
|
|
if err == nil && p.validateLabels {
|
|
|
err = p.fromPuny(p, u)
|
|
err = p.fromPuny(p, u)
|
|
@@ -349,6 +350,14 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
|
|
|
err = p.validateLabel(label)
|
|
err = p.validateLabel(label)
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+ if isBidi && p.bidirule != nil && err == nil {
|
|
|
|
|
+ for labels.reset(); !labels.done(); labels.next() {
|
|
|
|
|
+ if !p.bidirule(labels.label()) {
|
|
|
|
|
+ err = &labelError{s, "B"}
|
|
|
|
|
+ break
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
if toASCII {
|
|
if toASCII {
|
|
|
for labels.reset(); !labels.done(); labels.next() {
|
|
for labels.reset(); !labels.done(); labels.next() {
|
|
|
label := labels.label()
|
|
label := labels.label()
|
|
@@ -380,16 +389,23 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
|
|
|
return s, err
|
|
return s, err
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-func normalize(p *Profile, s string) (string, error) {
|
|
|
|
|
- return norm.NFC.String(s), nil
|
|
|
|
|
|
|
+func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
|
|
|
|
|
+ // TODO: consider first doing a quick check to see if any of these checks
|
|
|
|
|
+ // need to be done. This will make it slower in the general case, but
|
|
|
|
|
+ // faster in the common case.
|
|
|
|
|
+ mapped = norm.NFC.String(s)
|
|
|
|
|
+ isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
|
|
|
|
|
+ return mapped, isBidi, nil
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-func validateRegistration(p *Profile, s string) (string, error) {
|
|
|
|
|
|
|
+func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
|
|
|
|
|
+ // TODO: filter need for normalization in loop below.
|
|
|
if !norm.NFC.IsNormalString(s) {
|
|
if !norm.NFC.IsNormalString(s) {
|
|
|
- return s, &labelError{s, "V1"}
|
|
|
|
|
|
|
+ return s, false, &labelError{s, "V1"}
|
|
|
}
|
|
}
|
|
|
for i := 0; i < len(s); {
|
|
for i := 0; i < len(s); {
|
|
|
v, sz := trie.lookupString(s[i:])
|
|
v, sz := trie.lookupString(s[i:])
|
|
|
|
|
+ bidi = bidi || info(v).isBidi(s[i:])
|
|
|
// Copy bytes not copied so far.
|
|
// Copy bytes not copied so far.
|
|
|
switch p.simplify(info(v).category()) {
|
|
switch p.simplify(info(v).category()) {
|
|
|
// TODO: handle the NV8 defined in the Unicode idna data set to allow
|
|
// TODO: handle the NV8 defined in the Unicode idna data set to allow
|
|
@@ -397,21 +413,41 @@ func validateRegistration(p *Profile, s string) (string, error) {
|
|
|
case valid, deviation:
|
|
case valid, deviation:
|
|
|
case disallowed, mapped, unknown, ignored:
|
|
case disallowed, mapped, unknown, ignored:
|
|
|
r, _ := utf8.DecodeRuneInString(s[i:])
|
|
r, _ := utf8.DecodeRuneInString(s[i:])
|
|
|
- return s, runeError(r)
|
|
|
|
|
|
|
+ return s, bidi, runeError(r)
|
|
|
}
|
|
}
|
|
|
i += sz
|
|
i += sz
|
|
|
}
|
|
}
|
|
|
- return s, nil
|
|
|
|
|
|
|
+ return s, bidi, nil
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-func validateAndMap(p *Profile, s string) (string, error) {
|
|
|
|
|
|
|
+func (c info) isBidi(s string) bool {
|
|
|
|
|
+ if !c.isMapped() {
|
|
|
|
|
+ return c&attributesMask == rtl
|
|
|
|
|
+ }
|
|
|
|
|
+ // TODO: also store bidi info for mapped data. This is possible, but a bit
|
|
|
|
|
+ // cumbersome and not for the common case.
|
|
|
|
|
+ p, _ := bidi.LookupString(s)
|
|
|
|
|
+ switch p.Class() {
|
|
|
|
|
+ case bidi.R, bidi.AL, bidi.AN:
|
|
|
|
|
+ return true
|
|
|
|
|
+ }
|
|
|
|
|
+ return false
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
|
|
|
var (
|
|
var (
|
|
|
- err error
|
|
|
|
|
- b []byte
|
|
|
|
|
- k int
|
|
|
|
|
|
|
+ b []byte
|
|
|
|
|
+ k int
|
|
|
)
|
|
)
|
|
|
|
|
+ // combinedInfoBits contains the or-ed bits of all runes. We use this
|
|
|
|
|
+ // to derive the mayNeedNorm bit later. This may trigger normalization
|
|
|
|
|
+ // overeagerly, but it will not do so in the common case. The end result
|
|
|
|
|
+ // is another 10% saving on BenchmarkProfile for the common case.
|
|
|
|
|
+ var combinedInfoBits info
|
|
|
for i := 0; i < len(s); {
|
|
for i := 0; i < len(s); {
|
|
|
v, sz := trie.lookupString(s[i:])
|
|
v, sz := trie.lookupString(s[i:])
|
|
|
|
|
+ combinedInfoBits |= info(v)
|
|
|
|
|
+ bidi = bidi || info(v).isBidi(s[i:])
|
|
|
start := i
|
|
start := i
|
|
|
i += sz
|
|
i += sz
|
|
|
// Copy bytes not copied so far.
|
|
// Copy bytes not copied so far.
|
|
@@ -438,7 +474,9 @@ func validateAndMap(p *Profile, s string) (string, error) {
|
|
|
}
|
|
}
|
|
|
if k == 0 {
|
|
if k == 0 {
|
|
|
// No changes so far.
|
|
// No changes so far.
|
|
|
- s = norm.NFC.String(s)
|
|
|
|
|
|
|
+ if combinedInfoBits&mayNeedNorm != 0 {
|
|
|
|
|
+ s = norm.NFC.String(s)
|
|
|
|
|
+ }
|
|
|
} else {
|
|
} else {
|
|
|
b = append(b, s[k:]...)
|
|
b = append(b, s[k:]...)
|
|
|
if norm.NFC.QuickSpan(b) != len(b) {
|
|
if norm.NFC.QuickSpan(b) != len(b) {
|
|
@@ -447,7 +485,7 @@ func validateAndMap(p *Profile, s string) (string, error) {
|
|
|
// TODO: the punycode converters require strings as input.
|
|
// TODO: the punycode converters require strings as input.
|
|
|
s = string(b)
|
|
s = string(b)
|
|
|
}
|
|
}
|
|
|
- return s, err
|
|
|
|
|
|
|
+ return s, bidi, err
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// A labelIter allows iterating over domain name labels.
|
|
// A labelIter allows iterating over domain name labels.
|
|
@@ -542,6 +580,8 @@ func validateFromPunycode(p *Profile, s string) error {
|
|
|
if !norm.NFC.IsNormalString(s) {
|
|
if !norm.NFC.IsNormalString(s) {
|
|
|
return &labelError{s, "V1"}
|
|
return &labelError{s, "V1"}
|
|
|
}
|
|
}
|
|
|
|
|
+ // TODO: detect whether string may have to be normalized in the following
|
|
|
|
|
+ // loop.
|
|
|
for i := 0; i < len(s); {
|
|
for i := 0; i < len(s); {
|
|
|
v, sz := trie.lookupString(s[i:])
|
|
v, sz := trie.lookupString(s[i:])
|
|
|
if c := p.simplify(info(v).category()); c != valid && c != deviation {
|
|
if c := p.simplify(info(v).category()); c != valid && c != deviation {
|
|
@@ -616,16 +656,13 @@ var joinStates = [][numJoinTypes]joinState{
|
|
|
|
|
|
|
|
// validateLabel validates the criteria from Section 4.1. Items 1, 4, and 6 are
|
|
// validateLabel validates the criteria from Section 4.1. Items 1, 4, and 6 are
|
|
|
// already implicitly satisfied by the overall implementation.
|
|
// already implicitly satisfied by the overall implementation.
|
|
|
-func (p *Profile) validateLabel(s string) error {
|
|
|
|
|
|
|
+func (p *Profile) validateLabel(s string) (err error) {
|
|
|
if s == "" {
|
|
if s == "" {
|
|
|
if p.verifyDNSLength {
|
|
if p.verifyDNSLength {
|
|
|
return &labelError{s, "A4"}
|
|
return &labelError{s, "A4"}
|
|
|
}
|
|
}
|
|
|
return nil
|
|
return nil
|
|
|
}
|
|
}
|
|
|
- if p.bidirule != nil && !p.bidirule(s) {
|
|
|
|
|
- return &labelError{s, "B"}
|
|
|
|
|
- }
|
|
|
|
|
if !p.validateLabels {
|
|
if !p.validateLabels {
|
|
|
return nil
|
|
return nil
|
|
|
}
|
|
}
|