123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // +build go1.10
- package precis
- import (
- "strings"
- "golang.org/x/text/runes"
- "golang.org/x/text/secure/bidirule"
- )
- var enforceTestCases = []struct { // table of test vectors, one entry per profile / context rule; NOTE(review): presumably driven by the Enforce tests — confirm against the test functions
- name string // label identifying the group of cases
- p *Profile // profile the cases in this group are run against
- cases []testCase // each literal below is {string, string, error-or-nil}; presumably input, expected output, expected error — TODO confirm against testCase's definition
- }{
- {"Basic", NewFreeform(), []testCase{
- {"e\u0301\u031f", "\u00e9\u031f", nil}, // normalize: e + U+0301 composes to U+00E9, the trailing U+031F is kept
- }},
- {"Context Rule 1", NewFreeform(), []testCase{
- // Rule 1: zero-width non-joiner (U+200C)
- // From RFC 5892, Appendix A.1:
- // False
- // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
- // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
- // (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
- //
- // Example runes for different joining types:
- // Join L: U+A872; PHAGS-PA SUPERFIXED LETTER RA
- // Join D: U+062C; ARABIC LETTER JEEM
- // Join T: U+0610; ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM
- // Join R: U+0627; ARABIC LETTER ALEF
- // Virama: U+0A4D; GURMUKHI SIGN VIRAMA
- // Virama and Join T: U+0ACD; GUJARATI SIGN VIRAMA
- {"\u200c", "", errContext},
- {"\u200ca", "", errContext},
- {"a\u200c", "", errContext},
- {"\u200c\u0627", "", errContext}, // missing JoinStart
- {"\u062c\u200c", "", errContext}, // missing JoinEnd
- {"\u0610\u200c\u0610\u0627", "", errContext}, // missing JoinStart
- {"\u062c\u0610\u200c\u0610", "", errContext}, // missing JoinEnd
- // Variants of: D T* U+200c T* R
- {"\u062c\u200c\u0627", "\u062c\u200c\u0627", nil},
- {"\u062c\u0610\u200c\u0610\u0627", "\u062c\u0610\u200c\u0610\u0627", nil},
- {"\u062c\u0610\u0610\u200c\u0610\u0610\u0627", "\u062c\u0610\u0610\u200c\u0610\u0610\u0627", nil},
- {"\u062c\u0610\u200c\u0627", "\u062c\u0610\u200c\u0627", nil},
- {"\u062c\u200c\u0610\u0627", "\u062c\u200c\u0610\u0627", nil},
- // Variants of: L T* U+200c T* D
- {"\ua872\u200c\u062c", "\ua872\u200c\u062c", nil},
- {"\ua872\u0610\u200c\u0610\u062c", "\ua872\u0610\u200c\u0610\u062c", nil},
- {"\ua872\u0610\u0610\u200c\u0610\u0610\u062c", "\ua872\u0610\u0610\u200c\u0610\u0610\u062c", nil},
- {"\ua872\u0610\u200c\u062c", "\ua872\u0610\u200c\u062c", nil},
- {"\ua872\u200c\u0610\u062c", "\ua872\u200c\u0610\u062c", nil},
- // Virama
- {"\u0a4d\u200c", "\u0a4d\u200c", nil}, // U+200C directly after a Virama
- {"\ua872\u0a4d\u200c", "\ua872\u0a4d\u200c", nil},
- {"\ua872\u0a4d\u0610\u200c", "", errContext}, // the rune before U+200C is U+0610, not the Virama
- {"\ua872\u0a4d\u0610\u200c", "", errContext}, // NOTE(review): identical to the previous row — confirm whether a different input was intended
- {"\u0acd\u200c", "\u0acd\u200c", nil},
- {"\ua872\u0acd\u200c", "\ua872\u0acd\u200c", nil},
- {"\ua872\u0acd\u0610\u200c", "", errContext}, // the rune before U+200C is U+0610, not the Virama
- {"\ua872\u0acd\u0610\u200c", "", errContext}, // NOTE(review): identical to the previous row — confirm whether a different input was intended
- // Using Virama as join T
- {"\ua872\u0acd\u200c\u062c", "\ua872\u0acd\u200c\u062c", nil},
- {"\ua872\u200c\u0acd\u062c", "\ua872\u200c\u0acd\u062c", nil},
- }},
- {"Context Rule 2", NewFreeform(), []testCase{
- // Rule 2: zero-width joiner (U+200D)
- {"\u200d", "", errContext},
- {"\u200da", "", errContext},
- {"a\u200d", "", errContext},
- {"\u0a4d\u200d", "\u0a4d\u200d", nil}, // U+200D directly after a Virama
- {"\ua872\u0a4d\u200d", "\ua872\u0a4d\u200d", nil},
- {"\u0a4da\u200d", "", errContext}, // 'a' sits between the Virama and U+200D
- }},
- {"Context Rule 3", NewFreeform(), []testCase{
- // Rule 3: middle dot; the only accepted rows below have 'l' on both sides of the dot
- {"·", "", errContext},
- {"l·", "", errContext},
- {"·l", "", errContext},
- {"a·", "", errContext},
- {"l·a", "", errContext},
- {"a·a", "", errContext},
- {"l·l", "l·l", nil},
- {"al·la", "al·la", nil},
- }},
- {"Context Rule 4", NewFreeform(), []testCase{
- // Rule 4: Greek lower numeral U+0375
- {"͵", "", errContext},
- {"͵a", "", errContext},
- {"α͵", "", errContext},
- {"͵α", "͵α", nil},
- {"α͵α", "α͵α", nil},
- {"͵͵α", "͵͵α", nil}, // The numeric sign is itself Greek.
- {"α͵͵α", "α͵͵α", nil},
- {"α͵͵", "", errContext},
- {"α͵͵a", "", errContext},
- }},
- {"Context Rule 5+6", NewFreeform(), []testCase{
- // Rule 5+6: Hebrew preceding
- // U+05f3: Geresh
- {"׳", "", errContext},
- {"׳ה", "", errContext},
- {"a׳b", "", errContext},
- {"ש׳", "ש׳", nil}, // U+05e9 U+05f3
- {"ש׳׳׳", "ש׳׳׳", nil}, // U+05e9 U+05f3
- // U+05f4: Gershayim
- {"״", "", errContext},
- {"״ה", "", errContext},
- {"a״b", "", errContext},
- {"ש״", "ש״", nil}, // U+05e9 U+05f4
- {"ש״״״", "ש״״״", nil}, // U+05e9 U+05f4
- {"aש״״״", "aש״״״", nil}, // U+05e9 U+05f4
- }},
- {"Context Rule 7", NewFreeform(), []testCase{
- // Rule 7: Katakana middle Dot
- {"・", "", errContext},
- {"abc・", "", errContext},
- {"・def", "", errContext},
- {"abc・def", "", errContext},
- {"aヅc・def", "aヅc・def", nil}, // contains a Katakana letter
- {"abc・dぶf", "abc・dぶf", nil}, // contains a Hiragana letter
- {"⺐bc・def", "⺐bc・def", nil}, // contains a CJK radical
- }},
- {"Context Rule 8+9", NewFreeform(), []testCase{
- // Rule 8+9: Arabic Indic Digit
- {"١٢٣٤٥۶", "", errContext}, // the digits of the accepted row below plus one digit from the other digit set
- {"۱۲۳۴۵٦", "", errContext}, // the digits of the accepted row below plus one digit from the other digit set
- {"١٢٣٤٥", "١٢٣٤٥", nil},
- {"۱۲۳۴۵", "۱۲۳۴۵", nil},
- }},
- {"Nickname", Nickname, []testCase{
- {" Swan of Avon ", "Swan of Avon", nil},
- {"", "", errEmptyString},
- {" ", "", errEmptyString},
- {" ", "", errEmptyString}, // NOTE(review): reads the same as the previous row here — confirm the intended whitespace
- {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil},
- {"Foo", "Foo", nil},
- {"foo", "foo", nil},
- {"Foo Bar", "Foo Bar", nil},
- {"foo bar", "foo bar", nil},
- {"\u03A3", "\u03A3", nil},
- {"\u03C3", "\u03C3", nil},
- // Greek final sigma is left as is (do not fold!)
- {"\u03C2", "\u03C2", nil},
- {"\u265A", "♚", nil},
- {"Richard \u2163", "Richard IV", nil}, // U+2163 comes out as the two letters "IV"
- {"\u212B", "Å", nil},
- {"\uFB00", "ff", nil}, // because of NFKC
- {"שa", "שa", nil}, // no bidi rule
- {"동일조건변경허락", "동일조건변경허락", nil},
- }},
- {"OpaqueString", OpaqueString, []testCase{
- {" Swan of Avon ", " Swan of Avon ", nil},
- {"", "", errEmptyString},
- {" ", " ", nil},
- {" ", " ", nil}, // NOTE(review): reads the same as the previous row here — confirm the intended whitespace
- {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil},
- {"Foo", "Foo", nil},
- {"foo", "foo", nil},
- {"Foo Bar", "Foo Bar", nil},
- {"foo bar", "foo bar", nil},
- {"\u03C3", "\u03C3", nil},
- {"Richard \u2163", "Richard \u2163", nil}, // unlike the Nickname row, U+2163 is left unchanged
- {"\u212B", "Å", nil},
- {"Jack of \u2666s", "Jack of \u2666s", nil},
- {"my cat is a \u0009by", "", errDisallowedRune}, // contains a tab (U+0009)
- {"שa", "שa", nil}, // no bidi rule
- }},
- {"UsernameCaseMapped", UsernameCaseMapped, []testCase{
- // TODO: Should this work?
- // {UsernameCaseMapped, "", "", errDisallowedRune},
- {"juliet@example.com", "juliet@example.com", nil},
- {"fussball", "fussball", nil},
- {"fu\u00DFball", "fu\u00DFball", nil},
- {"\u03C0", "\u03C0", nil},
- {"\u03A3", "\u03C3", nil},
- {"\u03C3", "\u03C3", nil},
- // Greek final sigma is left as is (do not fold!)
- {"\u03C2", "\u03C2", nil},
- {"\u0049", "\u0069", nil},
- {"\u0049", "\u0069", nil}, // NOTE(review): identical to the previous row — confirm whether a different input was intended
- {"\u03D2", "", errDisallowedRune},
- {"\u03B0", "\u03B0", nil},
- {"foo bar", "", errDisallowedRune},
- {"♚", "", bidirule.ErrInvalid},
- {"\u007E", "~", nil},
- {"a", "a", nil},
- {"!", "!", nil},
- {"²", "", bidirule.ErrInvalid},
- {"\t", "", errDisallowedRune},
- {"\n", "", errDisallowedRune},
- {"\u26D6", "", bidirule.ErrInvalid},
- {"\u26FF", "", bidirule.ErrInvalid},
- {"\uFB00", "", errDisallowedRune},
- {"\u1680", "", bidirule.ErrInvalid},
- {" ", "", errDisallowedRune},
- {" ", "", errDisallowedRune}, // NOTE(review): reads the same as the previous row here — confirm the intended whitespace
- {"\u01C5", "", errDisallowedRune},
- {"\u16EE", "", errDisallowedRune}, // Nl RUNIC ARLAUG SYMBOL
- {"\u0488", "", bidirule.ErrInvalid}, // Me COMBINING CYRILLIC HUNDRED THOUSANDS SIGN
- {"\u212B", "\u00e5", nil}, // Angstrom sign, NFC -> U+00E5
- {"A\u030A", "å", nil}, // A + ring
- {"\u00C5", "å", nil}, // A with ring
- {"\u00E7", "ç", nil}, // c cedille
- {"\u0063\u0327", "ç", nil}, // c + cedille
- {"\u0158", "ř", nil},
- {"\u0052\u030C", "ř", nil},
- {"\u1E61", "\u1E61", nil}, // LATIN SMALL LETTER S WITH DOT ABOVE
- // Confusable characters ARE allowed and are NOT replaced by a look-alike; the row below is only lowercased (U+0410 -> U+0430).
- {"\u0410", "\u0430", nil}, // CYRILLIC CAPITAL LETTER A
- // Fullwidth characters should be mapped to their decomposition mappings (width mapping).
- {"AB", "ab", nil}, // NOTE(review): the input does not look fullwidth as shown here — confirm the literal
- {"שc", "", bidirule.ErrInvalid}, // bidi rule
- }},
- {"UsernameCasePreserved", UsernameCasePreserved, []testCase{
- {"ABC", "ABC", nil},
- {"AB", "AB", nil},
- {"שc", "", bidirule.ErrInvalid}, // bidi rule
- {"\uFB00", "", errDisallowedRune},
- {"\u212B", "\u00c5", nil}, // Angstrom sign, NFC -> U+00C5 (upper case is kept in this profile)
- {"ẛ", "", errDisallowedRune}, // LATIN SMALL LETTER LONG S WITH DOT ABOVE
- }},
- {"UsernameCaseMappedRestricted", NewRestrictedProfile(UsernameCaseMapped, runes.Predicate(func(r rune) bool {
- return strings.ContainsRune(`@`, r) // true only for '@'
- })), []testCase{
- {"juliet@example.com", "", errDisallowedRune}, // accepted by plain UsernameCaseMapped above; here the '@' is rejected
- {"\u0049", "\u0069", nil},
- }},
- }
|