profile.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package precis
  5. import (
  6. "bytes"
  7. "errors"
  8. "unicode/utf8"
  9. "golang.org/x/text/cases"
  10. "golang.org/x/text/language"
  11. "golang.org/x/text/runes"
  12. "golang.org/x/text/secure/bidirule"
  13. "golang.org/x/text/transform"
  14. "golang.org/x/text/width"
  15. )
  16. var (
  17. errDisallowedRune = errors.New("precis: disallowed rune encountered")
  18. )
  19. var dpTrie = newDerivedPropertiesTrie(0)
  20. // A Profile represents a set of rules for normalizing and validating strings in
  21. // the PRECIS framework.
  22. type Profile struct {
  23. options
  24. class *class
  25. }
  26. // NewIdentifier creates a new PRECIS profile based on the Identifier string
  27. // class. Profiles created from this class are suitable for use where safety is
  28. // prioritized over expressiveness like network identifiers, user accounts, chat
  29. // rooms, and file names.
  30. func NewIdentifier(opts ...Option) *Profile {
  31. return &Profile{
  32. options: getOpts(opts...),
  33. class: identifier,
  34. }
  35. }
  36. // NewFreeform creates a new PRECIS profile based on the Freeform string class.
  37. // Profiles created from this class are suitable for use where expressiveness is
  38. // prioritized over safety like passwords, and display-elements such as
  39. // nicknames in a chat room.
  40. func NewFreeform(opts ...Option) *Profile {
  41. return &Profile{
  42. options: getOpts(opts...),
  43. class: freeform,
  44. }
  45. }
  46. // NewRestrictedProfile creates a new PRECIS profile based on an existing
  47. // profile.
  48. // If the parent profile already had the Disallow option set, the new rule
  49. // overrides the parents rule.
  50. func NewRestrictedProfile(parent *Profile, disallow runes.Set) *Profile {
  51. p := *parent
  52. Disallow(disallow)(&p.options)
  53. return &p
  54. }
  55. // NewTransformer creates a new transform.Transformer that performs the PRECIS
  56. // preparation and enforcement steps on the given UTF-8 encoded bytes.
  57. func (p *Profile) NewTransformer() *Transformer {
  58. var ts []transform.Transformer
  59. // These transforms are applied in the order defined in
  60. // https://tools.ietf.org/html/rfc7564#section-7
  61. // RFC 8266 §2.1:
  62. //
  63. // Implementation experience has shown that applying the rules for the
  64. // Nickname profile is not an idempotent procedure for all code points.
  65. // Therefore, an implementation SHOULD apply the rules repeatedly until
  66. // the output string is stable; if the output string does not stabilize
  67. // after reapplying the rules three (3) additional times after the first
  68. // application, the implementation SHOULD terminate application of the
  69. // rules and reject the input string as invalid.
  70. //
  71. // There is no known string that will change indefinitely, so repeat 4 times
  72. // and rely on the Span method to keep things relatively performant.
  73. r := 1
  74. if p.options.repeat {
  75. r = 4
  76. }
  77. for ; r > 0; r-- {
  78. if p.options.foldWidth {
  79. ts = append(ts, width.Fold)
  80. }
  81. for _, f := range p.options.additional {
  82. ts = append(ts, f())
  83. }
  84. if p.options.cases != nil {
  85. ts = append(ts, p.options.cases)
  86. }
  87. ts = append(ts, p.options.norm)
  88. if p.options.bidiRule {
  89. ts = append(ts, bidirule.New())
  90. }
  91. ts = append(ts, &checker{p: p, allowed: p.Allowed()})
  92. }
  93. // TODO: Add the disallow empty rule with a dummy transformer?
  94. return &Transformer{transform.Chain(ts...)}
  95. }
  96. var errEmptyString = errors.New("precis: transformation resulted in empty string")
  97. type buffers struct {
  98. src []byte
  99. buf [2][]byte
  100. next int
  101. }
  102. func (b *buffers) apply(t transform.SpanningTransformer) (err error) {
  103. n, err := t.Span(b.src, true)
  104. if err != transform.ErrEndOfSpan {
  105. return err
  106. }
  107. x := b.next & 1
  108. if b.buf[x] == nil {
  109. b.buf[x] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2)
  110. }
  111. span := append(b.buf[x][:0], b.src[:n]...)
  112. b.src, _, err = transform.Append(t, span, b.src[n:])
  113. b.buf[x] = b.src
  114. b.next++
  115. return err
  116. }
  117. // Pre-allocate transformers when possible. In some cases this avoids allocation.
  118. var (
  119. foldWidthT transform.SpanningTransformer = width.Fold
  120. lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false))
  121. )
  122. // TODO: make this a method on profile.
  123. func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) {
  124. b.src = src
  125. ascii := true
  126. for _, c := range src {
  127. if c >= utf8.RuneSelf {
  128. ascii = false
  129. break
  130. }
  131. }
  132. // ASCII fast path.
  133. if ascii {
  134. for _, f := range p.options.additional {
  135. if err = b.apply(f()); err != nil {
  136. return nil, err
  137. }
  138. }
  139. switch {
  140. case p.options.asciiLower || (comparing && p.options.ignorecase):
  141. for i, c := range b.src {
  142. if 'A' <= c && c <= 'Z' {
  143. b.src[i] = c ^ 1<<5
  144. }
  145. }
  146. case p.options.cases != nil:
  147. b.apply(p.options.cases)
  148. }
  149. c := checker{p: p}
  150. if _, err := c.span(b.src, true); err != nil {
  151. return nil, err
  152. }
  153. if p.disallow != nil {
  154. for _, c := range b.src {
  155. if p.disallow.Contains(rune(c)) {
  156. return nil, errDisallowedRune
  157. }
  158. }
  159. }
  160. if p.options.disallowEmpty && len(b.src) == 0 {
  161. return nil, errEmptyString
  162. }
  163. return b.src, nil
  164. }
  165. // These transforms are applied in the order defined in
  166. // https://tools.ietf.org/html/rfc8264#section-7
  167. r := 1
  168. if p.options.repeat {
  169. r = 4
  170. }
  171. for ; r > 0; r-- {
  172. // TODO: allow different width transforms options.
  173. if p.options.foldWidth || (p.options.ignorecase && comparing) {
  174. b.apply(foldWidthT)
  175. }
  176. for _, f := range p.options.additional {
  177. if err = b.apply(f()); err != nil {
  178. return nil, err
  179. }
  180. }
  181. if p.options.cases != nil {
  182. b.apply(p.options.cases)
  183. }
  184. if comparing && p.options.ignorecase {
  185. b.apply(lowerCaseT)
  186. }
  187. b.apply(p.norm)
  188. if p.options.bidiRule && !bidirule.Valid(b.src) {
  189. return nil, bidirule.ErrInvalid
  190. }
  191. c := checker{p: p}
  192. if _, err := c.span(b.src, true); err != nil {
  193. return nil, err
  194. }
  195. if p.disallow != nil {
  196. for i := 0; i < len(b.src); {
  197. r, size := utf8.DecodeRune(b.src[i:])
  198. if p.disallow.Contains(r) {
  199. return nil, errDisallowedRune
  200. }
  201. i += size
  202. }
  203. }
  204. if p.options.disallowEmpty && len(b.src) == 0 {
  205. return nil, errEmptyString
  206. }
  207. }
  208. return b.src, nil
  209. }
  210. // Append appends the result of applying p to src writing the result to dst.
  211. // It returns an error if the input string is invalid.
  212. func (p *Profile) Append(dst, src []byte) ([]byte, error) {
  213. var buf buffers
  214. b, err := buf.enforce(p, src, false)
  215. if err != nil {
  216. return nil, err
  217. }
  218. return append(dst, b...), nil
  219. }
  220. func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
  221. var buf buffers
  222. b, err := buf.enforce(p, b, key)
  223. if err != nil {
  224. return nil, err
  225. }
  226. if buf.next == 0 {
  227. c := make([]byte, len(b))
  228. copy(c, b)
  229. return c, nil
  230. }
  231. return b, nil
  232. }
  233. // Bytes returns a new byte slice with the result of applying the profile to b.
  234. func (p *Profile) Bytes(b []byte) ([]byte, error) {
  235. return processBytes(p, b, false)
  236. }
  237. // AppendCompareKey appends the result of applying p to src (including any
  238. // optional rules to make strings comparable or useful in a map key such as
  239. // applying lowercasing) writing the result to dst. It returns an error if the
  240. // input string is invalid.
  241. func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) {
  242. var buf buffers
  243. b, err := buf.enforce(p, src, true)
  244. if err != nil {
  245. return nil, err
  246. }
  247. return append(dst, b...), nil
  248. }
  249. func processString(p *Profile, s string, key bool) (string, error) {
  250. var buf buffers
  251. b, err := buf.enforce(p, []byte(s), key)
  252. if err != nil {
  253. return "", err
  254. }
  255. return string(b), nil
  256. }
  257. // String returns a string with the result of applying the profile to s.
  258. func (p *Profile) String(s string) (string, error) {
  259. return processString(p, s, false)
  260. }
  261. // CompareKey returns a string that can be used for comparison, hashing, or
  262. // collation.
  263. func (p *Profile) CompareKey(s string) (string, error) {
  264. return processString(p, s, true)
  265. }
  266. // Compare enforces both strings, and then compares them for bit-string identity
  267. // (byte-for-byte equality). If either string cannot be enforced, the comparison
  268. // is false.
  269. func (p *Profile) Compare(a, b string) bool {
  270. var buf buffers
  271. akey, err := buf.enforce(p, []byte(a), true)
  272. if err != nil {
  273. return false
  274. }
  275. buf = buffers{}
  276. bkey, err := buf.enforce(p, []byte(b), true)
  277. if err != nil {
  278. return false
  279. }
  280. return bytes.Compare(akey, bkey) == 0
  281. }
  282. // Allowed returns a runes.Set containing every rune that is a member of the
  283. // underlying profile's string class and not disallowed by any profile specific
  284. // rules.
  285. func (p *Profile) Allowed() runes.Set {
  286. if p.options.disallow != nil {
  287. return runes.Predicate(func(r rune) bool {
  288. return p.class.Contains(r) && !p.options.disallow.Contains(r)
  289. })
  290. }
  291. return p.class
  292. }
  293. type checker struct {
  294. p *Profile
  295. allowed runes.Set
  296. beforeBits catBitmap
  297. termBits catBitmap
  298. acceptBits catBitmap
  299. }
  300. func (c *checker) Reset() {
  301. c.beforeBits = 0
  302. c.termBits = 0
  303. c.acceptBits = 0
  304. }
  305. func (c *checker) span(src []byte, atEOF bool) (n int, err error) {
  306. for n < len(src) {
  307. e, sz := dpTrie.lookup(src[n:])
  308. d := categoryTransitions[category(e&catMask)]
  309. if sz == 0 {
  310. if !atEOF {
  311. return n, transform.ErrShortSrc
  312. }
  313. return n, errDisallowedRune
  314. }
  315. doLookAhead := false
  316. if property(e) < c.p.class.validFrom {
  317. if d.rule == nil {
  318. return n, errDisallowedRune
  319. }
  320. doLookAhead, err = d.rule(c.beforeBits)
  321. if err != nil {
  322. return n, err
  323. }
  324. }
  325. c.beforeBits &= d.keep
  326. c.beforeBits |= d.set
  327. if c.termBits != 0 {
  328. // We are currently in an unterminated lookahead.
  329. if c.beforeBits&c.termBits != 0 {
  330. c.termBits = 0
  331. c.acceptBits = 0
  332. } else if c.beforeBits&c.acceptBits == 0 {
  333. // Invalid continuation of the unterminated lookahead sequence.
  334. return n, errContext
  335. }
  336. }
  337. if doLookAhead {
  338. if c.termBits != 0 {
  339. // A previous lookahead run has not been terminated yet.
  340. return n, errContext
  341. }
  342. c.termBits = d.term
  343. c.acceptBits = d.accept
  344. }
  345. n += sz
  346. }
  347. if m := c.beforeBits >> finalShift; c.beforeBits&m != m || c.termBits != 0 {
  348. err = errContext
  349. }
  350. return n, err
  351. }
  352. // TODO: we may get rid of this transform if transform.Chain understands
  353. // something like a Spanner interface.
  354. func (c checker) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  355. short := false
  356. if len(dst) < len(src) {
  357. src = src[:len(dst)]
  358. atEOF = false
  359. short = true
  360. }
  361. nSrc, err = c.span(src, atEOF)
  362. nDst = copy(dst, src[:nSrc])
  363. if short && (err == transform.ErrShortSrc || err == nil) {
  364. err = transform.ErrShortDst
  365. }
  366. return nDst, nSrc, err
  367. }