cases.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:generate go run gen.go gen_trieval.go
  5. // Package cases provides general and language-specific case mappers.
  6. package cases // import "golang.org/x/text/cases"
  7. import (
  8. "golang.org/x/text/language"
  9. "golang.org/x/text/transform"
  10. )
  11. // References:
  12. // - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
  13. // - https://www.unicode.org/reports/tr29/
  14. // - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
  15. // - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
  16. // - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
  17. // - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
  18. // - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
  19. // - http://userguide.icu-project.org/transforms/casemappings
  20. // TODO:
  21. // - Case folding
  22. // - Wide and Narrow?
  23. // - Segmenter option for title casing.
  24. // - ASCII fast paths
  25. // - Encode Soft-Dotted property within trie somehow.
  26. // A Caser transforms given input to a certain case. It implements
  27. // transform.Transformer.
  28. //
  29. // A Caser may be stateful and should therefore not be shared between
  30. // goroutines.
  31. type Caser struct {
  32. t transform.SpanningTransformer
  33. }
  34. // Bytes returns a new byte slice with the result of converting b to the case
  35. // form implemented by c.
  36. func (c Caser) Bytes(b []byte) []byte {
  37. b, _, _ = transform.Bytes(c.t, b)
  38. return b
  39. }
  40. // String returns a string with the result of transforming s to the case form
  41. // implemented by c.
  42. func (c Caser) String(s string) string {
  43. s, _, _ = transform.String(c.t, s)
  44. return s
  45. }
  46. // Reset resets the Caser to be reused for new input after a previous call to
  47. // Transform.
  48. func (c Caser) Reset() { c.t.Reset() }
  49. // Transform implements the transform.Transformer interface and transforms the
  50. // given input to the case form implemented by c.
  51. func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  52. return c.t.Transform(dst, src, atEOF)
  53. }
  54. // Span implements the transform.SpanningTransformer interface.
  55. func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
  56. return c.t.Span(src, atEOF)
  57. }
  58. // Upper returns a Caser for language-specific uppercasing.
  59. func Upper(t language.Tag, opts ...Option) Caser {
  60. return Caser{makeUpper(t, getOpts(opts...))}
  61. }
  62. // Lower returns a Caser for language-specific lowercasing.
  63. func Lower(t language.Tag, opts ...Option) Caser {
  64. return Caser{makeLower(t, getOpts(opts...))}
  65. }
  66. // Title returns a Caser for language-specific title casing. It uses an
  67. // approximation of the default Unicode Word Break algorithm.
  68. func Title(t language.Tag, opts ...Option) Caser {
  69. return Caser{makeTitle(t, getOpts(opts...))}
  70. }
  71. // Fold returns a Caser that implements Unicode case folding. The returned Caser
  72. // is stateless and safe to use concurrently by multiple goroutines.
  73. //
  74. // Case folding does not normalize the input and may not preserve a normal form.
  75. // Use the collate or search package for more convenient and linguistically
  76. // sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
  77. // where security aspects are a concern.
  78. func Fold(opts ...Option) Caser {
  79. return Caser{makeFold(getOpts(opts...))}
  80. }
  81. // An Option is used to modify the behavior of a Caser.
  82. type Option func(o options) options
  83. // TODO: consider these options to take a boolean as well, like FinalSigma.
  84. // The advantage of using this approach is that other providers of a lower-case
  85. // algorithm could set different defaults by prefixing a user-provided slice
  86. // of options with their own. This is handy, for instance, for the precis
  87. // package which would override the default to not handle the Greek final sigma.
  88. var (
  89. // NoLower disables the lowercasing of non-leading letters for a title
  90. // caser.
  91. NoLower Option = noLower
  92. // Compact omits mappings in case folding for characters that would grow the
  93. // input. (Unimplemented.)
  94. Compact Option = compact
  95. )
  // TODO: option to preserve a normal form, if applicable?

  // options holds the settings accumulated by applying Option values. The zero
  // value has every flag unset.
  type options struct {
  	// noLower is set by the NoLower option.
  	noLower bool
  	// simple is set by the Compact option.
  	simple bool

  	// TODO: segmenter, max ignorable, alternative versions, etc.

  	// ignoreFinalSigma is set by HandleFinalSigma(false) and cleared by
  	// HandleFinalSigma(true).
  	ignoreFinalSigma bool
  }
  103. func getOpts(o ...Option) (res options) {
  104. for _, f := range o {
  105. res = f(res)
  106. }
  107. return
  108. }
  109. func noLower(o options) options {
  110. o.noLower = true
  111. return o
  112. }
  113. func compact(o options) options {
  114. o.simple = true
  115. return o
  116. }
  117. // HandleFinalSigma specifies whether the special handling of Greek final sigma
  118. // should be enabled. Unicode prescribes handling the Greek final sigma for all
  119. // locales, but standards like IDNA and PRECIS override this default.
  120. func HandleFinalSigma(enable bool) Option {
  121. if enable {
  122. return handleFinalSigma
  123. }
  124. return ignoreFinalSigma
  125. }
  126. func ignoreFinalSigma(o options) options {
  127. o.ignoreFinalSigma = true
  128. return o
  129. }
  130. func handleFinalSigma(o options) options {
  131. o.ignoreFinalSigma = false
  132. return o
  133. }