punycode.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package idna
  5. // This file implements the Punycode algorithm from RFC 3492.
  6. import (
  7. "math"
  8. "strings"
  9. "unicode/utf8"
  10. )
  11. // These parameter values are specified in section 5.
  12. //
  13. // All computation is done with int32s, so that overflow behavior is identical
  14. // regardless of whether int is 32-bit or 64-bit.
  15. const (
  16. base int32 = 36
  17. damp int32 = 700
  18. initialBias int32 = 72
  19. initialN int32 = 128
  20. skew int32 = 38
  21. tmax int32 = 26
  22. tmin int32 = 1
  23. )
  24. func punyError(s string) error { return &labelError{s, "A3"} }
  25. // decode decodes a string as specified in section 6.2.
  26. func decode(encoded string) (string, error) {
  27. if encoded == "" {
  28. return "", nil
  29. }
  30. pos := 1 + strings.LastIndex(encoded, "-")
  31. if pos == 1 {
  32. return "", punyError(encoded)
  33. }
  34. if pos == len(encoded) {
  35. return encoded[:len(encoded)-1], nil
  36. }
  37. output := make([]rune, 0, len(encoded))
  38. if pos != 0 {
  39. for _, r := range encoded[:pos-1] {
  40. output = append(output, r)
  41. }
  42. }
  43. i, n, bias := int32(0), initialN, initialBias
  44. for pos < len(encoded) {
  45. oldI, w := i, int32(1)
  46. for k := base; ; k += base {
  47. if pos == len(encoded) {
  48. return "", punyError(encoded)
  49. }
  50. digit, ok := decodeDigit(encoded[pos])
  51. if !ok {
  52. return "", punyError(encoded)
  53. }
  54. pos++
  55. i += digit * w
  56. if i < 0 {
  57. return "", punyError(encoded)
  58. }
  59. t := k - bias
  60. if t < tmin {
  61. t = tmin
  62. } else if t > tmax {
  63. t = tmax
  64. }
  65. if digit < t {
  66. break
  67. }
  68. w *= base - t
  69. if w >= math.MaxInt32/base {
  70. return "", punyError(encoded)
  71. }
  72. }
  73. x := int32(len(output) + 1)
  74. bias = adapt(i-oldI, x, oldI == 0)
  75. n += i / x
  76. i %= x
  77. if n > utf8.MaxRune || len(output) >= 1024 {
  78. return "", punyError(encoded)
  79. }
  80. output = append(output, 0)
  81. copy(output[i+1:], output[i:])
  82. output[i] = n
  83. i++
  84. }
  85. return string(output), nil
  86. }
  87. // encode encodes a string as specified in section 6.3 and prepends prefix to
  88. // the result.
  89. //
  90. // The "while h < length(input)" line in the specification becomes "for
  91. // remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
  92. func encode(prefix, s string) (string, error) {
  93. output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
  94. copy(output, prefix)
  95. delta, n, bias := int32(0), initialN, initialBias
  96. b, remaining := int32(0), int32(0)
  97. for _, r := range s {
  98. if r < 0x80 {
  99. b++
  100. output = append(output, byte(r))
  101. } else {
  102. remaining++
  103. }
  104. }
  105. h := b
  106. if b > 0 {
  107. output = append(output, '-')
  108. }
  109. for remaining != 0 {
  110. m := int32(0x7fffffff)
  111. for _, r := range s {
  112. if m > r && r >= n {
  113. m = r
  114. }
  115. }
  116. delta += (m - n) * (h + 1)
  117. if delta < 0 {
  118. return "", punyError(s)
  119. }
  120. n = m
  121. for _, r := range s {
  122. if r < n {
  123. delta++
  124. if delta < 0 {
  125. return "", punyError(s)
  126. }
  127. continue
  128. }
  129. if r > n {
  130. continue
  131. }
  132. q := delta
  133. for k := base; ; k += base {
  134. t := k - bias
  135. if t < tmin {
  136. t = tmin
  137. } else if t > tmax {
  138. t = tmax
  139. }
  140. if q < t {
  141. break
  142. }
  143. output = append(output, encodeDigit(t+(q-t)%(base-t)))
  144. q = (q - t) / (base - t)
  145. }
  146. output = append(output, encodeDigit(q))
  147. bias = adapt(delta, h+1, h == b)
  148. delta = 0
  149. h++
  150. remaining--
  151. }
  152. delta++
  153. n++
  154. }
  155. return string(output), nil
  156. }
  157. func decodeDigit(x byte) (digit int32, ok bool) {
  158. switch {
  159. case '0' <= x && x <= '9':
  160. return int32(x - ('0' - 26)), true
  161. case 'A' <= x && x <= 'Z':
  162. return int32(x - 'A'), true
  163. case 'a' <= x && x <= 'z':
  164. return int32(x - 'a'), true
  165. }
  166. return 0, false
  167. }
  168. func encodeDigit(digit int32) byte {
  169. switch {
  170. case 0 <= digit && digit < 26:
  171. return byte(digit + 'a')
  172. case 26 <= digit && digit < 36:
  173. return byte(digit + ('0' - 26))
  174. }
  175. panic("idna: internal error in punycode encoding")
  176. }
  177. // adapt is the bias adaptation function specified in section 6.1.
  178. func adapt(delta, numPoints int32, firstTime bool) int32 {
  179. if firstTime {
  180. delta /= damp
  181. } else {
  182. delta /= 2
  183. }
  184. delta += delta / numPoints
  185. k := int32(0)
  186. for delta > ((base-tmin)*tmax)/2 {
  187. delta /= base - tmin
  188. k += base
  189. }
  190. return k + (base-tmin+1)*delta/(delta+skew)
  191. }