big5.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package traditionalchinese
  5. import (
  6. "unicode/utf8"
  7. "golang.org/x/text/encoding"
  8. "golang.org/x/text/encoding/internal"
  9. "golang.org/x/text/encoding/internal/identifier"
  10. "golang.org/x/text/transform"
  11. )
  12. // All is a list of all defined encodings in this package.
  13. var All = []encoding.Encoding{Big5}
  14. // Big5 is the Big5 encoding, also known as Code Page 950.
  15. var Big5 encoding.Encoding = &big5
  16. var big5 = internal.Encoding{
  17. &internal.SimpleEncoding{big5Decoder{}, big5Encoder{}},
  18. "Big5",
  19. identifier.Big5,
  20. }
  21. type big5Decoder struct{ transform.NopResetter }
  22. func (big5Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  23. r, size, s := rune(0), 0, ""
  24. loop:
  25. for ; nSrc < len(src); nSrc += size {
  26. switch c0 := src[nSrc]; {
  27. case c0 < utf8.RuneSelf:
  28. r, size = rune(c0), 1
  29. case 0x81 <= c0 && c0 < 0xff:
  30. if nSrc+1 >= len(src) {
  31. if !atEOF {
  32. err = transform.ErrShortSrc
  33. break loop
  34. }
  35. r, size = utf8.RuneError, 1
  36. goto write
  37. }
  38. c1 := src[nSrc+1]
  39. switch {
  40. case 0x40 <= c1 && c1 < 0x7f:
  41. c1 -= 0x40
  42. case 0xa1 <= c1 && c1 < 0xff:
  43. c1 -= 0x62
  44. case c1 < 0x40:
  45. r, size = utf8.RuneError, 1
  46. goto write
  47. default:
  48. r, size = utf8.RuneError, 2
  49. goto write
  50. }
  51. r, size = '\ufffd', 2
  52. if i := int(c0-0x81)*157 + int(c1); i < len(decode) {
  53. if 1133 <= i && i < 1167 {
  54. // The two-rune special cases for LATIN CAPITAL / SMALL E WITH CIRCUMFLEX
  55. // AND MACRON / CARON are from http://encoding.spec.whatwg.org/#big5
  56. switch i {
  57. case 1133:
  58. s = "\u00CA\u0304"
  59. goto writeStr
  60. case 1135:
  61. s = "\u00CA\u030C"
  62. goto writeStr
  63. case 1164:
  64. s = "\u00EA\u0304"
  65. goto writeStr
  66. case 1166:
  67. s = "\u00EA\u030C"
  68. goto writeStr
  69. }
  70. }
  71. r = rune(decode[i])
  72. if r == 0 {
  73. r = '\ufffd'
  74. }
  75. }
  76. default:
  77. r, size = utf8.RuneError, 1
  78. }
  79. write:
  80. if nDst+utf8.RuneLen(r) > len(dst) {
  81. err = transform.ErrShortDst
  82. break loop
  83. }
  84. nDst += utf8.EncodeRune(dst[nDst:], r)
  85. continue loop
  86. writeStr:
  87. if nDst+len(s) > len(dst) {
  88. err = transform.ErrShortDst
  89. break loop
  90. }
  91. nDst += copy(dst[nDst:], s)
  92. continue loop
  93. }
  94. return nDst, nSrc, err
  95. }
  96. type big5Encoder struct{ transform.NopResetter }
  97. func (big5Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  98. r, size := rune(0), 0
  99. for ; nSrc < len(src); nSrc += size {
  100. r = rune(src[nSrc])
  101. // Decode a 1-byte rune.
  102. if r < utf8.RuneSelf {
  103. size = 1
  104. if nDst >= len(dst) {
  105. err = transform.ErrShortDst
  106. break
  107. }
  108. dst[nDst] = uint8(r)
  109. nDst++
  110. continue
  111. } else {
  112. // Decode a multi-byte rune.
  113. r, size = utf8.DecodeRune(src[nSrc:])
  114. if size == 1 {
  115. // All valid runes of size 1 (those below utf8.RuneSelf) were
  116. // handled above. We have invalid UTF-8 or we haven't seen the
  117. // full character yet.
  118. if !atEOF && !utf8.FullRune(src[nSrc:]) {
  119. err = transform.ErrShortSrc
  120. break
  121. }
  122. }
  123. }
  124. if r >= utf8.RuneSelf {
  125. // func init checks that the switch covers all tables.
  126. switch {
  127. case encode0Low <= r && r < encode0High:
  128. if r = rune(encode0[r-encode0Low]); r != 0 {
  129. goto write2
  130. }
  131. case encode1Low <= r && r < encode1High:
  132. if r = rune(encode1[r-encode1Low]); r != 0 {
  133. goto write2
  134. }
  135. case encode2Low <= r && r < encode2High:
  136. if r = rune(encode2[r-encode2Low]); r != 0 {
  137. goto write2
  138. }
  139. case encode3Low <= r && r < encode3High:
  140. if r = rune(encode3[r-encode3Low]); r != 0 {
  141. goto write2
  142. }
  143. case encode4Low <= r && r < encode4High:
  144. if r = rune(encode4[r-encode4Low]); r != 0 {
  145. goto write2
  146. }
  147. case encode5Low <= r && r < encode5High:
  148. if r = rune(encode5[r-encode5Low]); r != 0 {
  149. goto write2
  150. }
  151. case encode6Low <= r && r < encode6High:
  152. if r = rune(encode6[r-encode6Low]); r != 0 {
  153. goto write2
  154. }
  155. case encode7Low <= r && r < encode7High:
  156. if r = rune(encode7[r-encode7Low]); r != 0 {
  157. goto write2
  158. }
  159. }
  160. err = internal.ErrASCIIReplacement
  161. break
  162. }
  163. write2:
  164. if nDst+2 > len(dst) {
  165. err = transform.ErrShortDst
  166. break
  167. }
  168. dst[nDst+0] = uint8(r >> 8)
  169. dst[nDst+1] = uint8(r)
  170. nDst += 2
  171. continue
  172. }
  173. return nDst, nSrc, err
  174. }
  175. func init() {
  176. // Check that the hard-coded encode switch covers all tables.
  177. if numEncodeTables != 8 {
  178. panic("bad numEncodeTables")
  179. }
  180. }