charset.go 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. package cp
  2. type charsetMap struct {
  3. sb [256]rune // single byte runes, -1 for a double byte character lead byte
  4. db map[int]rune // double byte runes
  5. }
  6. func collation2charset(col Collation) *charsetMap {
  7. // http://msdn.microsoft.com/en-us/library/ms144250.aspx
  8. // http://msdn.microsoft.com/en-us/library/ms144250(v=sql.105).aspx
  9. switch col.SortId {
  10. case 30, 31, 32, 33, 34:
  11. return cp437
  12. case 40, 41, 42, 44, 49, 55, 56, 57, 58, 59, 60, 61:
  13. return cp850
  14. case 50, 51, 52, 53, 54, 71, 72, 73, 74, 75:
  15. return cp1252
  16. case 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96:
  17. return cp1250
  18. case 104, 105, 106, 107, 108:
  19. return cp1251
  20. case 112, 113, 114, 121, 124:
  21. return cp1253
  22. case 128, 129, 130:
  23. return cp1254
  24. case 136, 137, 138:
  25. return cp1255
  26. case 144, 145, 146:
  27. return cp1256
  28. case 152, 153, 154, 155, 156, 157, 158, 159, 160:
  29. return cp1257
  30. case 183, 184, 185, 186:
  31. return cp1252
  32. case 192, 193:
  33. return cp932
  34. case 194, 195:
  35. return cp949
  36. case 196, 197:
  37. return cp950
  38. case 198, 199:
  39. return cp936
  40. case 200:
  41. return cp932
  42. case 201:
  43. return cp949
  44. case 202:
  45. return cp950
  46. case 203:
  47. return cp936
  48. case 204, 205, 206:
  49. return cp874
  50. case 210, 211, 212, 213, 214, 215, 216, 217:
  51. return cp1252
  52. }
  53. // http://technet.microsoft.com/en-us/library/aa176553(v=sql.80).aspx
  54. switch col.getLcid() {
  55. case 0x001e, 0x041e:
  56. return cp874
  57. case 0x0411, 0x10411:
  58. return cp932
  59. case 0x0804, 0x1004, 0x20804:
  60. return cp936
  61. case 0x0012, 0x0412:
  62. return cp949
  63. case 0x0404, 0x1404, 0x0c04, 0x7c04, 0x30404:
  64. return cp950
  65. case 0x041c, 0x041a, 0x0405, 0x040e, 0x104e, 0x0415, 0x0418, 0x041b, 0x0424, 0x1040e:
  66. return cp1250
  67. case 0x0423, 0x0402, 0x042f, 0x0419, 0x081a, 0x0c1a, 0x0422, 0x043f, 0x0444, 0x082c:
  68. return cp1251
  69. case 0x0408:
  70. return cp1253
  71. case 0x041f, 0x042c, 0x0443:
  72. return cp1254
  73. case 0x040d:
  74. return cp1255
  75. case 0x0401, 0x0801, 0xc01, 0x1001, 0x1401, 0x1801, 0x1c01, 0x2001, 0x2401, 0x2801, 0x2c01, 0x3001, 0x3401, 0x3801, 0x3c01, 0x4001, 0x0429, 0x0420:
  76. return cp1256
  77. case 0x0425, 0x0426, 0x0427, 0x0827:
  78. return cp1257
  79. case 0x042a:
  80. return cp1258
  81. case 0x0439, 0x045a, 0x0465:
  82. return nil
  83. }
  84. return cp1252
  85. }
  86. func CharsetToUTF8(col Collation, s []byte) string {
  87. cm := collation2charset(col)
  88. if cm == nil {
  89. return string(s)
  90. }
  91. buf := make([]rune, 0, len(s))
  92. for i := 0; i < len(s); i++ {
  93. ch := cm.sb[s[i]]
  94. if ch == -1 {
  95. if i+1 == len(s) {
  96. ch = 0xfffd
  97. } else {
  98. n := int(s[i+1]) + (int(s[i]) << 8)
  99. i++
  100. var ok bool
  101. ch, ok = cm.db[n]
  102. if !ok {
  103. ch = 0xfffd
  104. }
  105. }
  106. }
  107. buf = append(buf, ch)
  108. }
  109. return string(buf)
  110. }