float.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. // Copyright (c) 2012-2018 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a MIT license found in the LICENSE file.
  3. package codec
  4. import "strconv"
  5. // func parseFloat(b []byte, bitsize int) (f float64, err error) {
  6. // if bitsize == 32 {
  7. // return parseFloat32(b)
  8. // } else {
  9. // return parseFloat64(b)
  10. // }
  11. // }
  12. func parseFloat32(b []byte) (f float32, err error) {
  13. return parseFloat32_custom(b)
  14. // return parseFloat32_strconv(b)
  15. }
  16. func parseFloat64(b []byte) (f float64, err error) {
  17. return parseFloat64_custom(b)
  18. // return parseFloat64_strconv(b)
  19. }
  20. func parseFloat32_strconv(b []byte) (f float32, err error) {
  21. // defer func() { xdebugf("strconv float32: %s, %v, err: %v", b, f, err) }()
  22. f64, err := strconv.ParseFloat(stringView(b), 32)
  23. f = float32(f64)
  24. return
  25. }
  26. func parseFloat64_strconv(b []byte) (f float64, err error) {
  27. // defer func() { xdebugf("strconv float64: %s, %v, err: %v", b, f, err) }()
  28. return strconv.ParseFloat(stringView(b), 64)
  29. }
  30. // ------ parseFloat custom below --------
  31. // We assume that a lot of floating point numbers in json files will be
  32. // those that are handwritten, and with defined precision (in terms of number
  33. // of digits after decimal point), etc.
  34. //
  35. // We further assume that these can be written in exact format.
  36. //
  37. // strconv.ParseFloat has some unnecessary overhead which we can do without
  38. // for the common case:
  39. //
  40. // - expensive char-by-char check to see if underscores are in right place
  41. // - testing for and skipping underscores
  42. // - check if the string matches ignorecase +/- inf, +/- infinity, nan
  43. // - support for base 16 (0xFFFF...)
  44. //
  45. // The functions below will try a fast-path for floats which can be decoded
  46. // without any loss of precision, meaning they:
  47. //
  48. // - have a significand that fits within the significand bits of a float32 or float64
  49. // - have an exponent within the range the fast path can apply exactly
  50. // - involve no truncation (any extra digits are all trailing zeros)
  51. //
  52. // To figure out what the values are for maxMantDigits, use this idea below:
  53. //
  54. // 2^23 = 838 8608 (between 10^ 6 and 10^ 7) (significand bits of float32)
  55. // 2^32 = 42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
  56. // 2^52 = 4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of float64)
  57. // 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
  58. //
  59. // Note: we only allow for up to what can comfortably fit into the significand
  60. // ignoring the exponent, and we only try to parse iff significand fits.
  61. // Exact powers of 10.
  62. var float64pow10 = [...]float64{
  63. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
  64. 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
  65. 1e20, 1e21, 1e22,
  66. }
  67. var float32pow10 = [...]float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
  68. type floatinfo struct {
  69. mantbits uint8
  70. expbits uint8
  71. bias int16
  72. exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22)
  73. exactInts int8 // Exact integers are <= 10^N
  74. maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32
  75. }
  76. var fi32 = floatinfo{23, 8, -127, 10, 7, 9} // maxMantDigits = 9
  77. var fi64 = floatinfo{52, 11, -1023, 22, 15, 19} // maxMantDigits = 19
  78. const fMax64 = 1e15
  79. const fMax32 = 1e7
  80. const fBase = 10
  81. func parseFloatErr(b []byte) error {
  82. return &strconv.NumError{
  83. Func: "ParseFloat",
  84. Err: strconv.ErrSyntax,
  85. Num: string(b),
  86. }
  87. }
  88. func parseFloat32_custom(b []byte) (f float32, err error) {
  89. mantissa, exp, neg, trunc, bad, ok := readFloat(b, fi32)
  90. _ = trunc
  91. if bad {
  92. return 0, parseFloatErr(b)
  93. }
  94. if ok {
  95. // parseFloatDebug(b, 32, false, exp, trunc, ok)
  96. f = float32(mantissa)
  97. if neg {
  98. f = -f
  99. }
  100. if exp != 0 {
  101. indx := fExpIndx(exp)
  102. if exp < 0 { // int / 10^k
  103. f /= float32pow10[indx]
  104. } else { // exp > 0
  105. if exp > fi32.exactPow10 {
  106. f *= float32pow10[exp-fi32.exactPow10]
  107. if f < -fMax32 || f > fMax32 { // exponent too large - outside range
  108. goto FALLBACK
  109. }
  110. indx = uint8(fi32.exactPow10)
  111. }
  112. f *= float32pow10[indx]
  113. }
  114. }
  115. return
  116. }
  117. FALLBACK:
  118. // parseFloatDebug(b, 32, true, exp, trunc, ok)
  119. return parseFloat32_strconv(b)
  120. }
  121. func parseFloat64_custom(b []byte) (f float64, err error) {
  122. mantissa, exp, neg, trunc, bad, ok := readFloat(b, fi64)
  123. _ = trunc
  124. if bad {
  125. return 0, parseFloatErr(b)
  126. }
  127. if ok {
  128. f = float64(mantissa)
  129. if neg {
  130. f = -f
  131. }
  132. if exp != 0 {
  133. indx := fExpIndx(exp)
  134. if exp < 0 { // int / 10^k
  135. f /= float64pow10[indx]
  136. } else { // exp > 0
  137. if exp > fi64.exactPow10 {
  138. f *= float64pow10[exp-fi64.exactPow10]
  139. if f < -fMax64 || f > fMax64 { // exponent too large - outside range
  140. goto FALLBACK
  141. }
  142. indx = uint8(fi64.exactPow10)
  143. }
  144. f *= float64pow10[indx]
  145. }
  146. }
  147. return
  148. }
  149. FALLBACK:
  150. return parseFloat64_strconv(b)
  151. }
  152. func fExpIndx(v int8) uint8 {
  153. if v < 0 {
  154. return uint8(-v)
  155. }
  156. return uint8(v)
  157. }
  158. func readFloat(s []byte, y floatinfo) (mantissa uint64, exp int8, neg, trunc, bad, ok bool) {
  159. var i uint // make it uint, so that we eliminate bounds checking
  160. var slen = uint(len(s))
  161. if slen == 0 {
  162. bad = true
  163. return
  164. }
  165. switch s[0] {
  166. case '+':
  167. i++
  168. case '-':
  169. neg = true
  170. i++
  171. }
  172. // we considered punting early if string has length > maxMantDigits, but this doesn't account
  173. // for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20
  174. // var sawdot, sawdigits, sawexp bool
  175. var sawdot, sawexp bool
  176. var nd, ndMant, dp int8
  177. L:
  178. for ; i < slen; i++ {
  179. switch s[i] {
  180. case '.':
  181. if sawdot {
  182. bad = true
  183. return
  184. }
  185. sawdot = true
  186. dp = nd
  187. case '0':
  188. if nd == 0 { // ignore leading zeros
  189. dp--
  190. continue
  191. }
  192. nd++
  193. if ndMant < y.maxMantDigits {
  194. // mantissa = (mantissa << 1) + (mantissa << 3)
  195. mantissa *= fBase
  196. ndMant++
  197. }
  198. case '1', '2', '3', '4', '5', '6', '7', '8', '9':
  199. // sawdigits = true
  200. nd++
  201. if ndMant < y.maxMantDigits {
  202. // mantissa = (mantissa << 1) + (mantissa << 3) + uint64(s[i]-'0')
  203. mantissa = mantissa*fBase + uint64(s[i]-'0')
  204. // mantissa *= fBase
  205. // mantissa += uint64(s[i] - '0')
  206. ndMant++
  207. } else {
  208. trunc = true
  209. return // break L
  210. }
  211. case 'e', 'E':
  212. sawexp = true
  213. break L
  214. default:
  215. bad = true
  216. return
  217. }
  218. }
  219. // if !sawdigits {
  220. // bad = true
  221. // return
  222. // }
  223. if !sawdot {
  224. dp = nd
  225. }
  226. if sawexp {
  227. i++
  228. if i < slen {
  229. var eneg bool
  230. if s[i] == '+' {
  231. i++
  232. } else if s[i] == '-' {
  233. i++
  234. eneg = true
  235. }
  236. if i < slen {
  237. // for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17).
  238. // exit quick if exponent is more than 2 digits.
  239. if i+2 < slen {
  240. return
  241. }
  242. var e int8
  243. if s[i] < '0' || s[i] > '9' {
  244. bad = true
  245. return
  246. }
  247. e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
  248. i++
  249. if i < slen {
  250. if s[i] < '0' || s[i] > '9' {
  251. bad = true
  252. return
  253. }
  254. e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
  255. i++
  256. }
  257. if eneg {
  258. dp -= e
  259. } else {
  260. dp += e
  261. }
  262. }
  263. }
  264. }
  265. if mantissa != 0 {
  266. if mantissa>>y.mantbits != 0 {
  267. return
  268. }
  269. exp = dp - ndMant
  270. if exp < -y.exactPow10 || exp > y.exactInts+y.exactPow10 { // cannot handle it
  271. return
  272. }
  273. }
  274. ok = true // && !trunc // if trunc=true, we return early (so here trunc=false)
  275. return
  276. }
  277. // fMul10ShiftU64
  278. // func parseFloatDebug(b []byte, bitsize int, strconv bool, exp int8, trunc, ok bool) {
  279. // if strconv {
  280. // xdebugf("parseFloat%d: delegating: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
  281. // } else {
  282. // xdebug2f("parseFloat%d: attempting: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
  283. // }
  284. // }