float.go 11 KB


  1. // Copyright (c) 2012-2018 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a MIT license found in the LICENSE file.
  3. package codec
  4. import (
  5. "strconv"
  6. )
  7. // func parseFloat(b []byte, bitsize int) (f float64, err error) {
  8. // if bitsize == 32 {
  9. // return parseFloat32(b)
  10. // } else {
  11. // return parseFloat64(b)
  12. // }
  13. // }
  14. func parseFloat32(b []byte) (f float32, err error) {
  15. return parseFloat32_custom(b)
  16. // return parseFloat32_strconv(b)
  17. }
  18. func parseFloat64(b []byte) (f float64, err error) {
  19. return parseFloat64_custom(b)
  20. // return parseFloat64_strconv(b)
  21. }
  22. func parseFloat32_strconv(b []byte) (f float32, err error) {
  23. // defer func() { xdebugf("strconv float32: %s, %v, err: %v", b, f, err) }()
  24. f64, err := strconv.ParseFloat(stringView(b), 32)
  25. f = float32(f64)
  26. return
  27. }
  28. func parseFloat64_strconv(b []byte) (f float64, err error) {
  29. // defer func() { xdebugf("strconv float64: %s, %v, err: %v", b, f, err) }()
  30. return strconv.ParseFloat(stringView(b), 64)
  31. }
  32. // ------ parseFloat custom below --------
  33. // We assume that a lot of floating point numbers in json files will be
  34. // those that are handwritten, and with defined precision (in terms of number
  35. // of digits after decimal point), etc.
  36. //
// We further assume that these can be represented exactly as floats.
  38. //
  39. // strconv.ParseFloat has some unnecessary overhead which we can do without
  40. // for the common case:
  41. //
  42. // - expensive char-by-char check to see if underscores are in right place
  43. // - testing for and skipping underscores
  44. // - check if the string matches ignorecase +/- inf, +/- infinity, nan
  45. // - support for base 16 (0xFFFF...)
  46. //
  47. // The functions below will try a fast-path for floats which can be decoded
  48. // without any loss of precision, meaning they:
  49. //
// - fit within the significand bits of the 32-bit or 64-bit float
  51. // - exponent fits within the exponent value
  52. // - there is no truncation (any extra numbers are all trailing zeros)
  53. //
  54. // To figure out what the values are for maxMantDigits, use this idea below:
  55. //
// 2^23 = 838 8608 (between 10^ 6 and 10^ 7) (significand bits of float32)
// 2^32 = 42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
// 2^52 = 4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of float64)
// 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
  60. //
  61. // Note: we only allow for up to what can comfortably fit into the significand
  62. // ignoring the exponent, and we only try to parse iff significand fits.
  63. // Exact powers of 10.
  64. const (
  65. thousand = 1000
  66. million = thousand * thousand
  67. billion = thousand * million
  68. trillion = thousand * billion
  69. quadrillion = thousand * trillion
  70. quintillion = thousand * quadrillion
  71. )
  72. var uint64pow10 = [...]uint64{
  73. 1, 10, 100,
  74. 1 * thousand, 10 * thousand, 100 * thousand,
  75. 1 * million, 10 * million, 100 * million,
  76. 1 * billion, 10 * billion, 100 * billion,
  77. 1 * trillion, 10 * trillion, 100 * trillion,
  78. 1 * quadrillion, 10 * quadrillion, 100 * quadrillion,
  79. 1 * quintillion, 10 * quintillion,
  80. }
  81. var float64pow10 = [...]float64{
  82. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
  83. 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
  84. 1e20, 1e21, 1e22,
  85. }
  86. var float32pow10 = [...]float32{
  87. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
  88. }
  89. type floatinfo struct {
  90. mantbits uint8
  91. expbits uint8 // (unused)
  92. bias int16 // (unused)
  93. exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22)
  94. exactInts int8 // Exact integers are <= 10^N
  95. maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32
  96. }
  97. var fi32 = floatinfo{23, 8, -127, 10, 7, 9}
  98. var fi64 = floatinfo{52, 11, -1023, 22, 15, 19}
  99. var fi64u = floatinfo{64, 0, -1023, 20, 15, 19}
  100. var fi64i = floatinfo{63, 0, -1023, 20, 15, 19}
  101. const fMax64 = 1e15
  102. const fMax32 = 1e7
  103. const fBase = 10
  104. func strconvParseErr(b []byte, fn string) error {
  105. return &strconv.NumError{
  106. Func: fn,
  107. Err: strconv.ErrSyntax,
  108. Num: string(b),
  109. }
  110. }
  111. func parseFloat32_reader(r readFloatResult) (f float32, fail bool) {
  112. // parseFloatDebug(b, 32, false, exp, trunc, ok)
  113. f = float32(r.mantissa)
  114. if r.exp != 0 {
  115. indx := fExpIndx(r.exp)
  116. if r.exp < 0 { // int / 10^k
  117. f /= float32pow10[indx]
  118. } else { // exp > 0
  119. if r.exp > fi32.exactPow10 {
  120. f *= float32pow10[r.exp-fi32.exactPow10]
  121. if f > fMax32 { // exponent too large - outside range
  122. fail = true
  123. return // ok = false
  124. }
  125. indx = uint8(fi32.exactPow10)
  126. }
  127. f *= float32pow10[indx]
  128. }
  129. }
  130. if r.neg {
  131. f = -f
  132. }
  133. return
  134. }
  135. func parseFloat32_custom(b []byte) (f float32, err error) {
  136. r := readFloat(b, fi32)
  137. if r.bad {
  138. return 0, strconvParseErr(b, "ParseFloat")
  139. }
  140. if r.ok {
  141. if f, r.bad = parseFloat32_reader(r); r.bad {
  142. goto FALLBACK
  143. }
  144. return
  145. }
  146. FALLBACK:
  147. return parseFloat32_strconv(b)
  148. }
  149. func parseFloat64_reader(r readFloatResult) (f float64, fail bool) {
  150. f = float64(r.mantissa)
  151. if r.exp != 0 {
  152. indx := fExpIndx(r.exp)
  153. if r.exp < 0 { // int / 10^k
  154. f /= float64pow10[indx]
  155. } else { // exp > 0
  156. if r.exp > fi64.exactPow10 {
  157. f *= float64pow10[r.exp-fi64.exactPow10]
  158. if f > fMax64 { // exponent too large - outside range
  159. fail = true
  160. return
  161. }
  162. indx = uint8(fi64.exactPow10)
  163. }
  164. f *= float64pow10[indx]
  165. }
  166. }
  167. if r.neg {
  168. f = -f
  169. }
  170. return
  171. }
  172. func parseFloat64_custom(b []byte) (f float64, err error) {
  173. r := readFloat(b, fi64)
  174. if r.bad {
  175. return 0, strconvParseErr(b, "ParseFloat")
  176. }
  177. if r.ok {
  178. if f, r.bad = parseFloat64_reader(r); r.bad {
  179. goto FALLBACK
  180. }
  181. return
  182. }
  183. FALLBACK:
  184. return parseFloat64_strconv(b)
  185. }
  186. const maxUint64 = (1<<64 - 1)
  187. const uint64Cutoff = maxUint64/10 + 1
  188. func parseUint64_simple(b []byte) (n uint64, ok bool) {
  189. for _, c := range b {
  190. if c < '0' || c > '9' {
  191. return
  192. }
  193. // unsigned integers don't overflow well on multiplication, so check cutoff here
  194. // e.g. (maxUint64-5)*10 doesn't overflow well ...
  195. if n >= uint64Cutoff {
  196. return
  197. }
  198. n *= 10
  199. n1 := n + uint64(c-'0')
  200. if n1 < n || n1 > maxUint64 {
  201. return
  202. }
  203. n = n1
  204. }
  205. ok = true
  206. return
  207. }
  208. func parseUint64_reader(r readFloatResult) (f uint64, fail bool) {
  209. f = r.mantissa
  210. if r.exp != 0 {
  211. indx := fExpIndx(r.exp)
  212. if r.exp < 0 { // int / 10^k
  213. f /= uint64pow10[indx]
  214. } else { // exp > 0
  215. if r.exp > fi64u.exactPow10 {
  216. f *= uint64pow10[r.exp-fi64u.exactPow10]
  217. // if f > math.MaxUint64 { // exponent too large - outside range
  218. // fail = true
  219. // return
  220. // }
  221. indx = uint8(fi64u.exactPow10)
  222. }
  223. f *= uint64pow10[indx]
  224. }
  225. }
  226. return
  227. }
  228. func parseUint64(b []byte) (f uint64, err error) {
  229. var ok bool
  230. var r readFloatResult
  231. if b[0] == '-' {
  232. goto ERROR
  233. }
  234. f, ok = parseUint64_simple(b)
  235. if ok {
  236. return
  237. }
  238. r = readFloat(b, fi64u)
  239. // if r.neg {
  240. // goto ERROR
  241. // }
  242. if r.okInt {
  243. if f, r.bad = parseUint64_reader(r); r.bad {
  244. goto ERROR
  245. }
  246. return
  247. }
  248. ERROR:
  249. err = strconvParseErr(b, "ParseUint")
  250. return
  251. }
  252. func parseInt64_reader(r readFloatResult) (v int64, fail bool) {
  253. f := r.mantissa
  254. if r.exp != 0 {
  255. indx := fExpIndx(r.exp)
  256. if r.exp < 0 { // int / 10^k
  257. f /= uint64pow10[indx]
  258. } else { // exp > 0
  259. if r.exp > fi64i.exactPow10 {
  260. f *= uint64pow10[r.exp-fi64i.exactPow10]
  261. // if f > math.MaxInt64 { // exponent too large - outside range
  262. // fail = true
  263. // return
  264. // }
  265. indx = uint8(fi64i.exactPow10)
  266. }
  267. f *= uint64pow10[indx]
  268. }
  269. }
  270. if chkOvf.Uint2Int(f, r.neg) {
  271. fail = true
  272. return
  273. }
  274. if r.neg {
  275. v = -int64(f)
  276. } else {
  277. v = int64(f)
  278. }
  279. return
  280. }
  281. func parseInt64(b []byte) (v int64, err error) {
  282. var ok, neg bool
  283. var f uint64
  284. var r readFloatResult
  285. if b[0] == '-' {
  286. neg = true
  287. b = b[1:]
  288. }
  289. f, ok = parseUint64_simple(b)
  290. if ok {
  291. if chkOvf.Uint2Int(f, neg) {
  292. goto ERROR
  293. }
  294. if neg {
  295. v = -int64(f)
  296. } else {
  297. v = int64(f)
  298. }
  299. return
  300. }
  301. r = readFloat(b, fi64i)
  302. if r.okInt {
  303. r.neg = neg
  304. if v, r.bad = parseInt64_reader(r); r.bad {
  305. goto ERROR
  306. }
  307. return
  308. }
  309. ERROR:
  310. err = strconvParseErr(b, "ParseInt")
  311. return
  312. }
  313. // parseNumber will return an integer if only composed of [-]?[0-9]+
  314. // Else it will return a float.
  315. func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) {
  316. var ok, neg bool
  317. var f uint64
  318. if b[0] == '-' {
  319. neg = true
  320. f, ok = parseUint64_simple(b[1:])
  321. } else {
  322. f, ok = parseUint64_simple(b)
  323. }
  324. if ok {
  325. if neg {
  326. z.v = valueTypeInt
  327. if chkOvf.Uint2Int(f, neg) {
  328. err = strconvParseErr(b, "ParseInt")
  329. return
  330. }
  331. z.i = -int64(f)
  332. } else if preferSignedInt {
  333. z.v = valueTypeInt
  334. if chkOvf.Uint2Int(f, neg) {
  335. err = strconvParseErr(b, "ParseInt")
  336. return
  337. }
  338. z.i = int64(f)
  339. } else {
  340. z.v = valueTypeUint
  341. z.u = f
  342. }
  343. return
  344. }
  345. z.v = valueTypeFloat
  346. z.f, err = parseFloat64_custom(b)
  347. return
  348. }
  349. func fExpIndx(v int8) uint8 {
  350. if v < 0 {
  351. return uint8(-v)
  352. }
  353. return uint8(v)
  354. }
  355. type readFloatResult struct {
  356. mantissa uint64
  357. exp int8
  358. neg, sawdot, sawexp, trunc, bad, okInt, ok bool
  359. }
  360. func readFloat(s []byte, y floatinfo) (r readFloatResult) {
  361. var i uint // make it uint, so that we eliminate bounds checking
  362. var slen = uint(len(s))
  363. if slen == 0 {
  364. r.bad = true
  365. return
  366. }
  367. if s[0] == '-' {
  368. r.neg = true
  369. i++
  370. }
  371. // we considered punting early if string has length > maxMantDigits, but this doesn't account
  372. // for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20
  373. var nd, ndMant, dp int8
  374. var c uint8
  375. for ; i < slen; i++ {
  376. c = s[i]
  377. if c == '.' {
  378. if r.sawdot {
  379. r.bad = true
  380. return
  381. }
  382. r.sawdot = true
  383. dp = nd
  384. } else if c == 'e' || c == 'E' {
  385. r.sawexp = true
  386. break
  387. } else if c >= '0' && c <= '9' { // !(c < '0' || c > '9') { //
  388. if c == '0' && nd == 0 {
  389. dp--
  390. continue
  391. }
  392. nd++
  393. if ndMant >= y.maxMantDigits {
  394. r.trunc = true
  395. return
  396. }
  397. // mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0')
  398. r.mantissa = r.mantissa*fBase + uint64(c-'0')
  399. ndMant++
  400. } else {
  401. r.bad = true
  402. return
  403. }
  404. }
  405. if !r.sawdot {
  406. dp = nd
  407. }
  408. if r.sawexp {
  409. i++
  410. if i < slen {
  411. var eneg bool
  412. if s[i] == '+' {
  413. i++
  414. } else if s[i] == '-' {
  415. i++
  416. eneg = true
  417. }
  418. if i < slen {
  419. // for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17).
  420. // exit quick if exponent is more than 2 digits.
  421. if i+2 < slen {
  422. return
  423. }
  424. var e int8
  425. if s[i] < '0' || s[i] > '9' {
  426. r.bad = true
  427. return
  428. }
  429. e = int8(s[i] - '0')
  430. i++
  431. if i < slen {
  432. if s[i] < '0' || s[i] > '9' {
  433. r.bad = true
  434. return
  435. }
  436. e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
  437. i++
  438. }
  439. if eneg {
  440. dp -= e
  441. } else {
  442. dp += e
  443. }
  444. }
  445. }
  446. }
  447. r.okInt = true
  448. if r.mantissa != 0 {
  449. r.exp = dp - ndMant
  450. if r.exp < -y.exactPow10 ||
  451. r.exp > y.exactInts+y.exactPow10 ||
  452. r.mantissa>>y.mantbits != 0 { // cannot handle it
  453. return
  454. }
  455. }
  456. r.ok = true
  457. return
  458. }