// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Multiprecision decimal numbers.
// For floating-point formatting only; not general purpose.
// Only operations are assign and (binary) left/right shift.
// Can do binary floating point in multiprecision decimal precisely
// because 2 divides 10; cannot do decimal floating point
// in multiprecision binary precisely.

  10. package decimal
  11. type floatInfo struct {
  12. mantbits uint
  13. expbits uint
  14. bias int
  15. }
  16. var float32info = floatInfo{23, 8, -127}
  17. var float64info = floatInfo{52, 11, -1023}
  18. // roundShortest rounds d (= mant * 2^exp) to the shortest number of digits
  19. // that will let the original floating point value be precisely reconstructed.
  20. func roundShortest(d *decimal, mant uint64, exp int, flt *floatInfo) {
  21. // If mantissa is zero, the number is zero; stop now.
  22. if mant == 0 {
  23. d.nd = 0
  24. return
  25. }
  26. // Compute upper and lower such that any decimal number
  27. // between upper and lower (possibly inclusive)
  28. // will round to the original floating point number.
  29. // We may see at once that the number is already shortest.
  30. //
  31. // Suppose d is not denormal, so that 2^exp <= d < 10^dp.
  32. // The closest shorter number is at least 10^(dp-nd) away.
  33. // The lower/upper bounds computed below are at distance
  34. // at most 2^(exp-mantbits).
  35. //
  36. // So the number is already shortest if 10^(dp-nd) > 2^(exp-mantbits),
  37. // or equivalently log2(10)*(dp-nd) > exp-mantbits.
  38. // It is true if 332/100*(dp-nd) >= exp-mantbits (log2(10) > 3.32).
  39. minexp := flt.bias + 1 // minimum possible exponent
  40. if exp > minexp && 332*(d.dp-d.nd) >= 100*(exp-int(flt.mantbits)) {
  41. // The number is already shortest.
  42. return
  43. }
  44. // d = mant << (exp - mantbits)
  45. // Next highest floating point number is mant+1 << exp-mantbits.
  46. // Our upper bound is halfway between, mant*2+1 << exp-mantbits-1.
  47. upper := new(decimal)
  48. upper.Assign(mant*2 + 1)
  49. upper.Shift(exp - int(flt.mantbits) - 1)
  50. // d = mant << (exp - mantbits)
  51. // Next lowest floating point number is mant-1 << exp-mantbits,
  52. // unless mant-1 drops the significant bit and exp is not the minimum exp,
  53. // in which case the next lowest is mant*2-1 << exp-mantbits-1.
  54. // Either way, call it mantlo << explo-mantbits.
  55. // Our lower bound is halfway between, mantlo*2+1 << explo-mantbits-1.
  56. var mantlo uint64
  57. var explo int
  58. if mant > 1<<flt.mantbits || exp == minexp {
  59. mantlo = mant - 1
  60. explo = exp
  61. } else {
  62. mantlo = mant*2 - 1
  63. explo = exp - 1
  64. }
  65. lower := new(decimal)
  66. lower.Assign(mantlo*2 + 1)
  67. lower.Shift(explo - int(flt.mantbits) - 1)
  68. // The upper and lower bounds are possible outputs only if
  69. // the original mantissa is even, so that IEEE round-to-even
  70. // would round to the original mantissa and not the neighbors.
  71. inclusive := mant%2 == 0
  72. // As we walk the digits we want to know whether rounding up would fall
  73. // within the upper bound. This is tracked by upperdelta:
  74. //
  75. // If upperdelta == 0, the digits of d and upper are the same so far.
  76. //
  77. // If upperdelta == 1, we saw a difference of 1 between d and upper on a
  78. // previous digit and subsequently only 9s for d and 0s for upper.
  79. // (Thus rounding up may fall outside the bound, if it is exclusive.)
  80. //
  81. // If upperdelta == 2, then the difference is greater than 1
  82. // and we know that rounding up falls within the bound.
  83. var upperdelta uint8
  84. // Now we can figure out the minimum number of digits required.
  85. // Walk along until d has distinguished itself from upper and lower.
  86. for ui := 0; ; ui++ {
  87. // lower, d, and upper may have the decimal points at different
  88. // places. In this case upper is the longest, so we iterate from
  89. // ui==0 and start li and mi at (possibly) -1.
  90. mi := ui - upper.dp + d.dp
  91. if mi >= d.nd {
  92. break
  93. }
  94. li := ui - upper.dp + lower.dp
  95. l := byte('0') // lower digit
  96. if li >= 0 && li < lower.nd {
  97. l = lower.d[li]
  98. }
  99. m := byte('0') // middle digit
  100. if mi >= 0 {
  101. m = d.d[mi]
  102. }
  103. u := byte('0') // upper digit
  104. if ui < upper.nd {
  105. u = upper.d[ui]
  106. }
  107. // Okay to round down (truncate) if lower has a different digit
  108. // or if lower is inclusive and is exactly the result of rounding
  109. // down (i.e., and we have reached the final digit of lower).
  110. okdown := l != m || inclusive && li+1 == lower.nd
  111. switch {
  112. case upperdelta == 0 && m+1 < u:
  113. // Example:
  114. // m = 12345xxx
  115. // u = 12347xxx
  116. upperdelta = 2
  117. case upperdelta == 0 && m != u:
  118. // Example:
  119. // m = 12345xxx
  120. // u = 12346xxx
  121. upperdelta = 1
  122. case upperdelta == 1 && (m != '9' || u != '0'):
  123. // Example:
  124. // m = 1234598x
  125. // u = 1234600x
  126. upperdelta = 2
  127. }
  128. // Okay to round up if upper has a different digit and either upper
  129. // is inclusive or upper is bigger than the result of rounding up.
  130. okup := upperdelta > 0 && (inclusive || upperdelta > 1 || ui+1 < upper.nd)
  131. // If it's okay to do either, then round to the nearest one.
  132. // If it's okay to do only one, do it.
  133. switch {
  134. case okdown && okup:
  135. d.Round(mi + 1)
  136. return
  137. case okdown:
  138. d.RoundDown(mi + 1)
  139. return
  140. case okup:
  141. d.RoundUp(mi + 1)
  142. return
  143. }
  144. }
  145. }