// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate
  5. // Sort sorts data.
  6. // It makes one call to data.Len to determine n, and O(n*log(n)) calls to
  7. // data.Less and data.Swap. The sort is not guaranteed to be stable.
  8. func sortByFreq(data []literalNode) {
  9. n := len(data)
  10. quickSortByFreq(data, 0, n, maxDepth(n))
  11. }
  12. func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
  13. for b-a > 12 { // Use ShellSort for slices <= 12 elements
  14. if maxDepth == 0 {
  15. heapSort(data, a, b)
  16. return
  17. }
  18. maxDepth--
  19. mlo, mhi := doPivotByFreq(data, a, b)
  20. // Avoiding recursion on the larger subproblem guarantees
  21. // a stack depth of at most lg(b-a).
  22. if mlo-a < b-mhi {
  23. quickSortByFreq(data, a, mlo, maxDepth)
  24. a = mhi // i.e., quickSortByFreq(data, mhi, b)
  25. } else {
  26. quickSortByFreq(data, mhi, b, maxDepth)
  27. b = mlo // i.e., quickSortByFreq(data, a, mlo)
  28. }
  29. }
  30. if b-a > 1 {
  31. // Do ShellSort pass with gap 6
  32. // It could be written in this simplified form cause b-a <= 12
  33. for i := a + 6; i < b; i++ {
  34. if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
  35. data[i], data[i-6] = data[i-6], data[i]
  36. }
  37. }
  38. insertionSortByFreq(data, a, b)
  39. }
  40. }
  41. // siftDownByFreq implements the heap property on data[lo, hi).
  42. // first is an offset into the array where the root of the heap lies.
  43. func siftDownByFreq(data []literalNode, lo, hi, first int) {
  44. root := lo
  45. for {
  46. child := 2*root + 1
  47. if child >= hi {
  48. break
  49. }
  50. if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) {
  51. child++
  52. }
  53. if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq {
  54. return
  55. }
  56. data[first+root], data[first+child] = data[first+child], data[first+root]
  57. root = child
  58. }
  59. }
// doPivotByFreq picks a pivot for data[lo:hi] and partitions that range
// around it, comparing nodes by freq and breaking freq ties by literal.
//
// The pivot is chosen with medianOfThreeSortByFreq, which leaves the median
// of three positions in its first index argument. For ranges longer than 40
// elements three such medians are first gathered into data[lo], data[m] and
// data[hi-1]; in every case the median of those three positions then ends up
// in data[lo].
//
// It returns midlo = b-1, the slot the pivot is finally swapped into, and
// midhi = c. quickSortByFreq continues only on data[lo:midlo] and
// data[midhi:hi].
func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
	if hi-lo > 40 {
		// Tukey's ``Ninther,'' median of three medians of three.
		s := (hi - lo) / 8
		medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
		medianOfThreeSortByFreq(data, m, m-s, m+s)
		medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
	}
	medianOfThreeSortByFreq(data, lo, m, hi-1)
	// Invariants are:
	// data[lo] = pivot (set up by the medianOfThreeSortByFreq calls above)
	// data[lo < i < a] < pivot
	// data[a <= i < b] <= pivot
	// data[b <= i < c] unexamined
	// data[c <= i < hi-1] > pivot
	// data[hi-1] >= pivot
	pivot := lo
	a, c := lo+1, hi-1
	// Skip the leading run of elements that are strictly below the pivot.
	for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
	}
	b := a
	for {
		// Advance b while data[b] is strictly below the pivot (the test is
		// pivot > data[b]), so b also stops on an element equal to the pivot.
		for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] < pivot
		}
		// Retreat c while data[c-1] is strictly above the pivot.
		for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
		}
		if b >= c {
			break
		}
		// data[b] >= pivot; data[c-1] <= pivot
		data[b], data[c-1] = data[c-1], data[b]
		b++
		c--
	}
	// If hi-c<3 then there are duplicates (by property of median of nine).
	// Let's be a bit more conservative, and set border to 5.
	protect := hi-c < 5
	if !protect && hi-c < (hi-lo)/4 {
		// Lets test some points for equality to pivot
		//
		// NOTE(review): the three tests below are strict "greater than"
		// comparisons (pivot > data[hi-1], data[b-1] > pivot, data[m] > pivot),
		// whereas their trailing comments and the invariants listed above
		// describe equality checks. Confirm whether a non-strict comparison
		// was intended here.
		dups := 0
		if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
			data[c], data[hi-1] = data[hi-1], data[c]
			c++
			dups++
		}
		if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
			b--
			dups++
		}
		// m-lo = (hi-lo)/2 > 6
		// b-lo > (hi-lo)*3/4-1 > 8
		// ==> m < b ==> data[m] <= pivot
		if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
			data[m], data[b-1] = data[b-1], data[m]
			b--
			dups++
		}
		// if at least 2 points are equal to pivot, assume skewed distribution
		protect = dups > 1
	}
	if protect {
		// Protect against a lot of duplicates
		// Add invariant:
		// data[a <= i < b] unexamined
		// data[b <= i < c] = pivot
		//
		// NOTE(review): the first inner loop only retreats b while data[b-1]
		// is strictly above the pivot, not while it is equal to it as the
		// trailing comment suggests. Confirm the intended comparison.
		for {
			for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
			}
			for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
			}
			if a >= b {
				break
			}
			// data[a] == pivot; data[b-1] < pivot
			data[a], data[b-1] = data[b-1], data[a]
			a++
			b--
		}
	}
	// Swap pivot into middle
	data[pivot], data[b-1] = data[b-1], data[pivot]
	return b - 1, c
}
  144. // Insertion sort
  145. func insertionSortByFreq(data []literalNode, a, b int) {
  146. for i := a + 1; i < b; i++ {
  147. for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
  148. data[j], data[j-1] = data[j-1], data[j]
  149. }
  150. }
  151. }
  152. // quickSortByFreq, loosely following Bentley and McIlroy,
  153. // ``Engineering a Sort Function,'' SP&E November 1993.
  154. // medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
  155. func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
  156. // sort 3 elements
  157. if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
  158. data[m1], data[m0] = data[m0], data[m1]
  159. }
  160. // data[m0] <= data[m1]
  161. if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
  162. data[m2], data[m1] = data[m1], data[m2]
  163. // data[m0] <= data[m2] && data[m1] < data[m2]
  164. if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
  165. data[m1], data[m0] = data[m0], data[m1]
  166. }
  167. }
  168. // now data[m0] <= data[m1] <= data[m2]
  169. }