huffman.go 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. // Copyright 2014 The Go Authors.
  2. // See https://code.google.com/p/go/source/browse/CONTRIBUTORS
  3. // Licensed under the same terms as Go itself:
  4. // https://code.google.com/p/go/source/browse/LICENSE
  5. package hpack
  import (
  	"bytes"
  	"errors"
  	"io"
  	"sync"
  )
  // bufPool recycles the scratch *bytes.Buffer values that HuffmanDecode
  // fills with decoded output before issuing its single Write.
  var bufPool = sync.Pool{
  	New: func() interface{} {
  		return &bytes.Buffer{}
  	},
  }
  14. // HuffmanDecode decodes the string in v and writes the expanded
  15. // result to w, returning the number of bytes written to w and the
  16. // Write call's return value. At most one Write call is made.
  17. func HuffmanDecode(w io.Writer, v []byte) (int, error) {
  18. buf := bufPool.Get().(*bytes.Buffer)
  19. buf.Reset()
  20. defer bufPool.Put(buf)
  21. n := rootHuffmanNode
  22. cur, nbits := uint(0), uint8(0)
  23. for _, b := range v {
  24. cur = cur<<8 | uint(b)
  25. nbits += 8
  26. for nbits >= 8 {
  27. n = n.children[byte(cur>>(nbits-8))]
  28. if n.children == nil {
  29. buf.WriteByte(n.sym)
  30. nbits -= n.codeLen
  31. n = rootHuffmanNode
  32. } else {
  33. nbits -= 8
  34. }
  35. }
  36. }
  37. for nbits > 0 {
  38. n = n.children[byte(cur<<(8-nbits))]
  39. if n.children != nil || n.codeLen > nbits {
  40. break
  41. }
  42. buf.WriteByte(n.sym)
  43. nbits -= n.codeLen
  44. n = rootHuffmanNode
  45. }
  46. return w.Write(buf.Bytes())
  47. }
  // node is one vertex of the decoding tree. Each internal node fans out
  // over the 256 possible values of the next input byte; a leaf carries
  // the decoded symbol.
  type node struct {
  	// children is allocated for internal nodes and nil for leaves.
  	children []*node

  	// Leaf data, meaningful only when children is nil.
  	sym     byte  // symbol to output
  	codeLen uint8 // number of bits that led to the output of sym
  }
  55. func newInternalNode() *node {
  56. return &node{children: make([]*node, 256)}
  57. }
  58. var rootHuffmanNode = newInternalNode()
  59. func init() {
  60. for i, code := range huffmanCodes {
  61. if i > 255 {
  62. panic("too many huffman codes")
  63. }
  64. addDecoderNode(byte(i), code, huffmanCodeLen[i])
  65. }
  66. }
  67. func addDecoderNode(sym byte, code uint32, codeLen uint8) {
  68. cur := rootHuffmanNode
  69. for codeLen > 8 {
  70. codeLen -= 8
  71. i := uint8(code >> codeLen)
  72. if cur.children[i] == nil {
  73. cur.children[i] = newInternalNode()
  74. }
  75. cur = cur.children[i]
  76. }
  77. shift := 8 - codeLen
  78. start, end := int(uint8(code<<shift)), int(1<<shift)
  79. for i := start; i < start+end; i++ {
  80. cur.children[i] = &node{sym: sym, codeLen: codeLen}
  81. }
  82. }
  83. // AppendHuffmanString appends s, as encoded in Huffman codes, to dst
  84. // and returns the extended buffer.
  85. func AppendHuffmanString(dst []byte, s string) []byte {
  86. rembits := uint8(8)
  87. for i := 0; i < len(s); i++ {
  88. if rembits == 8 {
  89. dst = append(dst, 0)
  90. }
  91. dst, rembits = appendByteToHuffmanCode(dst, rembits, s[i])
  92. }
  93. if rembits < 8 {
  94. // special EOS symbol
  95. code := uint32(0x3fffffff)
  96. nbits := uint8(30)
  97. t := uint8(code >> (nbits - rembits))
  98. dst[len(dst)-1] |= t
  99. }
  100. return dst
  101. }
  102. // HuffmanEncodeLength returns the number of bytes required to encode
  103. // s in Huffman codes. The result is round up to byte boundary.
  104. func HuffmanEncodeLength(s string) uint64 {
  105. n := uint64(0)
  106. for i := 0; i < len(s); i++ {
  107. n += uint64(huffmanCodeLen[s[i]])
  108. }
  109. return (n + 7) / 8
  110. }
  111. // appendByteToHuffmanCode appends Huffman code for c to dst and
  112. // returns the extended buffer and the remaining bits in the last
  113. // element. The appending is not byte aligned and the remaining bits
  114. // in the last element of dst is given in rembits.
  115. func appendByteToHuffmanCode(dst []byte, rembits uint8, c byte) ([]byte, uint8) {
  116. code := huffmanCodes[c]
  117. nbits := huffmanCodeLen[c]
  118. for {
  119. if rembits > nbits {
  120. t := uint8(code << (rembits - nbits))
  121. dst[len(dst)-1] |= t
  122. rembits -= nbits
  123. break
  124. }
  125. t := uint8(code >> (nbits - rembits))
  126. dst[len(dst)-1] |= t
  127. nbits -= rembits
  128. rembits = 8
  129. if nbits == 0 {
  130. break
  131. }
  132. dst = append(dst, 0)
  133. }
  134. return dst, rembits
  135. }