huffman.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package hpack
  5. import (
  6. "bytes"
  7. "errors"
  8. "io"
  9. "sync"
  10. )
  // bufPool hands out reusable *bytes.Buffer values. The decode entry
  // points borrow a buffer from it as scratch space and return it when
  // they are done, so a decode call does not need a fresh buffer each time.
  var bufPool = sync.Pool{
  	New: func() interface{} {
  		return &bytes.Buffer{}
  	},
  }
  14. // HuffmanDecode decodes the string in v and writes the expanded
  15. // result to w, returning the number of bytes written to w and the
  16. // Write call's return value. At most one Write call is made.
  17. func HuffmanDecode(w io.Writer, v []byte) (int, error) {
  18. buf := bufPool.Get().(*bytes.Buffer)
  19. buf.Reset()
  20. defer bufPool.Put(buf)
  21. if err := huffmanDecode(buf, v); err != nil {
  22. return 0, err
  23. }
  24. return w.Write(buf.Bytes())
  25. }
  26. // HuffmanDecodeToString decodes the string in v.
  27. func HuffmanDecodeToString(v []byte) (string, error) {
  28. buf := bufPool.Get().(*bytes.Buffer)
  29. buf.Reset()
  30. defer bufPool.Put(buf)
  31. if err := huffmanDecode(buf, v); err != nil {
  32. return "", err
  33. }
  34. return buf.String(), nil
  35. }
  // ErrInvalidHuffman is the error reported when a Huffman-encoded
  // string cannot be decoded.
  var (
  	ErrInvalidHuffman = errors.New("hpack: invalid Huffman-encoded data")
  )
  39. func huffmanDecode(buf *bytes.Buffer, v []byte) error {
  40. n := rootHuffmanNode
  41. cur, nbits := uint(0), uint8(0)
  42. for _, b := range v {
  43. cur = cur<<8 | uint(b)
  44. nbits += 8
  45. for nbits >= 8 {
  46. idx := byte(cur >> (nbits - 8))
  47. n = n.children[idx]
  48. if n == nil {
  49. return ErrInvalidHuffman
  50. }
  51. if n.children == nil {
  52. buf.WriteByte(n.sym)
  53. nbits -= n.codeLen
  54. n = rootHuffmanNode
  55. } else {
  56. nbits -= 8
  57. }
  58. }
  59. }
  60. for nbits > 0 {
  61. n = n.children[byte(cur<<(8-nbits))]
  62. if n.children != nil || n.codeLen > nbits {
  63. break
  64. }
  65. buf.WriteByte(n.sym)
  66. nbits -= n.codeLen
  67. n = rootHuffmanNode
  68. }
  69. return nil
  70. }
  // node is one vertex of the decoding tree: either an internal node with
  // a table of children, or a leaf that carries a decoded symbol.
  type node struct {
  	// children is non-nil for internal nodes and nil for leaves.
  	children []*node

  	// The remaining fields are meaningful only for leaves (children == nil).

  	// codeLen is the number of bits that led to the output of sym.
  	codeLen uint8
  	// sym is the output symbol.
  	sym byte
  }
  78. func newInternalNode() *node {
  79. return &node{children: make([]*node, 256)}
  80. }
  81. var rootHuffmanNode = newInternalNode()
  82. func init() {
  83. if len(huffmanCodes) != 256 {
  84. panic("unexpected size")
  85. }
  86. for i, code := range huffmanCodes {
  87. addDecoderNode(byte(i), code, huffmanCodeLen[i])
  88. }
  89. }
  90. func addDecoderNode(sym byte, code uint32, codeLen uint8) {
  91. cur := rootHuffmanNode
  92. for codeLen > 8 {
  93. codeLen -= 8
  94. i := uint8(code >> codeLen)
  95. if cur.children[i] == nil {
  96. cur.children[i] = newInternalNode()
  97. }
  98. cur = cur.children[i]
  99. }
  100. shift := 8 - codeLen
  101. start, end := int(uint8(code<<shift)), int(1<<shift)
  102. for i := start; i < start+end; i++ {
  103. cur.children[i] = &node{sym: sym, codeLen: codeLen}
  104. }
  105. }
  106. // AppendHuffmanString appends s, as encoded in Huffman codes, to dst
  107. // and returns the extended buffer.
  108. func AppendHuffmanString(dst []byte, s string) []byte {
  109. rembits := uint8(8)
  110. for i := 0; i < len(s); i++ {
  111. if rembits == 8 {
  112. dst = append(dst, 0)
  113. }
  114. dst, rembits = appendByteToHuffmanCode(dst, rembits, s[i])
  115. }
  116. if rembits < 8 {
  117. // special EOS symbol
  118. code := uint32(0x3fffffff)
  119. nbits := uint8(30)
  120. t := uint8(code >> (nbits - rembits))
  121. dst[len(dst)-1] |= t
  122. }
  123. return dst
  124. }
  125. // HuffmanEncodeLength returns the number of bytes required to encode
  126. // s in Huffman codes. The result is round up to byte boundary.
  127. func HuffmanEncodeLength(s string) uint64 {
  128. n := uint64(0)
  129. for i := 0; i < len(s); i++ {
  130. n += uint64(huffmanCodeLen[s[i]])
  131. }
  132. return (n + 7) / 8
  133. }
  134. // appendByteToHuffmanCode appends Huffman code for c to dst and
  135. // returns the extended buffer and the remaining bits in the last
  136. // element. The appending is not byte aligned and the remaining bits
  137. // in the last element of dst is given in rembits.
  138. func appendByteToHuffmanCode(dst []byte, rembits uint8, c byte) ([]byte, uint8) {
  139. code := huffmanCodes[c]
  140. nbits := huffmanCodeLen[c]
  141. for {
  142. if rembits > nbits {
  143. t := uint8(code << (rembits - nbits))
  144. dst[len(dst)-1] |= t
  145. rembits -= nbits
  146. break
  147. }
  148. t := uint8(code >> (nbits - rembits))
  149. dst[len(dst)-1] |= t
  150. nbits -= rembits
  151. rembits = 8
  152. if nbits == 0 {
  153. break
  154. }
  155. dst = append(dst, 0)
  156. }
  157. return dst, rembits
  158. }