inflate.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package flate implements the DEFLATE compressed data format, described in
  5. // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
  6. // formats.
  7. package flate
  8. import (
  9. "bufio"
  10. "fmt"
  11. "io"
  12. "math/bits"
  13. "strconv"
  14. "sync"
  15. )
  16. const (
  17. maxCodeLen = 16 // max length of Huffman code
  18. maxCodeLenMask = 15 // mask for max length of Huffman code
  19. // The next three numbers come from the RFC section 3.2.7, with the
  20. // additional proviso in section 3.2.5 which implies that distance codes
  21. // 30 and 31 should never occur in compressed data.
  22. maxNumLit = 286
  23. maxNumDist = 30
  24. numCodes = 19 // number of codes in Huffman meta-code
  25. debugDecode = false
  26. )
  27. // Initialize the fixedHuffmanDecoder only once upon first use.
  28. var fixedOnce sync.Once
  29. var fixedHuffmanDecoder huffmanDecoder
  30. // A CorruptInputError reports the presence of corrupt input at a given offset.
  31. type CorruptInputError int64
  32. func (e CorruptInputError) Error() string {
  33. return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
  34. }
  35. // An InternalError reports an error in the flate code itself.
  36. type InternalError string
  37. func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
  38. // A ReadError reports an error encountered while reading input.
  39. //
  40. // Deprecated: No longer returned.
  41. type ReadError struct {
  42. Offset int64 // byte offset where error occurred
  43. Err error // error returned by underlying Read
  44. }
  45. func (e *ReadError) Error() string {
  46. return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  47. }
  48. // A WriteError reports an error encountered while writing output.
  49. //
  50. // Deprecated: No longer returned.
  51. type WriteError struct {
  52. Offset int64 // byte offset where error occurred
  53. Err error // error returned by underlying Write
  54. }
  55. func (e *WriteError) Error() string {
  56. return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  57. }
  58. // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
  59. // to switch to a new underlying Reader. This permits reusing a ReadCloser
  60. // instead of allocating a new one.
  61. type Resetter interface {
  62. // Reset discards any buffered data and resets the Resetter as if it was
  63. // newly initialized with the given reader.
  64. Reset(r io.Reader, dict []byte) error
  65. }
  66. // The data structure for decoding Huffman tables is based on that of
  67. // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
  68. // For codes smaller than the table width, there are multiple entries
  69. // (each combination of trailing bits has the same value). For codes
  70. // larger than the table width, the table contains a link to an overflow
  71. // table. The width of each entry in the link table is the maximum code
  72. // size minus the chunk width.
  73. //
  74. // Note that you can do a lookup in the table even without all bits
  75. // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
  76. // have the property that shorter codes come before longer ones, the
  77. // bit length estimate in the result is a lower bound on the actual
  78. // number of bits.
  79. //
  80. // See the following:
  81. // http://www.gzip.org/algorithm.txt
  82. // chunk & 15 is number of bits
  83. // chunk >> 4 is value, including table link
  84. const (
  85. huffmanChunkBits = 9
  86. huffmanNumChunks = 1 << huffmanChunkBits
  87. huffmanCountMask = 15
  88. huffmanValueShift = 4
  89. )
  90. type huffmanDecoder struct {
  91. maxRead int // the maximum number of bits we can read and not overread
  92. chunks *[huffmanNumChunks]uint16 // chunks as described above
  93. links [][]uint16 // overflow links
  94. linkMask uint32 // mask the width of the link table
  95. }
  96. // Initialize Huffman decoding tables from array of code lengths.
  97. // Following this function, h is guaranteed to be initialized into a complete
  98. // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
  99. // degenerate case where the tree has only a single symbol with length 1. Empty
  100. // trees are permitted.
  101. func (h *huffmanDecoder) init(lengths []int) bool {
  102. // Sanity enables additional runtime tests during Huffman
  103. // table construction. It's intended to be used during
  104. // development to supplement the currently ad-hoc unit tests.
  105. const sanity = false
  106. if h.chunks == nil {
  107. h.chunks = &[huffmanNumChunks]uint16{}
  108. }
  109. if h.maxRead != 0 {
  110. *h = huffmanDecoder{chunks: h.chunks, links: h.links}
  111. }
  112. // Count number of codes of each length,
  113. // compute maxRead and max length.
  114. var count [maxCodeLen]int
  115. var min, max int
  116. for _, n := range lengths {
  117. if n == 0 {
  118. continue
  119. }
  120. if min == 0 || n < min {
  121. min = n
  122. }
  123. if n > max {
  124. max = n
  125. }
  126. count[n&maxCodeLenMask]++
  127. }
  128. // Empty tree. The decompressor.huffSym function will fail later if the tree
  129. // is used. Technically, an empty tree is only valid for the HDIST tree and
  130. // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
  131. // is guaranteed to fail since it will attempt to use the tree to decode the
  132. // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
  133. // guaranteed to fail later since the compressed data section must be
  134. // composed of at least one symbol (the end-of-block marker).
  135. if max == 0 {
  136. return true
  137. }
  138. code := 0
  139. var nextcode [maxCodeLen]int
  140. for i := min; i <= max; i++ {
  141. code <<= 1
  142. nextcode[i&maxCodeLenMask] = code
  143. code += count[i&maxCodeLenMask]
  144. }
  145. // Check that the coding is complete (i.e., that we've
  146. // assigned all 2-to-the-max possible bit sequences).
  147. // Exception: To be compatible with zlib, we also need to
  148. // accept degenerate single-code codings. See also
  149. // TestDegenerateHuffmanCoding.
  150. if code != 1<<uint(max) && !(code == 1 && max == 1) {
  151. if debugDecode {
  152. fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
  153. }
  154. return false
  155. }
  156. h.maxRead = min
  157. chunks := h.chunks[:]
  158. for i := range chunks {
  159. chunks[i] = 0
  160. }
  161. if max > huffmanChunkBits {
  162. numLinks := 1 << (uint(max) - huffmanChunkBits)
  163. h.linkMask = uint32(numLinks - 1)
  164. // create link tables
  165. link := nextcode[huffmanChunkBits+1] >> 1
  166. if cap(h.links) < huffmanNumChunks-link {
  167. h.links = make([][]uint16, huffmanNumChunks-link)
  168. } else {
  169. h.links = h.links[:huffmanNumChunks-link]
  170. }
  171. for j := uint(link); j < huffmanNumChunks; j++ {
  172. reverse := int(bits.Reverse16(uint16(j)))
  173. reverse >>= uint(16 - huffmanChunkBits)
  174. off := j - uint(link)
  175. if sanity && h.chunks[reverse] != 0 {
  176. panic("impossible: overwriting existing chunk")
  177. }
  178. h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
  179. if cap(h.links[off]) < numLinks {
  180. h.links[off] = make([]uint16, numLinks)
  181. } else {
  182. links := h.links[off][:0]
  183. h.links[off] = links[:numLinks]
  184. }
  185. }
  186. } else {
  187. h.links = h.links[:0]
  188. }
  189. for i, n := range lengths {
  190. if n == 0 {
  191. continue
  192. }
  193. code := nextcode[n]
  194. nextcode[n]++
  195. chunk := uint16(i<<huffmanValueShift | n)
  196. reverse := int(bits.Reverse16(uint16(code)))
  197. reverse >>= uint(16 - n)
  198. if n <= huffmanChunkBits {
  199. for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
  200. // We should never need to overwrite
  201. // an existing chunk. Also, 0 is
  202. // never a valid chunk, because the
  203. // lower 4 "count" bits should be
  204. // between 1 and 15.
  205. if sanity && h.chunks[off] != 0 {
  206. panic("impossible: overwriting existing chunk")
  207. }
  208. h.chunks[off] = chunk
  209. }
  210. } else {
  211. j := reverse & (huffmanNumChunks - 1)
  212. if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
  213. // Longer codes should have been
  214. // associated with a link table above.
  215. panic("impossible: not an indirect chunk")
  216. }
  217. value := h.chunks[j] >> huffmanValueShift
  218. linktab := h.links[value]
  219. reverse >>= huffmanChunkBits
  220. for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
  221. if sanity && linktab[off] != 0 {
  222. panic("impossible: overwriting existing chunk")
  223. }
  224. linktab[off] = chunk
  225. }
  226. }
  227. }
  228. if sanity {
  229. // Above we've sanity checked that we never overwrote
  230. // an existing entry. Here we additionally check that
  231. // we filled the tables completely.
  232. for i, chunk := range h.chunks {
  233. if chunk == 0 {
  234. // As an exception, in the degenerate
  235. // single-code case, we allow odd
  236. // chunks to be missing.
  237. if code == 1 && i%2 == 1 {
  238. continue
  239. }
  240. panic("impossible: missing chunk")
  241. }
  242. }
  243. for _, linktab := range h.links {
  244. for _, chunk := range linktab {
  245. if chunk == 0 {
  246. panic("impossible: missing chunk")
  247. }
  248. }
  249. }
  250. }
  251. return true
  252. }
  253. // The actual read interface needed by NewReader.
  254. // If the passed in io.Reader does not also have ReadByte,
  255. // the NewReader will introduce its own buffering.
  256. type Reader interface {
  257. io.Reader
  258. io.ByteReader
  259. }
  260. // Decompress state.
  261. type decompressor struct {
  262. // Input source.
  263. r Reader
  264. roffset int64
  265. // Huffman decoders for literal/length, distance.
  266. h1, h2 huffmanDecoder
  267. // Length arrays used to define Huffman codes.
  268. bits *[maxNumLit + maxNumDist]int
  269. codebits *[numCodes]int
  270. // Output history, buffer.
  271. dict dictDecoder
  272. // Next step in the decompression,
  273. // and decompression state.
  274. step func(*decompressor)
  275. stepState int
  276. err error
  277. toRead []byte
  278. hl, hd *huffmanDecoder
  279. copyLen int
  280. copyDist int
  281. // Temporary buffer (avoids repeated allocation).
  282. buf [4]byte
  283. // Input bits, in top of b.
  284. b uint32
  285. nb uint
  286. final bool
  287. }
  288. func (f *decompressor) nextBlock() {
  289. for f.nb < 1+2 {
  290. if f.err = f.moreBits(); f.err != nil {
  291. return
  292. }
  293. }
  294. f.final = f.b&1 == 1
  295. f.b >>= 1
  296. typ := f.b & 3
  297. f.b >>= 2
  298. f.nb -= 1 + 2
  299. switch typ {
  300. case 0:
  301. f.dataBlock()
  302. case 1:
  303. // compressed, fixed Huffman tables
  304. f.hl = &fixedHuffmanDecoder
  305. f.hd = nil
  306. f.huffmanBlockDecoder()()
  307. case 2:
  308. // compressed, dynamic Huffman tables
  309. if f.err = f.readHuffman(); f.err != nil {
  310. break
  311. }
  312. f.hl = &f.h1
  313. f.hd = &f.h2
  314. f.huffmanBlockDecoder()()
  315. default:
  316. // 3 is reserved.
  317. if debugDecode {
  318. fmt.Println("reserved data block encountered")
  319. }
  320. f.err = CorruptInputError(f.roffset)
  321. }
  322. }
  323. func (f *decompressor) Read(b []byte) (int, error) {
  324. for {
  325. if len(f.toRead) > 0 {
  326. n := copy(b, f.toRead)
  327. f.toRead = f.toRead[n:]
  328. if len(f.toRead) == 0 {
  329. return n, f.err
  330. }
  331. return n, nil
  332. }
  333. if f.err != nil {
  334. return 0, f.err
  335. }
  336. f.step(f)
  337. if f.err != nil && len(f.toRead) == 0 {
  338. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  339. }
  340. }
  341. }
  342. // Support the io.WriteTo interface for io.Copy and friends.
  343. func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
  344. total := int64(0)
  345. flushed := false
  346. for {
  347. if len(f.toRead) > 0 {
  348. n, err := w.Write(f.toRead)
  349. total += int64(n)
  350. if err != nil {
  351. f.err = err
  352. return total, err
  353. }
  354. if n != len(f.toRead) {
  355. return total, io.ErrShortWrite
  356. }
  357. f.toRead = f.toRead[:0]
  358. }
  359. if f.err != nil && flushed {
  360. if f.err == io.EOF {
  361. return total, nil
  362. }
  363. return total, f.err
  364. }
  365. if f.err == nil {
  366. f.step(f)
  367. }
  368. if len(f.toRead) == 0 && f.err != nil && !flushed {
  369. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  370. flushed = true
  371. }
  372. }
  373. }
  374. func (f *decompressor) Close() error {
  375. if f.err == io.EOF {
  376. return nil
  377. }
  378. return f.err
  379. }
  380. // RFC 1951 section 3.2.7.
  381. // Compression with dynamic Huffman codes
  382. var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
  383. func (f *decompressor) readHuffman() error {
  384. // HLIT[5], HDIST[5], HCLEN[4].
  385. for f.nb < 5+5+4 {
  386. if err := f.moreBits(); err != nil {
  387. return err
  388. }
  389. }
  390. nlit := int(f.b&0x1F) + 257
  391. if nlit > maxNumLit {
  392. if debugDecode {
  393. fmt.Println("nlit > maxNumLit", nlit)
  394. }
  395. return CorruptInputError(f.roffset)
  396. }
  397. f.b >>= 5
  398. ndist := int(f.b&0x1F) + 1
  399. if ndist > maxNumDist {
  400. if debugDecode {
  401. fmt.Println("ndist > maxNumDist", ndist)
  402. }
  403. return CorruptInputError(f.roffset)
  404. }
  405. f.b >>= 5
  406. nclen := int(f.b&0xF) + 4
  407. // numCodes is 19, so nclen is always valid.
  408. f.b >>= 4
  409. f.nb -= 5 + 5 + 4
  410. // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
  411. for i := 0; i < nclen; i++ {
  412. for f.nb < 3 {
  413. if err := f.moreBits(); err != nil {
  414. return err
  415. }
  416. }
  417. f.codebits[codeOrder[i]] = int(f.b & 0x7)
  418. f.b >>= 3
  419. f.nb -= 3
  420. }
  421. for i := nclen; i < len(codeOrder); i++ {
  422. f.codebits[codeOrder[i]] = 0
  423. }
  424. if !f.h1.init(f.codebits[0:]) {
  425. if debugDecode {
  426. fmt.Println("init codebits failed")
  427. }
  428. return CorruptInputError(f.roffset)
  429. }
  430. // HLIT + 257 code lengths, HDIST + 1 code lengths,
  431. // using the code length Huffman code.
  432. for i, n := 0, nlit+ndist; i < n; {
  433. x, err := f.huffSym(&f.h1)
  434. if err != nil {
  435. return err
  436. }
  437. if x < 16 {
  438. // Actual length.
  439. f.bits[i] = x
  440. i++
  441. continue
  442. }
  443. // Repeat previous length or zero.
  444. var rep int
  445. var nb uint
  446. var b int
  447. switch x {
  448. default:
  449. return InternalError("unexpected length code")
  450. case 16:
  451. rep = 3
  452. nb = 2
  453. if i == 0 {
  454. if debugDecode {
  455. fmt.Println("i==0")
  456. }
  457. return CorruptInputError(f.roffset)
  458. }
  459. b = f.bits[i-1]
  460. case 17:
  461. rep = 3
  462. nb = 3
  463. b = 0
  464. case 18:
  465. rep = 11
  466. nb = 7
  467. b = 0
  468. }
  469. for f.nb < nb {
  470. if err := f.moreBits(); err != nil {
  471. if debugDecode {
  472. fmt.Println("morebits:", err)
  473. }
  474. return err
  475. }
  476. }
  477. rep += int(f.b & uint32(1<<nb-1))
  478. f.b >>= nb
  479. f.nb -= nb
  480. if i+rep > n {
  481. if debugDecode {
  482. fmt.Println("i+rep > n", i, rep, n)
  483. }
  484. return CorruptInputError(f.roffset)
  485. }
  486. for j := 0; j < rep; j++ {
  487. f.bits[i] = b
  488. i++
  489. }
  490. }
  491. if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
  492. if debugDecode {
  493. fmt.Println("init2 failed")
  494. }
  495. return CorruptInputError(f.roffset)
  496. }
  497. // As an optimization, we can initialize the maxRead bits to read at a time
  498. // for the HLIT tree to the length of the EOB marker since we know that
  499. // every block must terminate with one. This preserves the property that
  500. // we never read any extra bytes after the end of the DEFLATE stream.
  501. if f.h1.maxRead < f.bits[endBlockMarker] {
  502. f.h1.maxRead = f.bits[endBlockMarker]
  503. }
  504. if !f.final {
  505. // If not the final block, the smallest block possible is
  506. // a predefined table, BTYPE=01, with a single EOB marker.
  507. // This will take up 3 + 7 bits.
  508. f.h1.maxRead += 10
  509. }
  510. return nil
  511. }
  512. // Decode a single Huffman block from f.
  513. // hl and hd are the Huffman states for the lit/length values
  514. // and the distance values, respectively. If hd == nil, using the
  515. // fixed distance encoding associated with fixed Huffman blocks.
  516. func (f *decompressor) huffmanBlockGeneric() {
  517. const (
  518. stateInit = iota // Zero value must be stateInit
  519. stateDict
  520. )
  521. switch f.stepState {
  522. case stateInit:
  523. goto readLiteral
  524. case stateDict:
  525. goto copyHistory
  526. }
  527. readLiteral:
  528. // Read literal and/or (length, distance) according to RFC section 3.2.3.
  529. {
  530. var v int
  531. {
  532. // Inlined v, err := f.huffSym(f.hl)
  533. // Since a huffmanDecoder can be empty or be composed of a degenerate tree
  534. // with single element, huffSym must error on these two edge cases. In both
  535. // cases, the chunks slice will be 0 for the invalid sequence, leading it
  536. // satisfy the n == 0 check below.
  537. n := uint(f.hl.maxRead)
  538. // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
  539. // but is smart enough to keep local variables in registers, so use nb and b,
  540. // inline call to moreBits and reassign b,nb back to f on return.
  541. nb, b := f.nb, f.b
  542. for {
  543. for nb < n {
  544. c, err := f.r.ReadByte()
  545. if err != nil {
  546. f.b = b
  547. f.nb = nb
  548. f.err = noEOF(err)
  549. return
  550. }
  551. f.roffset++
  552. b |= uint32(c) << (nb & 31)
  553. nb += 8
  554. }
  555. chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
  556. n = uint(chunk & huffmanCountMask)
  557. if n > huffmanChunkBits {
  558. chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
  559. n = uint(chunk & huffmanCountMask)
  560. }
  561. if n <= nb {
  562. if n == 0 {
  563. f.b = b
  564. f.nb = nb
  565. if debugDecode {
  566. fmt.Println("huffsym: n==0")
  567. }
  568. f.err = CorruptInputError(f.roffset)
  569. return
  570. }
  571. f.b = b >> (n & 31)
  572. f.nb = nb - n
  573. v = int(chunk >> huffmanValueShift)
  574. break
  575. }
  576. }
  577. }
  578. var n uint // number of bits extra
  579. var length int
  580. var err error
  581. switch {
  582. case v < 256:
  583. f.dict.writeByte(byte(v))
  584. if f.dict.availWrite() == 0 {
  585. f.toRead = f.dict.readFlush()
  586. f.step = (*decompressor).huffmanBlockGeneric
  587. f.stepState = stateInit
  588. return
  589. }
  590. goto readLiteral
  591. case v == 256:
  592. f.finishBlock()
  593. return
  594. // otherwise, reference to older data
  595. case v < 265:
  596. length = v - (257 - 3)
  597. n = 0
  598. case v < 269:
  599. length = v*2 - (265*2 - 11)
  600. n = 1
  601. case v < 273:
  602. length = v*4 - (269*4 - 19)
  603. n = 2
  604. case v < 277:
  605. length = v*8 - (273*8 - 35)
  606. n = 3
  607. case v < 281:
  608. length = v*16 - (277*16 - 67)
  609. n = 4
  610. case v < 285:
  611. length = v*32 - (281*32 - 131)
  612. n = 5
  613. case v < maxNumLit:
  614. length = 258
  615. n = 0
  616. default:
  617. if debugDecode {
  618. fmt.Println(v, ">= maxNumLit")
  619. }
  620. f.err = CorruptInputError(f.roffset)
  621. return
  622. }
  623. if n > 0 {
  624. for f.nb < n {
  625. if err = f.moreBits(); err != nil {
  626. if debugDecode {
  627. fmt.Println("morebits n>0:", err)
  628. }
  629. f.err = err
  630. return
  631. }
  632. }
  633. length += int(f.b & uint32(1<<n-1))
  634. f.b >>= n
  635. f.nb -= n
  636. }
  637. var dist int
  638. if f.hd == nil {
  639. for f.nb < 5 {
  640. if err = f.moreBits(); err != nil {
  641. if debugDecode {
  642. fmt.Println("morebits f.nb<5:", err)
  643. }
  644. f.err = err
  645. return
  646. }
  647. }
  648. dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
  649. f.b >>= 5
  650. f.nb -= 5
  651. } else {
  652. if dist, err = f.huffSym(f.hd); err != nil {
  653. if debugDecode {
  654. fmt.Println("huffsym:", err)
  655. }
  656. f.err = err
  657. return
  658. }
  659. }
  660. switch {
  661. case dist < 4:
  662. dist++
  663. case dist < maxNumDist:
  664. nb := uint(dist-2) >> 1
  665. // have 1 bit in bottom of dist, need nb more.
  666. extra := (dist & 1) << nb
  667. for f.nb < nb {
  668. if err = f.moreBits(); err != nil {
  669. if debugDecode {
  670. fmt.Println("morebits f.nb<nb:", err)
  671. }
  672. f.err = err
  673. return
  674. }
  675. }
  676. extra |= int(f.b & uint32(1<<nb-1))
  677. f.b >>= nb
  678. f.nb -= nb
  679. dist = 1<<(nb+1) + 1 + extra
  680. default:
  681. if debugDecode {
  682. fmt.Println("dist too big:", dist, maxNumDist)
  683. }
  684. f.err = CorruptInputError(f.roffset)
  685. return
  686. }
  687. // No check on length; encoding can be prescient.
  688. if dist > f.dict.histSize() {
  689. if debugDecode {
  690. fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
  691. }
  692. f.err = CorruptInputError(f.roffset)
  693. return
  694. }
  695. f.copyLen, f.copyDist = length, dist
  696. goto copyHistory
  697. }
  698. copyHistory:
  699. // Perform a backwards copy according to RFC section 3.2.3.
  700. {
  701. cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
  702. if cnt == 0 {
  703. cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
  704. }
  705. f.copyLen -= cnt
  706. if f.dict.availWrite() == 0 || f.copyLen > 0 {
  707. f.toRead = f.dict.readFlush()
  708. f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work
  709. f.stepState = stateDict
  710. return
  711. }
  712. goto readLiteral
  713. }
  714. }
  715. // Copy a single uncompressed data block from input to output.
  716. func (f *decompressor) dataBlock() {
  717. // Uncompressed.
  718. // Discard current half-byte.
  719. left := (f.nb) & 7
  720. f.nb -= left
  721. f.b >>= left
  722. offBytes := f.nb >> 3
  723. // Unfilled values will be overwritten.
  724. f.buf[0] = uint8(f.b)
  725. f.buf[1] = uint8(f.b >> 8)
  726. f.buf[2] = uint8(f.b >> 16)
  727. f.buf[3] = uint8(f.b >> 24)
  728. f.roffset += int64(offBytes)
  729. f.nb, f.b = 0, 0
  730. // Length then ones-complement of length.
  731. nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
  732. f.roffset += int64(nr)
  733. if err != nil {
  734. f.err = noEOF(err)
  735. return
  736. }
  737. n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
  738. nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
  739. if nn != ^n {
  740. if debugDecode {
  741. ncomp := ^n
  742. fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
  743. }
  744. f.err = CorruptInputError(f.roffset)
  745. return
  746. }
  747. if n == 0 {
  748. f.toRead = f.dict.readFlush()
  749. f.finishBlock()
  750. return
  751. }
  752. f.copyLen = int(n)
  753. f.copyData()
  754. }
  755. // copyData copies f.copyLen bytes from the underlying reader into f.hist.
  756. // It pauses for reads when f.hist is full.
  757. func (f *decompressor) copyData() {
  758. buf := f.dict.writeSlice()
  759. if len(buf) > f.copyLen {
  760. buf = buf[:f.copyLen]
  761. }
  762. cnt, err := io.ReadFull(f.r, buf)
  763. f.roffset += int64(cnt)
  764. f.copyLen -= cnt
  765. f.dict.writeMark(cnt)
  766. if err != nil {
  767. f.err = noEOF(err)
  768. return
  769. }
  770. if f.dict.availWrite() == 0 || f.copyLen > 0 {
  771. f.toRead = f.dict.readFlush()
  772. f.step = (*decompressor).copyData
  773. return
  774. }
  775. f.finishBlock()
  776. }
  777. func (f *decompressor) finishBlock() {
  778. if f.final {
  779. if f.dict.availRead() > 0 {
  780. f.toRead = f.dict.readFlush()
  781. }
  782. f.err = io.EOF
  783. }
  784. f.step = (*decompressor).nextBlock
  785. }
  786. // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
  787. func noEOF(e error) error {
  788. if e == io.EOF {
  789. return io.ErrUnexpectedEOF
  790. }
  791. return e
  792. }
  793. func (f *decompressor) moreBits() error {
  794. c, err := f.r.ReadByte()
  795. if err != nil {
  796. return noEOF(err)
  797. }
  798. f.roffset++
  799. f.b |= uint32(c) << f.nb
  800. f.nb += 8
  801. return nil
  802. }
  803. // Read the next Huffman-encoded symbol from f according to h.
  804. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
  805. // Since a huffmanDecoder can be empty or be composed of a degenerate tree
  806. // with single element, huffSym must error on these two edge cases. In both
  807. // cases, the chunks slice will be 0 for the invalid sequence, leading it
  808. // satisfy the n == 0 check below.
  809. n := uint(h.maxRead)
  810. // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
  811. // but is smart enough to keep local variables in registers, so use nb and b,
  812. // inline call to moreBits and reassign b,nb back to f on return.
  813. nb, b := f.nb, f.b
  814. for {
  815. for nb < n {
  816. c, err := f.r.ReadByte()
  817. if err != nil {
  818. f.b = b
  819. f.nb = nb
  820. return 0, noEOF(err)
  821. }
  822. f.roffset++
  823. b |= uint32(c) << (nb & 31)
  824. nb += 8
  825. }
  826. chunk := h.chunks[b&(huffmanNumChunks-1)]
  827. n = uint(chunk & huffmanCountMask)
  828. if n > huffmanChunkBits {
  829. chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
  830. n = uint(chunk & huffmanCountMask)
  831. }
  832. if n <= nb {
  833. if n == 0 {
  834. f.b = b
  835. f.nb = nb
  836. if debugDecode {
  837. fmt.Println("huffsym: n==0")
  838. }
  839. f.err = CorruptInputError(f.roffset)
  840. return 0, f.err
  841. }
  842. f.b = b >> (n & 31)
  843. f.nb = nb - n
  844. return int(chunk >> huffmanValueShift), nil
  845. }
  846. }
  847. }
  848. func makeReader(r io.Reader) Reader {
  849. if rr, ok := r.(Reader); ok {
  850. return rr
  851. }
  852. return bufio.NewReader(r)
  853. }
  854. func fixedHuffmanDecoderInit() {
  855. fixedOnce.Do(func() {
  856. // These come from the RFC section 3.2.6.
  857. var bits [288]int
  858. for i := 0; i < 144; i++ {
  859. bits[i] = 8
  860. }
  861. for i := 144; i < 256; i++ {
  862. bits[i] = 9
  863. }
  864. for i := 256; i < 280; i++ {
  865. bits[i] = 7
  866. }
  867. for i := 280; i < 288; i++ {
  868. bits[i] = 8
  869. }
  870. fixedHuffmanDecoder.init(bits[:])
  871. })
  872. }
  873. func (f *decompressor) Reset(r io.Reader, dict []byte) error {
  874. *f = decompressor{
  875. r: makeReader(r),
  876. bits: f.bits,
  877. codebits: f.codebits,
  878. h1: f.h1,
  879. h2: f.h2,
  880. dict: f.dict,
  881. step: (*decompressor).nextBlock,
  882. }
  883. f.dict.init(maxMatchOffset, dict)
  884. return nil
  885. }
  886. // NewReader returns a new ReadCloser that can be used
  887. // to read the uncompressed version of r.
  888. // If r does not also implement io.ByteReader,
  889. // the decompressor may read more data than necessary from r.
  890. // It is the caller's responsibility to call Close on the ReadCloser
  891. // when finished reading.
  892. //
  893. // The ReadCloser returned by NewReader also implements Resetter.
  894. func NewReader(r io.Reader) io.ReadCloser {
  895. fixedHuffmanDecoderInit()
  896. var f decompressor
  897. f.r = makeReader(r)
  898. f.bits = new([maxNumLit + maxNumDist]int)
  899. f.codebits = new([numCodes]int)
  900. f.step = (*decompressor).nextBlock
  901. f.dict.init(maxMatchOffset, nil)
  902. return &f
  903. }
  904. // NewReaderDict is like NewReader but initializes the reader
  905. // with a preset dictionary. The returned Reader behaves as if
  906. // the uncompressed data stream started with the given dictionary,
  907. // which has already been read. NewReaderDict is typically used
  908. // to read data compressed by NewWriterDict.
  909. //
  910. // The ReadCloser returned by NewReader also implements Resetter.
  911. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
  912. fixedHuffmanDecoderInit()
  913. var f decompressor
  914. f.r = makeReader(r)
  915. f.bits = new([maxNumLit + maxNumDist]int)
  916. f.codebits = new([numCodes]int)
  917. f.step = (*decompressor).nextBlock
  918. f.dict.init(maxMatchOffset, dict)
  919. return &f
  920. }