123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375 |
- // Copyright 2009 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package flate
- import (
- "bytes"
- "encoding/binary"
- "fmt"
- "io"
- "math"
- )
- // A token packs a literal or a match into 32 bits. The constants below
- // establish this layout:
- //
- //	bits 30-31: type (0 = literal, 1 = match)
- //	bits 22-29: xlength = length - MIN_MATCH_LENGTH
- //	bits  0-21: xoffset = offset - MIN_OFFSET_SIZE, or the literal value
- const (
- lengthShift = 22 // bit position of the 8-bit length field
- offsetMask = 1<<lengthShift - 1 // selects the low 22 bits (offset or literal)
- typeMask = 3 << 30 // selects the two type bits
- literalType = 0 << 30
- matchType = 1 << 30
- )
- // lengthCodes holds the length code, numbered from 0, for every match length.
- // The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
- // is lengthCodes[length - MIN_MATCH_LENGTH].
- var lengthCodes = [256]uint8{
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
- 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
- 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
- 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
- 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
- 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
- 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 28,
- }
- // lengthCodes1 is lengthCodes with 1 added to every entry, so the codes
- // start at 1 instead of 0. It is indexed the same way as lengthCodes.
- var lengthCodes1 = [256]uint8{
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
- 10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
- 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
- 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
- 18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
- 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
- 20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 29,
- }
- // offsetCodes maps a value in 0..255 to an offset code. offsetCode indexes
- // it directly with the offset when the offset is below 256.
- var offsetCodes = [256]uint32{
- 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
- 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- }
- // offsetCodes14 is offsetCodes with 14 added to every entry. offsetCode
- // indexes it with the offset shifted right by 7 bits when the offset is
- // 256 or larger.
- var offsetCodes14 = [256]uint32{
- 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
- }
- // token is a single literal or match packed into 32 bits; its fields are
- // read back with the typ, literal, offset and length methods.
- type token uint32
- // tokens is a fixed-capacity buffer of tokens together with histograms of
- // the literal, length and offset symbols recorded as tokens are added.
- type tokens struct {
- nLits int // bumped for every literal and match added, and by Fill; AddEOB leaves it alone
- extraHist [32]uint16 // codes 256->maxnumlit; index 0 is bumped by AddEOB, matches use lengthCodes1 values
- offHist [32]uint16 // offset codes
- litHist [256]uint16 // codes 0->255
- n uint16 // number of entries of tokens in use. Must be able to contain maxStoreBlockSize
- tokens [maxStoreBlockSize + 1]token
- }
- func (t *tokens) Reset() {
- if t.n == 0 {
- return
- }
- t.n = 0
- t.nLits = 0
- for i := range t.litHist[:] {
- t.litHist[i] = 0
- }
- for i := range t.extraHist[:] {
- t.extraHist[i] = 0
- }
- for i := range t.offHist[:] {
- t.offHist[i] = 0
- }
- }
- func (t *tokens) Fill() {
- if t.n == 0 {
- return
- }
- for i, v := range t.litHist[:] {
- if v == 0 {
- t.litHist[i] = 1
- t.nLits++
- }
- }
- for i, v := range t.extraHist[:literalCount-256] {
- if v == 0 {
- t.nLits++
- t.extraHist[i] = 1
- }
- }
- for i, v := range t.offHist[:offsetCodeCount] {
- if v == 0 {
- t.offHist[i] = 1
- }
- }
- }
- func indexTokens(in []token) tokens {
- var t tokens
- t.indexTokens(in)
- return t
- }
- func (t *tokens) indexTokens(in []token) {
- t.Reset()
- for _, tok := range in {
- if tok < matchType {
- t.AddLiteral(tok.literal())
- continue
- }
- t.AddMatch(uint32(tok.length()), tok.offset())
- }
- }
- // emitLiteral writes a literal chunk and returns the number of bytes written.
- func emitLiteral(dst *tokens, lit []byte) {
- ol := int(dst.n)
- for i, v := range lit {
- dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
- dst.litHist[v]++
- }
- dst.n += uint16(len(lit))
- dst.nLits += len(lit)
- }
- func (t *tokens) AddLiteral(lit byte) {
- t.tokens[t.n] = token(lit)
- t.litHist[lit]++
- t.n++
- t.nLits++
- }
// mFastLog2 returns a fast approximation of log2(val): the exponent is read
// straight from the float's bit pattern and the mantissa contribution is
// approximated with a quadratic polynomial.
// from https://stackoverflow.com/a/28730362
func mFastLog2(val float32) float32 {
	bits := math.Float32bits(val)
	// Biased exponent field (8 bits starting at bit 23) minus 128.
	exponent := float32(int32((bits>>23)&255) - 128)
	// Replace the exponent field with 127 so the value becomes the mantissa
	// scaled into [1, 2).
	bits = (bits &^ 0x7f800000) + (127 << 23)
	mant := math.Float32frombits(bits)
	return exponent + (((-0.34484843)*mant+2.02466578)*mant - 0.67487759)
}
- // EstimatedBits will return an minimum size estimated by an *optimal*
- // compression of the block.
- // The size of the block
- func (t *tokens) EstimatedBits() int {
- shannon := float32(0)
- bits := int(0)
- nMatches := 0
- if t.nLits > 0 {
- invTotal := 1.0 / float32(t.nLits)
- for _, v := range t.litHist[:] {
- if v > 0 {
- n := float32(v)
- shannon += -mFastLog2(n*invTotal) * n
- }
- }
- // Just add 15 for EOB
- shannon += 15
- for i, v := range t.extraHist[1 : literalCount-256] {
- if v > 0 {
- n := float32(v)
- shannon += -mFastLog2(n*invTotal) * n
- bits += int(lengthExtraBits[i&31]) * int(v)
- nMatches += int(v)
- }
- }
- }
- if nMatches > 0 {
- invTotal := 1.0 / float32(nMatches)
- for i, v := range t.offHist[:offsetCodeCount] {
- if v > 0 {
- n := float32(v)
- shannon += -mFastLog2(n*invTotal) * n
- bits += int(offsetExtraBits[i&31]) * int(v)
- }
- }
- }
- return int(shannon) + bits
- }
- // AddMatch adds a match to the tokens.
- // xlength and xoffset are packed into the token exactly as given.
- // NOTE(review): they appear to be the length and offset with their base
- // values already subtracted (see AddMatchLong's comment) — confirm against callers.
- // This function is very sensitive to inlining and right on the border.
- func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
- // Range checks only run when debugDeflate is set.
- if debugDeflate {
- if xlength >= maxMatchLength+baseMatchLength {
- panic(fmt.Errorf("invalid length: %v", xlength))
- }
- if xoffset >= maxMatchOffset+baseMatchOffset {
- panic(fmt.Errorf("invalid offset: %v", xoffset))
- }
- }
- t.nLits++
- // lengthCodes1 values start at 1, which leaves extraHist[0] to AddEOB.
- // The & 31 keeps the index inside the 32-entry histogram.
- lengthCode := lengthCodes1[uint8(xlength)] & 31
- t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
- t.extraHist[lengthCode]++
- t.offHist[offsetCode(xoffset)&31]++
- t.n++
- }
- // AddMatchLong adds a match to the tokens, potentially longer than max match length.
- // Length should NOT have the base subtracted, only offset should.
- func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
- if debugDeflate {
- if xoffset >= maxMatchOffset+baseMatchOffset {
- panic(fmt.Errorf("invalid offset: %v", xoffset))
- }
- }
- oc := offsetCode(xoffset) & 31
- for xlength > 0 {
- xl := xlength
- if xl > 258 {
- // We need to have at least baseMatchLength left over for next loop.
- xl = 258 - baseMatchLength
- }
- xlength -= xl
- xl -= 3
- t.nLits++
- lengthCode := lengthCodes1[uint8(xl)] & 31
- t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
- t.extraHist[lengthCode]++
- t.offHist[oc]++
- t.n++
- }
- }
- func (t *tokens) AddEOB() {
- t.tokens[t.n] = token(endBlockMarker)
- t.extraHist[0]++
- t.n++
- }
- func (t *tokens) Slice() []token {
- return t.tokens[:t.n]
- }
- // VarInt returns the tokens as varint encoded bytes.
- func (t *tokens) VarInt() []byte {
- var b = make([]byte, binary.MaxVarintLen32*int(t.n))
- var off int
- for _, v := range t.tokens[:t.n] {
- off += binary.PutUvarint(b[off:], uint64(v))
- }
- return b[:off]
- }
- // FromVarInt restores t to the varint encoded tokens provided.
- // Any data in t is removed.
- func (t *tokens) FromVarInt(b []byte) error {
- var buf = bytes.NewReader(b)
- var toks []token
- for {
- r, err := binary.ReadUvarint(buf)
- if err == io.EOF {
- break
- }
- if err != nil {
- return err
- }
- toks = append(toks, token(r))
- }
- t.indexTokens(toks)
- return nil
- }
- // Returns the type of a token
- func (t token) typ() uint32 { return uint32(t) & typeMask }
- // Returns the literal of a literal token
- func (t token) literal() uint8 { return uint8(t) }
- // Returns the extra offset of a match token
- func (t token) offset() uint32 { return uint32(t) & offsetMask }
- func (t token) length() uint8 { return uint8(t >> lengthShift) }
- // The code is never more than 8 bits, but is returned as uint32 for convenience.
- func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
- // Returns the offset code corresponding to a specific offset
- func offsetCode(off uint32) uint32 {
- if false {
- if off < uint32(len(offsetCodes)) {
- return offsetCodes[off&255]
- } else if off>>7 < uint32(len(offsetCodes)) {
- return offsetCodes[(off>>7)&255] + 14
- } else {
- return offsetCodes[(off>>14)&255] + 28
- }
- }
- if off < uint32(len(offsetCodes)) {
- return offsetCodes[uint8(off)]
- }
- return offsetCodes14[uint8(off>>7)]
- }
|