Use 64K blocks when encoding long inputs.

This enables future optimizations, such as an encoder's hash table entry being
uint16 instead of int32.
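
As a rough illustration of that optimization (a hypothetical sketch, not code from this commit): once encodeBlock only ever sees at most 64K of input, every offset it stores is at most 65535 and fits in a uint16.

    const maxBlockSize = 65536

    // table holds candidate match positions within the current block.
    // Since encodeBlock's src is capped at maxBlockSize bytes, every
    // stored position is at most 65535, so uint16 entries suffice;
    // int32 entries would use twice the memory for the same information.
    var table [1 << 14]uint16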
Nigel Tao, 10 years ago
parent commit bf2ded9d81
4 changed files with 50 additions and 50 deletions
  1. decode.go (+2, -2)
  2. encode.go (+11, -19)
  3. snappy.go (+15, -8)
  4. snappy_test.go (+22, -21)

+ 2 - 2
decode.go

@@ -140,8 +140,8 @@ func Decode(dst, src []byte) ([]byte, error) {
 func NewReader(r io.Reader) *Reader {
 	return &Reader{
 		r:       r,
-		decoded: make([]byte, maxUncompressedChunkLen),
-		buf:     make([]byte, maxEncodedLenOfMaxUncompressedChunkLen+checksumSize),
+		decoded: make([]byte, maxBlockSize),
+		buf:     make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
 	}
 }
 

+ 11 - 19
encode.go

@@ -94,30 +94,22 @@ func Encode(dst, src []byte) []byte {
 	for len(src) > 0 {
 		p := src
 		src = nil
-		if len(p) > maxInternalEncodeSrcLen {
-			p, src = p[:maxInternalEncodeSrcLen], p[maxInternalEncodeSrcLen:]
+		if len(p) > maxBlockSize {
+			p, src = p[:maxBlockSize], p[maxBlockSize:]
 		}
-		d += encode(dst[d:], p)
+		d += encodeBlock(dst[d:], p)
 	}
 	return dst[:d]
 }
 
-// maxInternalEncodeSrcLen must be less than math.MaxInt32, so that in the
-// (internal) encode function, it is safe to have the s variable (which indexes
-// the src slice), and therefore the hash table entries, to have type int32
-// instead of int.
-const maxInternalEncodeSrcLen = 0x40000000
-
-// encode encodes a non-empty src to a guaranteed-large-enough dst. It assumes
-// that the varint-encoded length of the decompressed bytes has already been
-// written.
+// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
+// assumes that the varint-encoded length of the decompressed bytes has already
+// been written.
 //
 // It also assumes that:
 //	len(dst) >= MaxEncodedLen(len(src)) &&
-// 	0 < len(src) &&
-//	len(src) <= maxInternalEncodeSrcLen &&
-// 	maxInternalEncodeSrcLen < math.MaxInt32.
-func encode(dst, src []byte) (d int) {
+// 	0 < len(src) && len(src) <= maxBlockSize
+func encodeBlock(dst, src []byte) (d int) {
 	// Return early if src is short.
 	if len(src) <= 4 {
 		return emitLiteral(dst, src)
@@ -258,7 +250,7 @@ func NewWriter(w io.Writer) *Writer {
 func NewBufferedWriter(w io.Writer) *Writer {
 	return &Writer{
 		w:    w,
-		ibuf: make([]byte, 0, maxUncompressedChunkLen),
+		ibuf: make([]byte, 0, maxBlockSize),
 		obuf: make([]byte, obufLen),
 	}
 }
@@ -342,8 +334,8 @@ func (w *Writer) write(p []byte) (nRet int, errRet error) {
 		}
 
 		var uncompressed []byte
-		if len(p) > maxUncompressedChunkLen {
-			uncompressed, p = p[:maxUncompressedChunkLen], p[maxUncompressedChunkLen:]
+		if len(p) > maxBlockSize {
+			uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
 		} else {
 			uncompressed, p = p, nil
 		}

+ 15 - 8
snappy.go

@@ -46,18 +46,25 @@ const (
 	chunkHeaderSize = 4
 	magicChunk      = "\xff\x06\x00\x00" + magicBody
 	magicBody       = "sNaPpY"
+
+	// maxBlockSize is the maximum size of the input to encodeBlock. It is not
+	// part of the wire format per se, but some parts of the encoder assume
+	// that an offset fits into a uint16.
+	//
+	// Also, for the framing format (Writer type instead of Encode function),
 	// https://github.com/google/snappy/blob/master/framing_format.txt says
-	// that "the uncompressed data in a chunk must be no longer than 65536 bytes".
-	maxUncompressedChunkLen = 65536
+	// that "the uncompressed data in a chunk must be no longer than 65536
+	// bytes".
+	maxBlockSize = 65536
 
-	// maxEncodedLenOfMaxUncompressedChunkLen equals
-	// MaxEncodedLen(maxUncompressedChunkLen), but is hard coded to be a const
-	// instead of a variable, so that obufLen can also be a const. Their
-	// equivalence is confirmed by TestMaxEncodedLenOfMaxUncompressedChunkLen.
-	maxEncodedLenOfMaxUncompressedChunkLen = 76490
+	// maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is
+	// hard coded to be a const instead of a variable, so that obufLen can also
+	// be a const. Their equivalence is confirmed by
+	// TestMaxEncodedLenOfMaxBlockSize.
+	maxEncodedLenOfMaxBlockSize = 76490
 
 	obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
-	obufLen       = obufHeaderLen + maxEncodedLenOfMaxUncompressedChunkLen
+	obufLen       = obufHeaderLen + maxEncodedLenOfMaxBlockSize
 )
 
 const (
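
A quick arithmetic check of the hard-coded 76490 (this is what TestMaxEncodedLenOfMaxBlockSize confirms at runtime; the sketch below assumes MaxEncodedLen uses Snappy's documented worst-case bound of 32 + n + n/6):

    // maxEncodedLenSketch mirrors Snappy's worst-case expansion bound.
    func maxEncodedLenSketch(n int) int {
    	return 32 + n + n/6
    }

    // maxEncodedLenSketch(65536) == 32 + 65536 + 10922 == 76490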

+ 22 - 21
snappy_test.go

@@ -23,9 +23,9 @@ var (
 	testdata = flag.String("testdata", "testdata", "Directory containing the test data")
 )
 
-func TestMaxEncodedLenOfMaxUncompressedChunkLen(t *testing.T) {
-	got := maxEncodedLenOfMaxUncompressedChunkLen
-	want := MaxEncodedLen(maxUncompressedChunkLen)
+func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
+	got := maxEncodedLenOfMaxBlockSize
+	want := MaxEncodedLen(maxBlockSize)
 	if got != want {
 		t.Fatalf("got %d, want %d", got, want)
 	}
@@ -237,23 +237,24 @@ func TestDecode(t *testing.T) {
 	}
 }
 
-// TestEncodeNoiseThenRepeats encodes a 32K block for which the first half is
-// very incompressible and the second half is very compressible. The encoded
-// form's length should be closer to 50% of the original length than 100%.
+// TestEncodeNoiseThenRepeats encodes input for which the first half is very
+// incompressible and the second half is very compressible. The encoded form's
+// length should be closer to 50% of the original length than 100%.
 func TestEncodeNoiseThenRepeats(t *testing.T) {
-	const origLen = 32768
-	src := make([]byte, origLen)
-	rng := rand.New(rand.NewSource(1))
-	firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
-	for i := range firstHalf {
-		firstHalf[i] = uint8(rng.Intn(256))
-	}
-	for i := range secondHalf {
-		secondHalf[i] = uint8(i >> 8)
-	}
-	dst := Encode(nil, src)
-	if got, want := len(dst), origLen*3/4; got >= want {
-		t.Fatalf("got %d encoded bytes, want less than %d", got, want)
+	for _, origLen := range []int{32 * 1024, 256 * 1024, 2048 * 1024} {
+		src := make([]byte, origLen)
+		rng := rand.New(rand.NewSource(1))
+		firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
+		for i := range firstHalf {
+			firstHalf[i] = uint8(rng.Intn(256))
+		}
+		for i := range secondHalf {
+			secondHalf[i] = uint8(i >> 8)
+		}
+		dst := Encode(nil, src)
+		if got, want := len(dst), origLen*3/4; got >= want {
+			t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
+		}
 	}
 }
 
@@ -272,7 +273,7 @@ func cmp(a, b []byte) error {
 func TestFramingFormat(t *testing.T) {
 	// src is comprised of alternating 1e5-sized sequences of random
 	// (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
-	// because it is larger than maxUncompressedChunkLen (64k).
+	// because it is larger than maxBlockSize (64k).
 	src := make([]byte, 1e6)
 	rng := rand.New(rand.NewSource(1))
 	for i := 0; i < 10; i++ {
@@ -330,7 +331,7 @@ func TestNewBufferedWriter(t *testing.T) {
 	// Test all 32 possible sub-sequences of these 5 input slices.
 	//
 	// Their lengths sum to 400,000, which is over 6 times the Writer ibuf
-	// capacity: 6 * maxUncompressedChunkLen is 393,216.
+	// capacity: 6 * maxBlockSize is 393,216.
 	inputs := [][]byte{
 		bytes.Repeat([]byte{'a'}, 40000),
 		bytes.Repeat([]byte{'b'}, 150000),