snappy_test.go 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818
  1. // Copyright 2011 The Snappy-Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package snappy
  5. import (
  6. "bytes"
  7. "encoding/binary"
  8. "flag"
  9. "fmt"
  10. "io"
  11. "io/ioutil"
  12. "math/rand"
  13. "net/http"
  14. "os"
  15. "path/filepath"
  16. "strings"
  17. "testing"
  18. )
  19. var (
  20. download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
  21. testdata = flag.String("testdata", "testdata", "Directory containing the test data")
  22. )
  23. func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
  24. got := maxEncodedLenOfMaxBlockSize
  25. want := MaxEncodedLen(maxBlockSize)
  26. if got != want {
  27. t.Fatalf("got %d, want %d", got, want)
  28. }
  29. }
  30. func roundtrip(b, ebuf, dbuf []byte) error {
  31. d, err := Decode(dbuf, Encode(ebuf, b))
  32. if err != nil {
  33. return fmt.Errorf("decoding error: %v", err)
  34. }
  35. if !bytes.Equal(b, d) {
  36. return fmt.Errorf("roundtrip mismatch:\n\twant %v\n\tgot %v", b, d)
  37. }
  38. return nil
  39. }
  40. func TestEmpty(t *testing.T) {
  41. if err := roundtrip(nil, nil, nil); err != nil {
  42. t.Fatal(err)
  43. }
  44. }
  45. func TestSmallCopy(t *testing.T) {
  46. for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
  47. for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
  48. for i := 0; i < 32; i++ {
  49. s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
  50. if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
  51. t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
  52. }
  53. }
  54. }
  55. }
  56. }
  57. func TestSmallRand(t *testing.T) {
  58. rng := rand.New(rand.NewSource(1))
  59. for n := 1; n < 20000; n += 23 {
  60. b := make([]byte, n)
  61. for i := range b {
  62. b[i] = uint8(rng.Intn(256))
  63. }
  64. if err := roundtrip(b, nil, nil); err != nil {
  65. t.Fatal(err)
  66. }
  67. }
  68. }
  69. func TestSmallRegular(t *testing.T) {
  70. for n := 1; n < 20000; n += 23 {
  71. b := make([]byte, n)
  72. for i := range b {
  73. b[i] = uint8(i%10 + 'a')
  74. }
  75. if err := roundtrip(b, nil, nil); err != nil {
  76. t.Fatal(err)
  77. }
  78. }
  79. }
  80. func TestInvalidVarint(t *testing.T) {
  81. testCases := []struct {
  82. desc string
  83. input string
  84. }{{
  85. "invalid varint, final byte has continuation bit set",
  86. "\xff",
  87. }, {
  88. "invalid varint, value overflows uint64",
  89. "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
  90. }, {
  91. // https://github.com/google/snappy/blob/master/format_description.txt
  92. // says that "the stream starts with the uncompressed length [as a
  93. // varint] (up to a maximum of 2^32 - 1)".
  94. "valid varint (as uint64), but value overflows uint32",
  95. "\x80\x80\x80\x80\x10",
  96. }}
  97. for _, tc := range testCases {
  98. input := []byte(tc.input)
  99. if _, err := DecodedLen(input); err != ErrCorrupt {
  100. t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
  101. }
  102. if _, err := Decode(nil, input); err != ErrCorrupt {
  103. t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
  104. }
  105. }
  106. }
  107. func TestDecode(t *testing.T) {
  108. lit40Bytes := make([]byte, 40)
  109. for i := range lit40Bytes {
  110. lit40Bytes[i] = byte(i)
  111. }
  112. lit40 := string(lit40Bytes)
  113. testCases := []struct {
  114. desc string
  115. input string
  116. want string
  117. wantErr error
  118. }{{
  119. `decodedLen=0; valid input`,
  120. "\x00",
  121. "",
  122. nil,
  123. }, {
  124. `decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
  125. "\x03" + "\x08\xff\xff\xff",
  126. "\xff\xff\xff",
  127. nil,
  128. }, {
  129. `decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
  130. "\x02" + "\x08\xff\xff\xff",
  131. "",
  132. ErrCorrupt,
  133. }, {
  134. `decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
  135. "\x03" + "\x08\xff\xff",
  136. "",
  137. ErrCorrupt,
  138. }, {
  139. `decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
  140. "\x28" + "\x9c" + lit40,
  141. lit40,
  142. nil,
  143. }, {
  144. `decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
  145. "\x01" + "\xf0",
  146. "",
  147. ErrCorrupt,
  148. }, {
  149. `decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
  150. "\x03" + "\xf0\x02\xff\xff\xff",
  151. "\xff\xff\xff",
  152. nil,
  153. }, {
  154. `decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
  155. "\x01" + "\xf4\x00",
  156. "",
  157. ErrCorrupt,
  158. }, {
  159. `decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
  160. "\x03" + "\xf4\x02\x00\xff\xff\xff",
  161. "\xff\xff\xff",
  162. nil,
  163. }, {
  164. `decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
  165. "\x01" + "\xf8\x00\x00",
  166. "",
  167. ErrCorrupt,
  168. }, {
  169. `decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
  170. "\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
  171. "\xff\xff\xff",
  172. nil,
  173. }, {
  174. `decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
  175. "\x01" + "\xfc\x00\x00\x00",
  176. "",
  177. ErrCorrupt,
  178. }, {
  179. `decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
  180. "\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
  181. "",
  182. ErrCorrupt,
  183. }, {
  184. `decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
  185. "\x04" + "\xfc\x02\x00\x00\x00\xff",
  186. "",
  187. ErrCorrupt,
  188. }, {
  189. `decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
  190. "\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
  191. "\xff\xff\xff",
  192. nil,
  193. }, {
  194. `decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
  195. "\x04" + "\x01",
  196. "",
  197. ErrCorrupt,
  198. }, {
  199. `decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
  200. "\x04" + "\x02\x00",
  201. "",
  202. ErrCorrupt,
  203. }, {
  204. `decodedLen=4; tagCopy4; unsupported COPY_4 tag`,
  205. "\x04" + "\x03\x00\x00\x00\x00",
  206. "",
  207. errUnsupportedCopy4Tag,
  208. }, {
  209. `decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
  210. "\x04" + "\x0cabcd",
  211. "abcd",
  212. nil,
  213. }, {
  214. `decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
  215. "\x0d" + "\x0cabcd" + "\x15\x04",
  216. "abcdabcdabcda",
  217. nil,
  218. }, {
  219. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
  220. "\x08" + "\x0cabcd" + "\x01\x04",
  221. "abcdabcd",
  222. nil,
  223. }, {
  224. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
  225. "\x08" + "\x0cabcd" + "\x01\x02",
  226. "abcdcdcd",
  227. nil,
  228. }, {
  229. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
  230. "\x08" + "\x0cabcd" + "\x01\x01",
  231. "abcddddd",
  232. nil,
  233. }, {
  234. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
  235. "\x08" + "\x0cabcd" + "\x01\x00",
  236. "",
  237. ErrCorrupt,
  238. }, {
  239. `decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
  240. "\x09" + "\x0cabcd" + "\x01\x04",
  241. "",
  242. ErrCorrupt,
  243. }, {
  244. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
  245. "\x08" + "\x0cabcd" + "\x01\x05",
  246. "",
  247. ErrCorrupt,
  248. }, {
  249. `decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
  250. "\x07" + "\x0cabcd" + "\x01\x04",
  251. "",
  252. ErrCorrupt,
  253. }, {
  254. `decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
  255. "\x06" + "\x0cabcd" + "\x06\x03\x00",
  256. "abcdbc",
  257. nil,
  258. }}
  259. for i, tc := range testCases {
  260. g, gotErr := Decode(nil, []byte(tc.input))
  261. if got := string(g); got != tc.want || gotErr != tc.wantErr {
  262. t.Errorf("#%d (%s):\ngot %q, %v\nwant %q, %v",
  263. i, tc.desc, got, gotErr, tc.want, tc.wantErr)
  264. }
  265. }
  266. }
  267. // TestDecodeLengthOffset tests decoding an encoding of the form literal +
  268. // copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
  269. func TestDecodeLengthOffset(t *testing.T) {
  270. const (
  271. prefix = "abcdefghijkl"
  272. suffix = "ABCDEFGHIJKL"
  273. )
  274. var gotBuf, wantBuf, inputBuf [256]byte
  275. for length := 1; length < 12; length++ {
  276. for offset := 1; offset < 12; offset++ {
  277. for suffixLen := 0; suffixLen < 12; suffixLen++ {
  278. inputLen := binary.PutUvarint(inputBuf[:], uint64(len(prefix)+length+suffixLen))
  279. inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
  280. inputLen++
  281. inputLen += copy(inputBuf[inputLen:], prefix)
  282. inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
  283. inputBuf[inputLen+1] = byte(offset)
  284. inputBuf[inputLen+2] = 0x00
  285. inputLen += 3
  286. if suffixLen > 0 {
  287. inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
  288. inputLen++
  289. inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
  290. }
  291. input := inputBuf[:inputLen]
  292. got, err := Decode(gotBuf[:], input)
  293. if err != nil {
  294. t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen)
  295. continue
  296. }
  297. wantLen := 0
  298. wantLen += copy(wantBuf[wantLen:], prefix)
  299. for i := 0; i < length; i++ {
  300. wantBuf[wantLen] = wantBuf[wantLen-offset]
  301. wantLen++
  302. }
  303. wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
  304. want := wantBuf[:wantLen]
  305. if !bytes.Equal(got, want) {
  306. t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot % x\nwant % x",
  307. length, offset, suffixLen, input, got, want)
  308. continue
  309. }
  310. }
  311. }
  312. }
  313. }
  314. // TestEncodeNoiseThenRepeats encodes input for which the first half is very
  315. // incompressible and the second half is very compressible. The encoded form's
  316. // length should be closer to 50% of the original length than 100%.
  317. func TestEncodeNoiseThenRepeats(t *testing.T) {
  318. for _, origLen := range []int{32 * 1024, 256 * 1024, 2048 * 1024} {
  319. src := make([]byte, origLen)
  320. rng := rand.New(rand.NewSource(1))
  321. firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
  322. for i := range firstHalf {
  323. firstHalf[i] = uint8(rng.Intn(256))
  324. }
  325. for i := range secondHalf {
  326. secondHalf[i] = uint8(i >> 8)
  327. }
  328. dst := Encode(nil, src)
  329. if got, want := len(dst), origLen*3/4; got >= want {
  330. t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
  331. }
  332. }
  333. }
  334. func cmp(a, b []byte) error {
  335. if len(a) != len(b) {
  336. return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
  337. }
  338. for i := range a {
  339. if a[i] != b[i] {
  340. return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i])
  341. }
  342. }
  343. return nil
  344. }
  345. func TestFramingFormat(t *testing.T) {
  346. // src is comprised of alternating 1e5-sized sequences of random
  347. // (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
  348. // because it is larger than maxBlockSize (64k).
  349. src := make([]byte, 1e6)
  350. rng := rand.New(rand.NewSource(1))
  351. for i := 0; i < 10; i++ {
  352. if i%2 == 0 {
  353. for j := 0; j < 1e5; j++ {
  354. src[1e5*i+j] = uint8(rng.Intn(256))
  355. }
  356. } else {
  357. for j := 0; j < 1e5; j++ {
  358. src[1e5*i+j] = uint8(i)
  359. }
  360. }
  361. }
  362. buf := new(bytes.Buffer)
  363. if _, err := NewWriter(buf).Write(src); err != nil {
  364. t.Fatalf("Write: encoding: %v", err)
  365. }
  366. dst, err := ioutil.ReadAll(NewReader(buf))
  367. if err != nil {
  368. t.Fatalf("ReadAll: decoding: %v", err)
  369. }
  370. if err := cmp(dst, src); err != nil {
  371. t.Fatal(err)
  372. }
  373. }
  374. func TestWriterGoldenOutput(t *testing.T) {
  375. buf := new(bytes.Buffer)
  376. w := NewBufferedWriter(buf)
  377. defer w.Close()
  378. w.Write([]byte("abcd")) // Not compressible.
  379. w.Flush()
  380. w.Write(bytes.Repeat([]byte{'A'}, 100)) // Compressible.
  381. w.Flush()
  382. got := buf.String()
  383. want := strings.Join([]string{
  384. magicChunk,
  385. "\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
  386. "\x68\x10\xe6\xb6", // Checksum.
  387. "\x61\x62\x63\x64", // Uncompressed payload: "abcd".
  388. "\x00\x0d\x00\x00", // Compressed chunk, 13 bytes long (including 4 byte checksum).
  389. "\x37\xcb\xbc\x9d", // Checksum.
  390. "\x64", // Compressed payload: Uncompressed length (varint encoded): 100.
  391. "\x00\x41", // Compressed payload: tagLiteral, length=1, "A".
  392. "\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
  393. "\x8a\x01\x00", // Compressed payload: tagCopy2, length=35, offset=1.
  394. }, "")
  395. if got != want {
  396. t.Fatalf("\ngot: % x\nwant: % x", got, want)
  397. }
  398. }
  399. func TestNewBufferedWriter(t *testing.T) {
  400. // Test all 32 possible sub-sequences of these 5 input slices.
  401. //
  402. // Their lengths sum to 400,000, which is over 6 times the Writer ibuf
  403. // capacity: 6 * maxBlockSize is 393,216.
  404. inputs := [][]byte{
  405. bytes.Repeat([]byte{'a'}, 40000),
  406. bytes.Repeat([]byte{'b'}, 150000),
  407. bytes.Repeat([]byte{'c'}, 60000),
  408. bytes.Repeat([]byte{'d'}, 120000),
  409. bytes.Repeat([]byte{'e'}, 30000),
  410. }
  411. loop:
  412. for i := 0; i < 1<<uint(len(inputs)); i++ {
  413. var want []byte
  414. buf := new(bytes.Buffer)
  415. w := NewBufferedWriter(buf)
  416. for j, input := range inputs {
  417. if i&(1<<uint(j)) == 0 {
  418. continue
  419. }
  420. if _, err := w.Write(input); err != nil {
  421. t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
  422. continue loop
  423. }
  424. want = append(want, input...)
  425. }
  426. if err := w.Close(); err != nil {
  427. t.Errorf("i=%#02x: Close: %v", i, err)
  428. continue
  429. }
  430. got, err := ioutil.ReadAll(NewReader(buf))
  431. if err != nil {
  432. t.Errorf("i=%#02x: ReadAll: %v", i, err)
  433. continue
  434. }
  435. if err := cmp(got, want); err != nil {
  436. t.Errorf("i=%#02x: %v", i, err)
  437. continue
  438. }
  439. }
  440. }
  441. func TestFlush(t *testing.T) {
  442. buf := new(bytes.Buffer)
  443. w := NewBufferedWriter(buf)
  444. defer w.Close()
  445. if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
  446. t.Fatalf("Write: %v", err)
  447. }
  448. if n := buf.Len(); n != 0 {
  449. t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
  450. }
  451. if err := w.Flush(); err != nil {
  452. t.Fatalf("Flush: %v", err)
  453. }
  454. if n := buf.Len(); n == 0 {
  455. t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
  456. }
  457. }
  458. func TestReaderReset(t *testing.T) {
  459. gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
  460. buf := new(bytes.Buffer)
  461. if _, err := NewWriter(buf).Write(gold); err != nil {
  462. t.Fatalf("Write: %v", err)
  463. }
  464. encoded, invalid, partial := buf.String(), "invalid", "partial"
  465. r := NewReader(nil)
  466. for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
  467. if s == partial {
  468. r.Reset(strings.NewReader(encoded))
  469. if _, err := r.Read(make([]byte, 101)); err != nil {
  470. t.Errorf("#%d: %v", i, err)
  471. continue
  472. }
  473. continue
  474. }
  475. r.Reset(strings.NewReader(s))
  476. got, err := ioutil.ReadAll(r)
  477. switch s {
  478. case encoded:
  479. if err != nil {
  480. t.Errorf("#%d: %v", i, err)
  481. continue
  482. }
  483. if err := cmp(got, gold); err != nil {
  484. t.Errorf("#%d: %v", i, err)
  485. continue
  486. }
  487. case invalid:
  488. if err == nil {
  489. t.Errorf("#%d: got nil error, want non-nil", i)
  490. continue
  491. }
  492. }
  493. }
  494. }
  495. func TestWriterReset(t *testing.T) {
  496. gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
  497. const n = 20
  498. for _, buffered := range []bool{false, true} {
  499. var w *Writer
  500. if buffered {
  501. w = NewBufferedWriter(nil)
  502. defer w.Close()
  503. } else {
  504. w = NewWriter(nil)
  505. }
  506. var gots, wants [][]byte
  507. failed := false
  508. for i := 0; i <= n; i++ {
  509. buf := new(bytes.Buffer)
  510. w.Reset(buf)
  511. want := gold[:len(gold)*i/n]
  512. if _, err := w.Write(want); err != nil {
  513. t.Errorf("#%d: Write: %v", i, err)
  514. failed = true
  515. continue
  516. }
  517. if buffered {
  518. if err := w.Flush(); err != nil {
  519. t.Errorf("#%d: Flush: %v", i, err)
  520. failed = true
  521. continue
  522. }
  523. }
  524. got, err := ioutil.ReadAll(NewReader(buf))
  525. if err != nil {
  526. t.Errorf("#%d: ReadAll: %v", i, err)
  527. failed = true
  528. continue
  529. }
  530. gots = append(gots, got)
  531. wants = append(wants, want)
  532. }
  533. if failed {
  534. continue
  535. }
  536. for i := range gots {
  537. if err := cmp(gots[i], wants[i]); err != nil {
  538. t.Errorf("#%d: %v", i, err)
  539. }
  540. }
  541. }
  542. }
  543. func TestWriterResetWithoutFlush(t *testing.T) {
  544. buf0 := new(bytes.Buffer)
  545. buf1 := new(bytes.Buffer)
  546. w := NewBufferedWriter(buf0)
  547. if _, err := w.Write([]byte("xxx")); err != nil {
  548. t.Fatalf("Write #0: %v", err)
  549. }
  550. // Note that we don't Flush the Writer before calling Reset.
  551. w.Reset(buf1)
  552. if _, err := w.Write([]byte("yyy")); err != nil {
  553. t.Fatalf("Write #1: %v", err)
  554. }
  555. if err := w.Flush(); err != nil {
  556. t.Fatalf("Flush: %v", err)
  557. }
  558. got, err := ioutil.ReadAll(NewReader(buf1))
  559. if err != nil {
  560. t.Fatalf("ReadAll: %v", err)
  561. }
  562. if err := cmp(got, []byte("yyy")); err != nil {
  563. t.Fatal(err)
  564. }
  565. }
  566. type writeCounter int
  567. func (c *writeCounter) Write(p []byte) (int, error) {
  568. *c++
  569. return len(p), nil
  570. }
  571. // TestNumUnderlyingWrites tests that each Writer flush only makes one or two
  572. // Write calls on its underlying io.Writer, depending on whether or not the
  573. // flushed buffer was compressible.
  574. func TestNumUnderlyingWrites(t *testing.T) {
  575. testCases := []struct {
  576. input []byte
  577. want int
  578. }{
  579. {bytes.Repeat([]byte{'x'}, 100), 1},
  580. {bytes.Repeat([]byte{'y'}, 100), 1},
  581. {[]byte("ABCDEFGHIJKLMNOPQRST"), 2},
  582. }
  583. var c writeCounter
  584. w := NewBufferedWriter(&c)
  585. defer w.Close()
  586. for i, tc := range testCases {
  587. c = 0
  588. if _, err := w.Write(tc.input); err != nil {
  589. t.Errorf("#%d: Write: %v", i, err)
  590. continue
  591. }
  592. if err := w.Flush(); err != nil {
  593. t.Errorf("#%d: Flush: %v", i, err)
  594. continue
  595. }
  596. if int(c) != tc.want {
  597. t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
  598. continue
  599. }
  600. }
  601. }
  602. func benchDecode(b *testing.B, src []byte) {
  603. encoded := Encode(nil, src)
  604. // Bandwidth is in amount of uncompressed data.
  605. b.SetBytes(int64(len(src)))
  606. b.ResetTimer()
  607. for i := 0; i < b.N; i++ {
  608. Decode(src, encoded)
  609. }
  610. }
  611. func benchEncode(b *testing.B, src []byte) {
  612. // Bandwidth is in amount of uncompressed data.
  613. b.SetBytes(int64(len(src)))
  614. dst := make([]byte, MaxEncodedLen(len(src)))
  615. b.ResetTimer()
  616. for i := 0; i < b.N; i++ {
  617. Encode(dst, src)
  618. }
  619. }
  620. func readFile(b testing.TB, filename string) []byte {
  621. src, err := ioutil.ReadFile(filename)
  622. if err != nil {
  623. b.Skipf("skipping benchmark: %v", err)
  624. }
  625. if len(src) == 0 {
  626. b.Fatalf("%s has zero length", filename)
  627. }
  628. return src
  629. }
  630. // expand returns a slice of length n containing repeated copies of src.
  631. func expand(src []byte, n int) []byte {
  632. dst := make([]byte, n)
  633. for x := dst; len(x) > 0; {
  634. i := copy(x, src)
  635. x = x[i:]
  636. }
  637. return dst
  638. }
  639. func benchWords(b *testing.B, n int, decode bool) {
  640. // Note: the file is OS-language dependent so the resulting values are not
  641. // directly comparable for non-US-English OS installations.
  642. data := expand(readFile(b, "/usr/share/dict/words"), n)
  643. if decode {
  644. benchDecode(b, data)
  645. } else {
  646. benchEncode(b, data)
  647. }
  648. }
  649. func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
  650. func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
  651. func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
  652. func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
  653. func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
  654. func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
  655. func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
  656. func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
  657. func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
  658. func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
  659. func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
  660. func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
  661. func BenchmarkRandomEncode(b *testing.B) {
  662. rng := rand.New(rand.NewSource(1))
  663. data := make([]byte, 1<<20)
  664. for i := range data {
  665. data[i] = uint8(rng.Intn(256))
  666. }
  667. benchEncode(b, data)
  668. }
  669. // testFiles' values are copied directly from
  670. // https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
  671. // The label field is unused in snappy-go.
  672. //
  673. // If this list changes (due to the upstream C++ list changing), remember to
  674. // update the .gitignore file in this repository.
  675. var testFiles = []struct {
  676. label string
  677. filename string
  678. sizeLimit int
  679. }{
  680. {"html", "html", 0},
  681. {"urls", "urls.10K", 0},
  682. {"jpg", "fireworks.jpeg", 0},
  683. {"jpg_200", "fireworks.jpeg", 200},
  684. {"pdf", "paper-100k.pdf", 0},
  685. {"html4", "html_x_4", 0},
  686. {"txt1", "alice29.txt", 0},
  687. {"txt2", "asyoulik.txt", 0},
  688. {"txt3", "lcet10.txt", 0},
  689. {"txt4", "plrabn12.txt", 0},
  690. {"pb", "geo.protodata", 0},
  691. {"gaviota", "kppkn.gtb", 0},
  692. }
  693. // The test data files are present at this canonical URL.
  694. const baseURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
  695. func downloadTestdata(b *testing.B, basename string) (errRet error) {
  696. filename := filepath.Join(*testdata, basename)
  697. if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
  698. return nil
  699. }
  700. if !*download {
  701. b.Skipf("test data not found; skipping benchmark without the -download flag")
  702. }
  703. // Download the official snappy C++ implementation reference test data
  704. // files for benchmarking.
  705. if err := os.Mkdir(*testdata, 0777); err != nil && !os.IsExist(err) {
  706. return fmt.Errorf("failed to create testdata: %s", err)
  707. }
  708. f, err := os.Create(filename)
  709. if err != nil {
  710. return fmt.Errorf("failed to create %s: %s", filename, err)
  711. }
  712. defer f.Close()
  713. defer func() {
  714. if errRet != nil {
  715. os.Remove(filename)
  716. }
  717. }()
  718. url := baseURL + basename
  719. resp, err := http.Get(url)
  720. if err != nil {
  721. return fmt.Errorf("failed to download %s: %s", url, err)
  722. }
  723. defer resp.Body.Close()
  724. if s := resp.StatusCode; s != http.StatusOK {
  725. return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
  726. }
  727. _, err = io.Copy(f, resp.Body)
  728. if err != nil {
  729. return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
  730. }
  731. return nil
  732. }
  733. func benchFile(b *testing.B, n int, decode bool) {
  734. if err := downloadTestdata(b, testFiles[n].filename); err != nil {
  735. b.Fatalf("failed to download testdata: %s", err)
  736. }
  737. data := readFile(b, filepath.Join(*testdata, testFiles[n].filename))
  738. if n := testFiles[n].sizeLimit; 0 < n && n < len(data) {
  739. data = data[:n]
  740. }
  741. if decode {
  742. benchDecode(b, data)
  743. } else {
  744. benchEncode(b, data)
  745. }
  746. }
  747. // Naming convention is kept similar to what snappy's C++ implementation uses.
  748. func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) }
  749. func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) }
  750. func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) }
  751. func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) }
  752. func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) }
  753. func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) }
  754. func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) }
  755. func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) }
  756. func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) }
  757. func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) }
  758. func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
  759. func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }
  760. func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) }
  761. func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) }
  762. func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) }
  763. func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) }
  764. func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) }
  765. func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) }
  766. func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) }
  767. func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) }
  768. func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) }
  769. func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) }
  770. func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
  771. func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }