gzip_test.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package gzip
  5. import (
  6. "bufio"
  7. "bytes"
  8. oldgz "compress/gzip"
  9. "io"
  10. "io/ioutil"
  11. "math/rand"
  12. "testing"
  13. "time"
  14. )
  15. // TestEmpty tests that an empty payload still forms a valid GZIP stream.
  16. func TestEmpty(t *testing.T) {
  17. buf := new(bytes.Buffer)
  18. if err := NewWriter(buf).Close(); err != nil {
  19. t.Fatalf("Writer.Close: %v", err)
  20. }
  21. r, err := NewReader(buf)
  22. if err != nil {
  23. t.Fatalf("NewReader: %v", err)
  24. }
  25. b, err := ioutil.ReadAll(r)
  26. if err != nil {
  27. t.Fatalf("ReadAll: %v", err)
  28. }
  29. if len(b) != 0 {
  30. t.Fatalf("got %d bytes, want 0", len(b))
  31. }
  32. if err := r.Close(); err != nil {
  33. t.Fatalf("Reader.Close: %v", err)
  34. }
  35. }
  36. // TestRoundTrip tests that gzipping and then gunzipping is the identity
  37. // function.
  38. func TestRoundTrip(t *testing.T) {
  39. buf := new(bytes.Buffer)
  40. w := NewWriter(buf)
  41. w.Comment = "comment"
  42. w.Extra = []byte("extra")
  43. w.ModTime = time.Unix(1e8, 0)
  44. w.Name = "name"
  45. if _, err := w.Write([]byte("payload")); err != nil {
  46. t.Fatalf("Write: %v", err)
  47. }
  48. if err := w.Close(); err != nil {
  49. t.Fatalf("Writer.Close: %v", err)
  50. }
  51. r, err := NewReader(buf)
  52. if err != nil {
  53. t.Fatalf("NewReader: %v", err)
  54. }
  55. b, err := ioutil.ReadAll(r)
  56. if err != nil {
  57. t.Fatalf("ReadAll: %v", err)
  58. }
  59. if string(b) != "payload" {
  60. t.Fatalf("payload is %q, want %q", string(b), "payload")
  61. }
  62. if r.Comment != "comment" {
  63. t.Fatalf("comment is %q, want %q", r.Comment, "comment")
  64. }
  65. if string(r.Extra) != "extra" {
  66. t.Fatalf("extra is %q, want %q", r.Extra, "extra")
  67. }
  68. if r.ModTime.Unix() != 1e8 {
  69. t.Fatalf("mtime is %d, want %d", r.ModTime.Unix(), uint32(1e8))
  70. }
  71. if r.Name != "name" {
  72. t.Fatalf("name is %q, want %q", r.Name, "name")
  73. }
  74. if err := r.Close(); err != nil {
  75. t.Fatalf("Reader.Close: %v", err)
  76. }
  77. }
  78. // TestLatin1 tests the internal functions for converting to and from Latin-1.
  79. func TestLatin1(t *testing.T) {
  80. latin1 := []byte{0xc4, 'u', 0xdf, 'e', 'r', 'u', 'n', 'g', 0}
  81. utf8 := "Äußerung"
  82. z := Reader{r: bufio.NewReader(bytes.NewReader(latin1))}
  83. s, err := z.readString()
  84. if err != nil {
  85. t.Fatalf("readString: %v", err)
  86. }
  87. if s != utf8 {
  88. t.Fatalf("read latin-1: got %q, want %q", s, utf8)
  89. }
  90. buf := bytes.NewBuffer(make([]byte, 0, len(latin1)))
  91. c := Writer{w: buf}
  92. if err = c.writeString(utf8); err != nil {
  93. t.Fatalf("writeString: %v", err)
  94. }
  95. s = buf.String()
  96. if s != string(latin1) {
  97. t.Fatalf("write utf-8: got %q, want %q", s, string(latin1))
  98. }
  99. }
  100. // TestLatin1RoundTrip tests that metadata that is representable in Latin-1
  101. // survives a round trip.
  102. func TestLatin1RoundTrip(t *testing.T) {
  103. testCases := []struct {
  104. name string
  105. ok bool
  106. }{
  107. {"", true},
  108. {"ASCII is OK", true},
  109. {"unless it contains a NUL\x00", false},
  110. {"no matter where \x00 occurs", false},
  111. {"\x00\x00\x00", false},
  112. {"Látin-1 also passes (U+00E1)", true},
  113. {"but LĀtin Extended-A (U+0100) does not", false},
  114. {"neither does 日本語", false},
  115. {"invalid UTF-8 also \xffails", false},
  116. {"\x00 as does Látin-1 with NUL", false},
  117. }
  118. for _, tc := range testCases {
  119. buf := new(bytes.Buffer)
  120. w := NewWriter(buf)
  121. w.Name = tc.name
  122. err := w.Close()
  123. if (err == nil) != tc.ok {
  124. t.Errorf("Writer.Close: name = %q, err = %v", tc.name, err)
  125. continue
  126. }
  127. if !tc.ok {
  128. continue
  129. }
  130. r, err := NewReader(buf)
  131. if err != nil {
  132. t.Errorf("NewReader: %v", err)
  133. continue
  134. }
  135. _, err = ioutil.ReadAll(r)
  136. if err != nil {
  137. t.Errorf("ReadAll: %v", err)
  138. continue
  139. }
  140. if r.Name != tc.name {
  141. t.Errorf("name is %q, want %q", r.Name, tc.name)
  142. continue
  143. }
  144. if err := r.Close(); err != nil {
  145. t.Errorf("Reader.Close: %v", err)
  146. continue
  147. }
  148. }
  149. }
  150. func TestWriterFlush(t *testing.T) {
  151. buf := new(bytes.Buffer)
  152. w := NewWriter(buf)
  153. w.Comment = "comment"
  154. w.Extra = []byte("extra")
  155. w.ModTime = time.Unix(1e8, 0)
  156. w.Name = "name"
  157. n0 := buf.Len()
  158. if n0 != 0 {
  159. t.Fatalf("buffer size = %d before writes; want 0", n0)
  160. }
  161. if err := w.Flush(); err != nil {
  162. t.Fatal(err)
  163. }
  164. n1 := buf.Len()
  165. if n1 == 0 {
  166. t.Fatal("no data after first flush")
  167. }
  168. w.Write([]byte("x"))
  169. n2 := buf.Len()
  170. if n1 != n2 {
  171. t.Fatalf("after writing a single byte, size changed from %d to %d; want no change", n1, n2)
  172. }
  173. if err := w.Flush(); err != nil {
  174. t.Fatal(err)
  175. }
  176. n3 := buf.Len()
  177. if n2 == n3 {
  178. t.Fatal("Flush didn't flush any data")
  179. }
  180. }
  181. // Multiple gzip files concatenated form a valid gzip file.
  182. func TestConcat(t *testing.T) {
  183. var buf bytes.Buffer
  184. w := NewWriter(&buf)
  185. w.Write([]byte("hello "))
  186. w.Close()
  187. w = NewWriter(&buf)
  188. w.Write([]byte("world\n"))
  189. w.Close()
  190. r, err := NewReader(&buf)
  191. data, err := ioutil.ReadAll(r)
  192. if string(data) != "hello world\n" || err != nil {
  193. t.Fatalf("ReadAll = %q, %v, want %q, nil", data, err, "hello world")
  194. }
  195. }
  196. func TestWriterReset(t *testing.T) {
  197. buf := new(bytes.Buffer)
  198. buf2 := new(bytes.Buffer)
  199. z := NewWriter(buf)
  200. msg := []byte("hello world")
  201. z.Write(msg)
  202. z.Close()
  203. z.Reset(buf2)
  204. z.Write(msg)
  205. z.Close()
  206. if buf.String() != buf2.String() {
  207. t.Errorf("buf2 %q != original buf of %q", buf2.String(), buf.String())
  208. }
  209. }
  210. var testbuf []byte
  211. func testFile(i, level int, t *testing.T) {
  212. dat, _ := ioutil.ReadFile("testdata/test.json")
  213. dl := len(dat)
  214. if len(testbuf) != i*dl {
  215. // Make results predictable
  216. testbuf = make([]byte, i*dl)
  217. for j := 0; j < i; j++ {
  218. copy(testbuf[j*dl:j*dl+dl], dat)
  219. }
  220. }
  221. br := bytes.NewBuffer(testbuf)
  222. var buf bytes.Buffer
  223. w, err := NewWriterLevel(&buf, DefaultCompression)
  224. if err != nil {
  225. t.Fatal(err)
  226. }
  227. n, err := io.Copy(w, br)
  228. if err != nil {
  229. t.Fatal(err)
  230. }
  231. if int(n) != len(testbuf) {
  232. t.Fatal("Short write:", n, "!=", testbuf)
  233. }
  234. err = w.Close()
  235. if err != nil {
  236. t.Fatal(err)
  237. }
  238. r, err := NewReader(&buf)
  239. if err != nil {
  240. t.Fatal(err.Error())
  241. }
  242. decoded, err := ioutil.ReadAll(r)
  243. if err != nil {
  244. t.Fatal(err.Error())
  245. }
  246. if !bytes.Equal(testbuf, decoded) {
  247. t.Errorf("decoded content does not match.")
  248. }
  249. }
  250. func TestFile1xM3(t *testing.T) { testFile(1, -3, t) }
  251. func TestFile1xM2(t *testing.T) { testFile(1, -2, t) }
  252. func TestFile1xM1(t *testing.T) { testFile(1, -1, t) }
  253. func TestFile1x0(t *testing.T) { testFile(1, 0, t) }
  254. func TestFile1x1(t *testing.T) { testFile(1, 1, t) }
  255. func TestFile1x2(t *testing.T) { testFile(1, 2, t) }
  256. func TestFile1x3(t *testing.T) { testFile(1, 3, t) }
  257. func TestFile1x4(t *testing.T) { testFile(1, 4, t) }
  258. func TestFile1x5(t *testing.T) { testFile(1, 5, t) }
  259. func TestFile1x6(t *testing.T) { testFile(1, 6, t) }
  260. func TestFile1x7(t *testing.T) { testFile(1, 7, t) }
  261. func TestFile1x8(t *testing.T) { testFile(1, 8, t) }
  262. func TestFile1x9(t *testing.T) { testFile(1, 9, t) }
  263. func TestFile10(t *testing.T) { testFile(10, DefaultCompression, t) }
  264. func TestFile50(t *testing.T) {
  265. if testing.Short() {
  266. t.Skip("skipping during short test")
  267. }
  268. testFile(50, DefaultCompression, t)
  269. }
  270. func TestFile200(t *testing.T) {
  271. if testing.Short() {
  272. t.Skip("skipping during short test")
  273. }
  274. testFile(200, BestSpeed, t)
  275. }
  276. func testBigGzip(i int, t *testing.T) {
  277. if len(testbuf) != i {
  278. // Make results predictable
  279. rand.Seed(1337)
  280. testbuf = make([]byte, i)
  281. for idx := range testbuf {
  282. testbuf[idx] = byte(65 + rand.Intn(20))
  283. }
  284. }
  285. c := BestCompression
  286. if testing.Short() {
  287. c = BestSpeed
  288. }
  289. br := bytes.NewBuffer(testbuf)
  290. var buf bytes.Buffer
  291. w, err := NewWriterLevel(&buf, c)
  292. if err != nil {
  293. t.Fatal(err)
  294. }
  295. n, err := io.Copy(w, br)
  296. if err != nil {
  297. t.Fatal(err)
  298. }
  299. if int(n) != len(testbuf) {
  300. t.Fatal("Short write:", n, "!=", len(testbuf))
  301. }
  302. err = w.Close()
  303. if err != nil {
  304. t.Fatal(err.Error())
  305. }
  306. r, err := NewReader(&buf)
  307. if err != nil {
  308. t.Fatal(err.Error())
  309. }
  310. decoded, err := ioutil.ReadAll(r)
  311. if err != nil {
  312. t.Fatal(err.Error())
  313. }
  314. if !bytes.Equal(testbuf, decoded) {
  315. t.Errorf("decoded content does not match.")
  316. }
  317. }
  318. func TestGzip1K(t *testing.T) { testBigGzip(1000, t) }
  319. func TestGzip100K(t *testing.T) { testBigGzip(100000, t) }
  320. func TestGzip1M(t *testing.T) {
  321. if testing.Short() {
  322. t.Skip("skipping during short test")
  323. }
  324. testBigGzip(1000000, t)
  325. }
  326. func TestGzip10M(t *testing.T) {
  327. if testing.Short() {
  328. t.Skip("skipping during short test")
  329. }
  330. testBigGzip(10000000, t)
  331. }
  332. // Test if two runs produce identical results.
  333. func TestDeterministicLM2(t *testing.T) { testDeterm(-2, t) }
  334. // Level 0 is not deterministic since it depends on the size of each write.
  335. // func TestDeterministicL0(t *testing.T) { testDeterm(0, t) }
  336. func TestDeterministicL1(t *testing.T) { testDeterm(1, t) }
  337. func TestDeterministicL2(t *testing.T) { testDeterm(2, t) }
  338. func TestDeterministicL3(t *testing.T) { testDeterm(3, t) }
  339. func TestDeterministicL4(t *testing.T) { testDeterm(4, t) }
  340. func TestDeterministicL5(t *testing.T) { testDeterm(5, t) }
  341. func TestDeterministicL6(t *testing.T) { testDeterm(6, t) }
  342. func TestDeterministicL7(t *testing.T) { testDeterm(7, t) }
  343. func TestDeterministicL8(t *testing.T) { testDeterm(8, t) }
  344. func TestDeterministicL9(t *testing.T) { testDeterm(9, t) }
  345. func testDeterm(i int, t *testing.T) {
  346. var length = 500000
  347. if testing.Short() {
  348. length = 100000
  349. }
  350. rand.Seed(1337)
  351. t1 := make([]byte, length)
  352. for idx := range t1 {
  353. t1[idx] = byte(65 + rand.Intn(8))
  354. }
  355. br := bytes.NewBuffer(t1)
  356. var b1 bytes.Buffer
  357. w, err := NewWriterLevel(&b1, i)
  358. if err != nil {
  359. t.Fatal(err)
  360. }
  361. _, err = io.Copy(w, br)
  362. if err != nil {
  363. t.Fatal(err)
  364. }
  365. w.Flush()
  366. w.Close()
  367. // We recreate the buffer, so we have a goos chance of getting a
  368. // different memory address.
  369. rand.Seed(1337)
  370. t2 := make([]byte, length)
  371. for idx := range t2 {
  372. t2[idx] = byte(65 + rand.Intn(8))
  373. }
  374. br2 := bytes.NewBuffer(t2)
  375. var b2 bytes.Buffer
  376. w2, err := NewWriterLevel(&b2, i)
  377. if err != nil {
  378. t.Fatal(err)
  379. }
  380. // We write the same data, but with a different size than
  381. // the default copy.
  382. for {
  383. _, err = io.CopyN(w2, br2, 1234)
  384. if err == io.EOF {
  385. err = nil
  386. break
  387. } else if err != nil {
  388. break
  389. }
  390. }
  391. if err != nil {
  392. t.Fatal(err)
  393. }
  394. w2.Flush()
  395. w2.Close()
  396. b1b := b1.Bytes()
  397. b2b := b2.Bytes()
  398. if bytes.Compare(b1b, b2b) != 0 {
  399. t.Fatalf("Level %d did not produce deterministric result, len(a) = %d, len(b) = %d", i, len(b1b), len(b2b))
  400. }
  401. }
  402. func BenchmarkGzipLM3(b *testing.B) { benchmarkGzipN(b, -3) }
  403. func BenchmarkGzipLM2(b *testing.B) { benchmarkGzipN(b, -2) }
  404. func BenchmarkGzipL1(b *testing.B) { benchmarkGzipN(b, 1) }
  405. func BenchmarkGzipL2(b *testing.B) { benchmarkGzipN(b, 2) }
  406. func BenchmarkGzipL3(b *testing.B) { benchmarkGzipN(b, 3) }
  407. func BenchmarkGzipL4(b *testing.B) { benchmarkGzipN(b, 4) }
  408. func BenchmarkGzipL5(b *testing.B) { benchmarkGzipN(b, 5) }
  409. func BenchmarkGzipL6(b *testing.B) { benchmarkGzipN(b, 6) }
  410. func BenchmarkGzipL7(b *testing.B) { benchmarkGzipN(b, 7) }
  411. func BenchmarkGzipL8(b *testing.B) { benchmarkGzipN(b, 8) }
  412. func BenchmarkGzipL9(b *testing.B) { benchmarkGzipN(b, 9) }
  413. func benchmarkGzipN(b *testing.B, level int) {
  414. dat, _ := ioutil.ReadFile("testdata/test.json")
  415. dat = append(dat, dat...)
  416. dat = append(dat, dat...)
  417. dat = append(dat, dat...)
  418. dat = append(dat, dat...)
  419. dat = append(dat, dat...)
  420. b.SetBytes(int64(len(dat)))
  421. w, _ := NewWriterLevel(ioutil.Discard, level)
  422. b.ResetTimer()
  423. for n := 0; n < b.N; n++ {
  424. w.Reset(ioutil.Discard)
  425. n, err := w.Write(dat)
  426. if n != len(dat) {
  427. panic("short write")
  428. }
  429. if err != nil {
  430. panic(err)
  431. }
  432. err = w.Close()
  433. if err != nil {
  434. panic(err)
  435. }
  436. }
  437. }
  438. func BenchmarkOldGzipL1(b *testing.B) { benchmarkOldGzipN(b, 1) }
  439. func BenchmarkOldGzipL2(b *testing.B) { benchmarkOldGzipN(b, 2) }
  440. func BenchmarkOldGzipL3(b *testing.B) { benchmarkOldGzipN(b, 3) }
  441. func BenchmarkOldGzipL4(b *testing.B) { benchmarkOldGzipN(b, 4) }
  442. func BenchmarkOldGzipL5(b *testing.B) { benchmarkOldGzipN(b, 5) }
  443. func BenchmarkOldGzipL6(b *testing.B) { benchmarkOldGzipN(b, 6) }
  444. func BenchmarkOldGzipL7(b *testing.B) { benchmarkOldGzipN(b, 7) }
  445. func BenchmarkOldGzipL8(b *testing.B) { benchmarkOldGzipN(b, 8) }
  446. func BenchmarkOldGzipL9(b *testing.B) { benchmarkOldGzipN(b, 9) }
  447. func benchmarkOldGzipN(b *testing.B, level int) {
  448. dat, _ := ioutil.ReadFile("testdata/test.json")
  449. dat = append(dat, dat...)
  450. dat = append(dat, dat...)
  451. dat = append(dat, dat...)
  452. dat = append(dat, dat...)
  453. dat = append(dat, dat...)
  454. b.SetBytes(int64(len(dat)))
  455. w, _ := oldgz.NewWriterLevel(ioutil.Discard, level)
  456. b.ResetTimer()
  457. for n := 0; n < b.N; n++ {
  458. w.Reset(ioutil.Discard)
  459. n, err := w.Write(dat)
  460. if n != len(dat) {
  461. panic("short write")
  462. }
  463. if err != nil {
  464. panic(err)
  465. }
  466. err = w.Close()
  467. if err != nil {
  468. panic(err)
  469. }
  470. }
  471. }