encoder_test.go 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032
  1. // Copyright 2019+ Klaus Post. All rights reserved.
  2. // License information can be found in the LICENSE file.
  3. // Based on work by Yann Collet, released under BSD License.
  4. package zstd
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "io/ioutil"
  10. "math/rand"
  11. "os"
  12. "runtime"
  13. "strings"
  14. "sync"
  15. "testing"
  16. "time"
  17. "github.com/klauspost/compress/zip"
  18. "github.com/klauspost/compress/zstd/internal/xxhash"
  19. )
  20. var testWindowSizes = []int{MinWindowSize, 1 << 16, 1 << 22, 1 << 24}
  21. func TestEncoder_EncodeAllSimple(t *testing.T) {
  22. in, err := ioutil.ReadFile("testdata/z000028")
  23. if err != nil {
  24. t.Fatal(err)
  25. }
  26. dec, err := NewReader(nil)
  27. if err != nil {
  28. t.Fatal(err)
  29. }
  30. defer dec.Close()
  31. in = append(in, in...)
  32. for level := EncoderLevel(speedNotSet + 1); level < speedLast; level++ {
  33. t.Run(level.String(), func(t *testing.T) {
  34. e, err := NewWriter(nil, WithEncoderLevel(level), WithEncoderConcurrency(2), WithWindowSize(128<<10), WithZeroFrames(true))
  35. if err != nil {
  36. t.Fatal(err)
  37. }
  38. defer e.Close()
  39. start := time.Now()
  40. dst := e.EncodeAll(in, nil)
  41. t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  42. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  43. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  44. decoded, err := dec.DecodeAll(dst, nil)
  45. if err != nil {
  46. t.Error(err, len(decoded))
  47. }
  48. if !bytes.Equal(decoded, in) {
  49. ioutil.WriteFile("testdata/"+t.Name()+"-z000028.got", decoded, os.ModePerm)
  50. ioutil.WriteFile("testdata/"+t.Name()+"-z000028.want", in, os.ModePerm)
  51. t.Fatal("Decoded does not match")
  52. }
  53. t.Log("Encoded content matched")
  54. })
  55. }
  56. }
  57. func TestEncoder_EncodeAllConcurrent(t *testing.T) {
  58. in, err := ioutil.ReadFile("testdata/z000028")
  59. if err != nil {
  60. t.Fatal(err)
  61. }
  62. in = append(in, in...)
  63. // When running race no more than 8k goroutines allowed.
  64. n := 4000 / runtime.GOMAXPROCS(0)
  65. if testing.Short() {
  66. n = 200 / runtime.GOMAXPROCS(0)
  67. }
  68. dec, err := NewReader(nil)
  69. if err != nil {
  70. t.Fatal(err)
  71. }
  72. defer dec.Close()
  73. for level := EncoderLevel(speedNotSet + 1); level < speedLast; level++ {
  74. t.Run(level.String(), func(t *testing.T) {
  75. rng := rand.New(rand.NewSource(0x1337))
  76. e, err := NewWriter(nil, WithEncoderLevel(level), WithZeroFrames(true))
  77. if err != nil {
  78. t.Fatal(err)
  79. }
  80. defer e.Close()
  81. var wg sync.WaitGroup
  82. wg.Add(n)
  83. for i := 0; i < n; i++ {
  84. in := in[rng.Int()&1023:]
  85. in = in[:rng.Intn(len(in))]
  86. go func() {
  87. defer wg.Done()
  88. dst := e.EncodeAll(in, nil)
  89. //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  90. decoded, err := dec.DecodeAll(dst, nil)
  91. if err != nil {
  92. t.Error(err, len(decoded))
  93. }
  94. if !bytes.Equal(decoded, in) {
  95. //ioutil.WriteFile("testdata/"+t.Name()+"-z000028.got", decoded, os.ModePerm)
  96. //ioutil.WriteFile("testdata/"+t.Name()+"-z000028.want", in, os.ModePerm)
  97. t.Fatal("Decoded does not match")
  98. }
  99. }()
  100. }
  101. wg.Wait()
  102. t.Log("Encoded content matched.", n, "goroutines")
  103. })
  104. }
  105. }
  106. func TestEncoder_EncodeAllEncodeXML(t *testing.T) {
  107. f, err := os.Open("testdata/xml.zst")
  108. if err != nil {
  109. t.Fatal(err)
  110. }
  111. dec, err := NewReader(f)
  112. if err != nil {
  113. t.Fatal(err)
  114. }
  115. defer dec.Close()
  116. in, err := ioutil.ReadAll(dec)
  117. if err != nil {
  118. t.Fatal(err)
  119. }
  120. for level := EncoderLevel(speedNotSet + 1); level < speedLast; level++ {
  121. t.Run(level.String(), func(t *testing.T) {
  122. e, err := NewWriter(nil, WithEncoderLevel(level))
  123. if err != nil {
  124. t.Fatal(err)
  125. }
  126. defer e.Close()
  127. start := time.Now()
  128. dst := e.EncodeAll(in, nil)
  129. t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  130. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  131. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  132. decoded, err := dec.DecodeAll(dst, nil)
  133. if err != nil {
  134. t.Error(err, len(decoded))
  135. }
  136. if !bytes.Equal(decoded, in) {
  137. ioutil.WriteFile("testdata/"+t.Name()+"-xml.got", decoded, os.ModePerm)
  138. t.Fatal("Decoded does not match")
  139. }
  140. t.Log("Encoded content matched")
  141. })
  142. }
  143. }
  144. func TestEncoderRegression(t *testing.T) {
  145. defer timeout(2 * time.Minute)()
  146. data, err := ioutil.ReadFile("testdata/comp-crashers.zip")
  147. if err != nil {
  148. t.Fatal(err)
  149. }
  150. // We can't close the decoder.
  151. dec, err := NewReader(nil)
  152. if err != nil {
  153. t.Error(err)
  154. return
  155. }
  156. defer dec.Close()
  157. testWindowSizes := testWindowSizes
  158. if testing.Short() {
  159. testWindowSizes = []int{1 << 20}
  160. }
  161. for level := EncoderLevel(speedNotSet + 1); level < speedLast; level++ {
  162. t.Run(level.String(), func(t *testing.T) {
  163. for _, windowSize := range testWindowSizes {
  164. t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
  165. zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
  166. if err != nil {
  167. t.Fatal(err)
  168. }
  169. enc, err := NewWriter(
  170. nil,
  171. WithEncoderCRC(true),
  172. WithEncoderLevel(level),
  173. WithWindowSize(windowSize),
  174. )
  175. if err != nil {
  176. t.Fatal(err)
  177. }
  178. defer enc.Close()
  179. for i, tt := range zr.File {
  180. if !strings.HasSuffix(t.Name(), "") {
  181. continue
  182. }
  183. if testing.Short() && i > 100 {
  184. break
  185. }
  186. t.Run(tt.Name, func(t *testing.T) {
  187. r, err := tt.Open()
  188. if err != nil {
  189. t.Error(err)
  190. return
  191. }
  192. in, err := ioutil.ReadAll(r)
  193. if err != nil {
  194. t.Error(err)
  195. }
  196. encoded := enc.EncodeAll(in, nil)
  197. got, err := dec.DecodeAll(encoded, nil)
  198. if err != nil {
  199. t.Logf("error: %v\nwant: %v\ngot: %v", err, len(in), len(got))
  200. t.Fatal(err)
  201. }
  202. // Use the Writer
  203. var dst bytes.Buffer
  204. enc.Reset(&dst)
  205. _, err = enc.Write(in)
  206. if err != nil {
  207. t.Error(err)
  208. }
  209. err = enc.Close()
  210. if err != nil {
  211. t.Error(err)
  212. }
  213. encoded = dst.Bytes()
  214. got, err = dec.DecodeAll(encoded, nil)
  215. if err != nil {
  216. t.Logf("error: %v\nwant: %v\ngot: %v", err, in, got)
  217. t.Fatal(err)
  218. }
  219. })
  220. }
  221. })
  222. }
  223. })
  224. }
  225. }
  226. func TestEncoder_EncodeAllTwain(t *testing.T) {
  227. in, err := ioutil.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt")
  228. if err != nil {
  229. t.Fatal(err)
  230. }
  231. testWindowSizes := testWindowSizes
  232. if testing.Short() {
  233. testWindowSizes = []int{1 << 20}
  234. }
  235. dec, err := NewReader(nil)
  236. if err != nil {
  237. t.Fatal(err)
  238. }
  239. defer dec.Close()
  240. for level := EncoderLevel(speedNotSet + 1); level < speedLast; level++ {
  241. t.Run(level.String(), func(t *testing.T) {
  242. for _, windowSize := range testWindowSizes {
  243. t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
  244. e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
  245. if err != nil {
  246. t.Fatal(err)
  247. }
  248. defer e.Close()
  249. start := time.Now()
  250. dst := e.EncodeAll(in, nil)
  251. t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  252. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  253. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  254. decoded, err := dec.DecodeAll(dst, nil)
  255. if err != nil {
  256. t.Error(err, len(decoded))
  257. }
  258. if !bytes.Equal(decoded, in) {
  259. ioutil.WriteFile("testdata/"+t.Name()+"-Mark.Twain-Tom.Sawyer.txt.got", decoded, os.ModePerm)
  260. t.Fatal("Decoded does not match")
  261. }
  262. t.Log("Encoded content matched")
  263. })
  264. }
  265. })
  266. }
  267. }
  268. func TestEncoder_EncodeAllPi(t *testing.T) {
  269. in, err := ioutil.ReadFile("../testdata/pi.txt")
  270. if err != nil {
  271. t.Fatal(err)
  272. }
  273. testWindowSizes := testWindowSizes
  274. if testing.Short() {
  275. testWindowSizes = []int{1 << 20}
  276. }
  277. dec, err := NewReader(nil)
  278. if err != nil {
  279. t.Fatal(err)
  280. }
  281. defer dec.Close()
  282. for level := EncoderLevel(speedNotSet + 1); level < speedLast; level++ {
  283. t.Run(level.String(), func(t *testing.T) {
  284. for _, windowSize := range testWindowSizes {
  285. t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
  286. e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
  287. if err != nil {
  288. t.Fatal(err)
  289. }
  290. defer e.Close()
  291. start := time.Now()
  292. dst := e.EncodeAll(in, nil)
  293. t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  294. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  295. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  296. decoded, err := dec.DecodeAll(dst, nil)
  297. if err != nil {
  298. t.Error(err, len(decoded))
  299. }
  300. if !bytes.Equal(decoded, in) {
  301. ioutil.WriteFile("testdata/"+t.Name()+"-pi.txt.got", decoded, os.ModePerm)
  302. t.Fatal("Decoded does not match")
  303. }
  304. t.Log("Encoded content matched")
  305. })
  306. }
  307. })
  308. }
  309. }
  310. func TestWithEncoderPadding(t *testing.T) {
  311. n := 100
  312. if testing.Short() {
  313. n = 5
  314. }
  315. rng := rand.New(rand.NewSource(0x1337))
  316. d, err := NewReader(nil)
  317. if err != nil {
  318. t.Fatal(err)
  319. }
  320. defer d.Close()
  321. for i := 0; i < n; i++ {
  322. padding := (rng.Int() & 0xfff) + 1
  323. src := make([]byte, (rng.Int()&0xfffff)+1)
  324. for i := range src {
  325. src[i] = uint8(rng.Uint32()) & 7
  326. }
  327. e, err := NewWriter(nil, WithEncoderPadding(padding), WithEncoderCRC(rng.Uint32()&1 == 0))
  328. if err != nil {
  329. t.Fatal(err)
  330. }
  331. // Test the added padding is invisible.
  332. dst := e.EncodeAll(src, nil)
  333. if len(dst)%padding != 0 {
  334. t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
  335. }
  336. got, err := d.DecodeAll(dst, nil)
  337. if err != nil {
  338. t.Fatal(err)
  339. }
  340. if !bytes.Equal(src, got) {
  341. t.Fatal("output mismatch")
  342. }
  343. // Test when we supply data as well.
  344. dst = e.EncodeAll(src, make([]byte, rng.Int()&255))
  345. if len(dst)%padding != 0 {
  346. t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
  347. }
  348. // Test using the writer.
  349. var buf bytes.Buffer
  350. e.Reset(&buf)
  351. _, err = io.Copy(e, bytes.NewBuffer(src))
  352. if err != nil {
  353. t.Fatal(err)
  354. }
  355. err = e.Close()
  356. if err != nil {
  357. t.Fatal(err)
  358. }
  359. dst = buf.Bytes()
  360. if len(dst)%padding != 0 {
  361. t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
  362. }
  363. // Test the added padding is invisible.
  364. got, err = d.DecodeAll(dst, nil)
  365. if err != nil {
  366. t.Fatal(err)
  367. }
  368. if !bytes.Equal(src, got) {
  369. t.Fatal("output mismatch")
  370. }
  371. // Try after reset
  372. buf.Reset()
  373. e.Reset(&buf)
  374. _, err = io.Copy(e, bytes.NewBuffer(src))
  375. if err != nil {
  376. t.Fatal(err)
  377. }
  378. err = e.Close()
  379. if err != nil {
  380. t.Fatal(err)
  381. }
  382. dst = buf.Bytes()
  383. if len(dst)%padding != 0 {
  384. t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
  385. }
  386. // Test the added padding is invisible.
  387. got, err = d.DecodeAll(dst, nil)
  388. if err != nil {
  389. t.Fatal(err)
  390. }
  391. if !bytes.Equal(src, got) {
  392. t.Fatal("output mismatch")
  393. }
  394. }
  395. }
  396. func TestEncoder_EncoderXML(t *testing.T) {
  397. testEncoderRoundtrip(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe})
  398. testEncoderRoundtripWriter(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe})
  399. }
  400. func TestEncoder_EncoderTwain(t *testing.T) {
  401. testEncoderRoundtrip(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6})
  402. testEncoderRoundtripWriter(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6})
  403. }
  404. func TestEncoder_EncoderPi(t *testing.T) {
  405. testEncoderRoundtrip(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb})
  406. testEncoderRoundtripWriter(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb})
  407. }
  408. func TestEncoder_EncoderSilesia(t *testing.T) {
  409. testEncoderRoundtrip(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b})
  410. testEncoderRoundtripWriter(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b})
  411. }
  412. func TestEncoder_EncoderSimple(t *testing.T) {
  413. testEncoderRoundtrip(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95})
  414. testEncoderRoundtripWriter(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95})
  415. }
  416. func TestEncoder_EncoderHTML(t *testing.T) {
  417. testEncoderRoundtrip(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37})
  418. testEncoderRoundtripWriter(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37})
  419. }
  420. func TestEncoder_EncoderEnwik9(t *testing.T) {
  421. testEncoderRoundtrip(t, "./testdata/enwik9.zst", []byte{0x28, 0xfa, 0xf4, 0x30, 0xca, 0x4b, 0x64, 0x12})
  422. testEncoderRoundtripWriter(t, "./testdata/enwik9.zst", []byte{0x28, 0xfa, 0xf4, 0x30, 0xca, 0x4b, 0x64, 0x12})
  423. }
  424. // test roundtrip using io.ReaderFrom interface.
  425. func testEncoderRoundtrip(t *testing.T, file string, wantCRC []byte) {
  426. for level := EncoderLevel(speedNotSet + 1); level < speedLast; level++ {
  427. t.Run(level.String(), func(t *testing.T) {
  428. f, err := os.Open(file)
  429. if err != nil {
  430. if os.IsNotExist(err) {
  431. t.Skip("No input file:", file)
  432. return
  433. }
  434. t.Fatal(err)
  435. }
  436. defer f.Close()
  437. input := io.Reader(f)
  438. if strings.HasSuffix(file, ".zst") {
  439. dec, err := NewReader(f)
  440. if err != nil {
  441. t.Fatal(err)
  442. }
  443. input = dec
  444. defer dec.Close()
  445. }
  446. pr, pw := io.Pipe()
  447. dec2, err := NewReader(pr)
  448. if err != nil {
  449. t.Fatal(err)
  450. }
  451. defer dec2.Close()
  452. enc, err := NewWriter(pw, WithEncoderCRC(true), WithEncoderLevel(level))
  453. if err != nil {
  454. t.Fatal(err)
  455. }
  456. defer enc.Close()
  457. var wantSize int64
  458. start := time.Now()
  459. go func() {
  460. n, err := enc.ReadFrom(input)
  461. if err != nil {
  462. t.Fatal(err)
  463. }
  464. wantSize = n
  465. err = enc.Close()
  466. if err != nil {
  467. t.Fatal(err)
  468. }
  469. pw.Close()
  470. }()
  471. var gotSize int64
  472. // Check CRC
  473. d := xxhash.New()
  474. if true {
  475. gotSize, err = io.Copy(d, dec2)
  476. } else {
  477. fout, err := os.Create(file + ".got")
  478. if err != nil {
  479. t.Fatal(err)
  480. }
  481. gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2)
  482. if err != nil {
  483. t.Fatal(err)
  484. }
  485. }
  486. if wantSize != gotSize {
  487. t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize)
  488. }
  489. if err != nil {
  490. t.Fatal(err)
  491. }
  492. if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) {
  493. t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC)
  494. } else if len(wantCRC) != 8 {
  495. t.Logf("Unable to verify CRC: %#v", gotCRC)
  496. } else {
  497. t.Logf("CRC Verified: %#v", gotCRC)
  498. }
  499. t.Log("Encoder len", wantSize)
  500. mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  501. t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec)
  502. })
  503. }
  504. }
  505. type writerWrapper struct {
  506. w io.Writer
  507. }
  508. func (w writerWrapper) Write(p []byte) (n int, err error) {
  509. return w.w.Write(p)
  510. }
  511. // test roundtrip using plain io.Writer interface.
  512. func testEncoderRoundtripWriter(t *testing.T, file string, wantCRC []byte) {
  513. f, err := os.Open(file)
  514. if err != nil {
  515. if os.IsNotExist(err) {
  516. t.Skip("No input file:", file)
  517. return
  518. }
  519. t.Fatal(err)
  520. }
  521. defer f.Close()
  522. input := io.Reader(f)
  523. if strings.HasSuffix(file, ".zst") {
  524. dec, err := NewReader(f)
  525. if err != nil {
  526. t.Fatal(err)
  527. }
  528. input = dec
  529. defer dec.Close()
  530. }
  531. pr, pw := io.Pipe()
  532. dec2, err := NewReader(pr)
  533. if err != nil {
  534. t.Fatal(err)
  535. }
  536. defer dec2.Close()
  537. enc, err := NewWriter(pw, WithEncoderCRC(true))
  538. if err != nil {
  539. t.Fatal(err)
  540. }
  541. defer enc.Close()
  542. encW := writerWrapper{w: enc}
  543. var wantSize int64
  544. start := time.Now()
  545. go func() {
  546. n, err := io.CopyBuffer(encW, input, make([]byte, 1337))
  547. if err != nil {
  548. t.Fatal(err)
  549. }
  550. wantSize = n
  551. err = enc.Close()
  552. if err != nil {
  553. t.Fatal(err)
  554. }
  555. pw.Close()
  556. }()
  557. var gotSize int64
  558. // Check CRC
  559. d := xxhash.New()
  560. if true {
  561. gotSize, err = io.Copy(d, dec2)
  562. } else {
  563. fout, err := os.Create(file + ".got")
  564. if err != nil {
  565. t.Fatal(err)
  566. }
  567. gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2)
  568. if err != nil {
  569. t.Fatal(err)
  570. }
  571. }
  572. if wantSize != gotSize {
  573. t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize)
  574. }
  575. if err != nil {
  576. t.Fatal(err)
  577. }
  578. if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) {
  579. t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC)
  580. } else if len(wantCRC) != 8 {
  581. t.Logf("Unable to verify CRC: %#v", gotCRC)
  582. } else {
  583. t.Logf("CRC Verified: %#v", gotCRC)
  584. }
  585. t.Log("Fast Encoder len", wantSize)
  586. mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  587. t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec)
  588. }
  589. func TestEncoder_EncodeAllSilesia(t *testing.T) {
  590. if testing.Short() {
  591. t.SkipNow()
  592. }
  593. in, err := ioutil.ReadFile("testdata/silesia.tar")
  594. if err != nil {
  595. if os.IsNotExist(err) {
  596. t.Skip("Missing testdata/silesia.tar")
  597. return
  598. }
  599. t.Fatal(err)
  600. }
  601. var e Encoder
  602. start := time.Now()
  603. dst := e.EncodeAll(in, nil)
  604. t.Log("Fast Encoder len", len(in), "-> zstd len", len(dst))
  605. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  606. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  607. dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20))
  608. if err != nil {
  609. t.Fatal(err)
  610. }
  611. defer dec.Close()
  612. decoded, err := dec.DecodeAll(dst, nil)
  613. if err != nil {
  614. t.Error(err, len(decoded))
  615. }
  616. if !bytes.Equal(decoded, in) {
  617. ioutil.WriteFile("testdata/"+t.Name()+"-silesia.tar.got", decoded, os.ModePerm)
  618. t.Fatal("Decoded does not match")
  619. }
  620. t.Log("Encoded content matched")
  621. }
  622. func TestEncoder_EncodeAllEmpty(t *testing.T) {
  623. if testing.Short() {
  624. t.SkipNow()
  625. }
  626. var in []byte
  627. e, err := NewWriter(nil, WithZeroFrames(true))
  628. if err != nil {
  629. t.Fatal(err)
  630. }
  631. defer e.Close()
  632. dst := e.EncodeAll(in, nil)
  633. if len(dst) == 0 {
  634. t.Fatal("Requested zero frame, but got nothing.")
  635. }
  636. t.Log("Block Encoder len", len(in), "-> zstd len", len(dst), dst)
  637. dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20))
  638. if err != nil {
  639. t.Fatal(err)
  640. }
  641. defer dec.Close()
  642. decoded, err := dec.DecodeAll(dst, nil)
  643. if err != nil {
  644. t.Error(err, len(decoded))
  645. }
  646. if !bytes.Equal(decoded, in) {
  647. t.Fatal("Decoded does not match")
  648. }
  649. // Test buffer writer.
  650. var buf bytes.Buffer
  651. e.Reset(&buf)
  652. err = e.Close()
  653. if err != nil {
  654. t.Fatal(err)
  655. }
  656. dst = buf.Bytes()
  657. if len(dst) == 0 {
  658. t.Fatal("Requested zero frame, but got nothing.")
  659. }
  660. t.Log("Buffer Encoder len", len(in), "-> zstd len", len(dst))
  661. decoded, err = dec.DecodeAll(dst, nil)
  662. if err != nil {
  663. t.Error(err, len(decoded))
  664. }
  665. if !bytes.Equal(decoded, in) {
  666. t.Fatal("Decoded does not match")
  667. }
  668. t.Log("Encoded content matched")
  669. }
  670. func TestEncoder_EncodeAllEnwik9(t *testing.T) {
  671. if false || testing.Short() {
  672. t.SkipNow()
  673. }
  674. file := "testdata/enwik9.zst"
  675. f, err := os.Open(file)
  676. if err != nil {
  677. if os.IsNotExist(err) {
  678. t.Skip("To run extended tests, download http://mattmahoney.net/dc/enwik9.zip unzip it \n" +
  679. "compress it with 'zstd -15 -T0 enwik9' and place it in " + file)
  680. }
  681. }
  682. dec, err := NewReader(f)
  683. if err != nil {
  684. t.Fatal(err)
  685. }
  686. defer dec.Close()
  687. in, err := ioutil.ReadAll(dec)
  688. if err != nil {
  689. t.Fatal(err)
  690. }
  691. start := time.Now()
  692. var e Encoder
  693. dst := e.EncodeAll(in, nil)
  694. t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  695. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  696. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  697. decoded, err := dec.DecodeAll(dst, nil)
  698. if err != nil {
  699. t.Error(err, len(decoded))
  700. }
  701. if !bytes.Equal(decoded, in) {
  702. ioutil.WriteFile("testdata/"+t.Name()+"-enwik9.got", decoded, os.ModePerm)
  703. t.Fatal("Decoded does not match")
  704. }
  705. t.Log("Encoded content matched")
  706. }
  707. func BenchmarkEncoder_EncodeAllXML(b *testing.B) {
  708. f, err := os.Open("testdata/xml.zst")
  709. if err != nil {
  710. b.Fatal(err)
  711. }
  712. dec, err := NewReader(f)
  713. if err != nil {
  714. b.Fatal(err)
  715. }
  716. in, err := ioutil.ReadAll(dec)
  717. if err != nil {
  718. b.Fatal(err)
  719. }
  720. dec.Close()
  721. enc := Encoder{}
  722. dst := enc.EncodeAll(in, nil)
  723. wantSize := len(dst)
  724. b.Log("Output size:", len(dst))
  725. b.ResetTimer()
  726. b.ReportAllocs()
  727. b.SetBytes(int64(len(in)))
  728. for i := 0; i < b.N; i++ {
  729. dst := enc.EncodeAll(in, dst[:0])
  730. if len(dst) != wantSize {
  731. b.Fatal(len(dst), "!=", wantSize)
  732. }
  733. }
  734. }
  735. func BenchmarkEncoder_EncodeAllSimple(b *testing.B) {
  736. f, err := os.Open("testdata/z000028")
  737. if err != nil {
  738. b.Fatal(err)
  739. }
  740. in, err := ioutil.ReadAll(f)
  741. if err != nil {
  742. b.Fatal(err)
  743. }
  744. for level := EncoderLevel(speedNotSet + 1); level < speedLast; level++ {
  745. b.Run(level.String(), func(b *testing.B) {
  746. enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level))
  747. if err != nil {
  748. b.Fatal(err)
  749. }
  750. defer enc.Close()
  751. dst := enc.EncodeAll(in, nil)
  752. wantSize := len(dst)
  753. b.ResetTimer()
  754. b.ReportAllocs()
  755. b.SetBytes(int64(len(in)))
  756. for i := 0; i < b.N; i++ {
  757. dst := enc.EncodeAll(in, dst[:0])
  758. if len(dst) != wantSize {
  759. b.Fatal(len(dst), "!=", wantSize)
  760. }
  761. }
  762. })
  763. }
  764. }
  765. func BenchmarkEncoder_EncodeAllSimple4K(b *testing.B) {
  766. f, err := os.Open("testdata/z000028")
  767. if err != nil {
  768. b.Fatal(err)
  769. }
  770. in, err := ioutil.ReadAll(f)
  771. if err != nil {
  772. b.Fatal(err)
  773. }
  774. in = in[:4096]
  775. for level := EncoderLevel(speedNotSet + 1); level < speedLast; level++ {
  776. b.Run(level.String(), func(b *testing.B) {
  777. enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level))
  778. if err != nil {
  779. b.Fatal(err)
  780. }
  781. defer enc.Close()
  782. dst := enc.EncodeAll(in, nil)
  783. wantSize := len(dst)
  784. b.ResetTimer()
  785. b.ReportAllocs()
  786. b.SetBytes(int64(len(in)))
  787. for i := 0; i < b.N; i++ {
  788. dst := enc.EncodeAll(in, dst[:0])
  789. if len(dst) != wantSize {
  790. b.Fatal(len(dst), "!=", wantSize)
  791. }
  792. }
  793. })
  794. }
  795. }
  796. func BenchmarkEncoder_EncodeAllHTML(b *testing.B) {
  797. f, err := os.Open("../testdata/html.txt")
  798. if err != nil {
  799. b.Fatal(err)
  800. }
  801. in, err := ioutil.ReadAll(f)
  802. if err != nil {
  803. b.Fatal(err)
  804. }
  805. enc := Encoder{}
  806. dst := enc.EncodeAll(in, nil)
  807. wantSize := len(dst)
  808. b.ResetTimer()
  809. b.ReportAllocs()
  810. b.SetBytes(int64(len(in)))
  811. for i := 0; i < b.N; i++ {
  812. dst := enc.EncodeAll(in, dst[:0])
  813. if len(dst) != wantSize {
  814. b.Fatal(len(dst), "!=", wantSize)
  815. }
  816. }
  817. }
  818. func BenchmarkEncoder_EncodeAllTwain(b *testing.B) {
  819. f, err := os.Open("../testdata/Mark.Twain-Tom.Sawyer.txt")
  820. if err != nil {
  821. b.Fatal(err)
  822. }
  823. in, err := ioutil.ReadAll(f)
  824. if err != nil {
  825. b.Fatal(err)
  826. }
  827. enc := Encoder{}
  828. dst := enc.EncodeAll(in, nil)
  829. wantSize := len(dst)
  830. b.ResetTimer()
  831. b.ReportAllocs()
  832. b.SetBytes(int64(len(in)))
  833. for i := 0; i < b.N; i++ {
  834. dst := enc.EncodeAll(in, dst[:0])
  835. if len(dst) != wantSize {
  836. b.Fatal(len(dst), "!=", wantSize)
  837. }
  838. }
  839. }
  840. func BenchmarkEncoder_EncodeAllPi(b *testing.B) {
  841. f, err := os.Open("../testdata/pi.txt")
  842. if err != nil {
  843. b.Fatal(err)
  844. }
  845. in, err := ioutil.ReadAll(f)
  846. if err != nil {
  847. b.Fatal(err)
  848. }
  849. enc := Encoder{}
  850. dst := enc.EncodeAll(in, nil)
  851. wantSize := len(dst)
  852. b.ResetTimer()
  853. b.ReportAllocs()
  854. b.SetBytes(int64(len(in)))
  855. for i := 0; i < b.N; i++ {
  856. dst := enc.EncodeAll(in, dst[:0])
  857. if len(dst) != wantSize {
  858. b.Fatal(len(dst), "!=", wantSize)
  859. }
  860. }
  861. }
  862. func BenchmarkRandomEncodeAllFastest(b *testing.B) {
  863. rng := rand.New(rand.NewSource(1))
  864. data := make([]byte, 10<<20)
  865. for i := range data {
  866. data[i] = uint8(rng.Intn(256))
  867. }
  868. enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(1))
  869. defer enc.Close()
  870. dst := enc.EncodeAll(data, nil)
  871. wantSize := len(dst)
  872. b.ResetTimer()
  873. b.ReportAllocs()
  874. b.SetBytes(int64(len(data)))
  875. for i := 0; i < b.N; i++ {
  876. dst := enc.EncodeAll(data, dst[:0])
  877. if len(dst) != wantSize {
  878. b.Fatal(len(dst), "!=", wantSize)
  879. }
  880. }
  881. }
  882. func BenchmarkRandomEncodeAllDefault(b *testing.B) {
  883. rng := rand.New(rand.NewSource(1))
  884. data := make([]byte, 10<<20)
  885. for i := range data {
  886. data[i] = uint8(rng.Intn(256))
  887. }
  888. enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1))
  889. defer enc.Close()
  890. dst := enc.EncodeAll(data, nil)
  891. wantSize := len(dst)
  892. b.ResetTimer()
  893. b.ReportAllocs()
  894. b.SetBytes(int64(len(data)))
  895. for i := 0; i < b.N; i++ {
  896. dst := enc.EncodeAll(data, dst[:0])
  897. if len(dst) != wantSize {
  898. b.Fatal(len(dst), "!=", wantSize)
  899. }
  900. }
  901. }
  902. func BenchmarkRandomEncoderFastest(b *testing.B) {
  903. rng := rand.New(rand.NewSource(1))
  904. data := make([]byte, 10<<20)
  905. for i := range data {
  906. data[i] = uint8(rng.Intn(256))
  907. }
  908. wantSize := int64(len(data))
  909. enc, _ := NewWriter(ioutil.Discard, WithEncoderLevel(SpeedFastest))
  910. defer enc.Close()
  911. n, err := io.Copy(enc, bytes.NewBuffer(data))
  912. if err != nil {
  913. b.Fatal(err)
  914. }
  915. if n != wantSize {
  916. b.Fatal(n, "!=", wantSize)
  917. }
  918. b.ResetTimer()
  919. b.ReportAllocs()
  920. b.SetBytes(wantSize)
  921. for i := 0; i < b.N; i++ {
  922. enc.Reset(ioutil.Discard)
  923. n, err := io.Copy(enc, bytes.NewBuffer(data))
  924. if err != nil {
  925. b.Fatal(err)
  926. }
  927. if n != wantSize {
  928. b.Fatal(n, "!=", wantSize)
  929. }
  930. }
  931. }
  932. func BenchmarkRandomEncoderDefault(b *testing.B) {
  933. rng := rand.New(rand.NewSource(1))
  934. data := make([]byte, 10<<20)
  935. for i := range data {
  936. data[i] = uint8(rng.Intn(256))
  937. }
  938. wantSize := int64(len(data))
  939. enc, _ := NewWriter(ioutil.Discard, WithEncoderLevel(SpeedDefault))
  940. defer enc.Close()
  941. n, err := io.Copy(enc, bytes.NewBuffer(data))
  942. if err != nil {
  943. b.Fatal(err)
  944. }
  945. if n != wantSize {
  946. b.Fatal(n, "!=", wantSize)
  947. }
  948. b.ResetTimer()
  949. b.ReportAllocs()
  950. b.SetBytes(wantSize)
  951. for i := 0; i < b.N; i++ {
  952. enc.Reset(ioutil.Discard)
  953. n, err := io.Copy(enc, bytes.NewBuffer(data))
  954. if err != nil {
  955. b.Fatal(err)
  956. }
  957. if n != wantSize {
  958. b.Fatal(n, "!=", wantSize)
  959. }
  960. }
  961. }