snappy_test.go 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319
  1. // Copyright 2011 The Snappy-Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package snappy
  5. import (
  6. "bytes"
  7. "encoding/binary"
  8. "flag"
  9. "fmt"
  10. "io"
  11. "io/ioutil"
  12. "math/rand"
  13. "net/http"
  14. "os"
  15. "os/exec"
  16. "path/filepath"
  17. "runtime"
  18. "strings"
  19. "testing"
  20. )
  21. var (
  22. download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
  23. testdataDir = flag.String("testdataDir", "testdata", "Directory containing the test data")
  24. benchdataDir = flag.String("benchdataDir", "testdata/bench", "Directory containing the benchmark data")
  25. )
  26. // goEncoderShouldMatchCppEncoder is whether to test that the algorithm used by
  27. // Go's encoder matches byte-for-byte what the C++ snappy encoder produces, on
  28. // this GOARCH. There is more than one valid encoding of any given input, and
  29. // there is more than one good algorithm along the frontier of trading off
  30. // throughput for output size. Nonetheless, we presume that the C++ encoder's
  31. // algorithm is a good one and has been tested on a wide range of inputs, so
  32. // matching that exactly should mean that the Go encoder's algorithm is also
  33. // good, without needing to gather our own corpus of test data.
  34. //
  35. // The exact algorithm used by the C++ code is potentially endian dependent, as
  36. // it puns a byte pointer to a uint32 pointer to load, hash and compare 4 bytes
  37. // at a time. The Go implementation is endian agnostic, in that its output is
  38. // the same (as little-endian C++ code), regardless of the CPU's endianness.
  39. //
  40. // Thus, when comparing Go's output to C++ output generated beforehand, such as
  41. // the "testdata/pi.txt.rawsnappy" file generated by C++ code on a little-
  42. // endian system, we can run that test regardless of the runtime.GOARCH value.
  43. //
  44. // When comparing Go's output to dynamically generated C++ output, i.e. the
  45. // result of fork/exec'ing a C++ program, we can run that test only on
  46. // little-endian systems, because the C++ output might be different on
  47. // big-endian systems. The runtime package doesn't export endianness per se,
  48. // but we can restrict this match-C++ test to common little-endian systems.
  49. const goEncoderShouldMatchCppEncoder = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "arm"
  50. func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
  51. got := maxEncodedLenOfMaxBlockSize
  52. want := MaxEncodedLen(maxBlockSize)
  53. if got != want {
  54. t.Fatalf("got %d, want %d", got, want)
  55. }
  56. }
  57. func cmp(a, b []byte) error {
  58. if bytes.Equal(a, b) {
  59. return nil
  60. }
  61. if len(a) != len(b) {
  62. return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
  63. }
  64. for i := range a {
  65. if a[i] != b[i] {
  66. return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i])
  67. }
  68. }
  69. return nil
  70. }
  71. func roundtrip(b, ebuf, dbuf []byte) error {
  72. d, err := Decode(dbuf, Encode(ebuf, b))
  73. if err != nil {
  74. return fmt.Errorf("decoding error: %v", err)
  75. }
  76. if err := cmp(d, b); err != nil {
  77. return fmt.Errorf("roundtrip mismatch: %v", err)
  78. }
  79. return nil
  80. }
  81. func TestEmpty(t *testing.T) {
  82. if err := roundtrip(nil, nil, nil); err != nil {
  83. t.Fatal(err)
  84. }
  85. }
  86. func TestSmallCopy(t *testing.T) {
  87. for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
  88. for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
  89. for i := 0; i < 32; i++ {
  90. s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
  91. if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
  92. t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
  93. }
  94. }
  95. }
  96. }
  97. }
  98. func TestSmallRand(t *testing.T) {
  99. rng := rand.New(rand.NewSource(1))
  100. for n := 1; n < 20000; n += 23 {
  101. b := make([]byte, n)
  102. for i := range b {
  103. b[i] = uint8(rng.Intn(256))
  104. }
  105. if err := roundtrip(b, nil, nil); err != nil {
  106. t.Fatal(err)
  107. }
  108. }
  109. }
  110. func TestSmallRegular(t *testing.T) {
  111. for n := 1; n < 20000; n += 23 {
  112. b := make([]byte, n)
  113. for i := range b {
  114. b[i] = uint8(i%10 + 'a')
  115. }
  116. if err := roundtrip(b, nil, nil); err != nil {
  117. t.Fatal(err)
  118. }
  119. }
  120. }
  121. func TestInvalidVarint(t *testing.T) {
  122. testCases := []struct {
  123. desc string
  124. input string
  125. }{{
  126. "invalid varint, final byte has continuation bit set",
  127. "\xff",
  128. }, {
  129. "invalid varint, value overflows uint64",
  130. "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
  131. }, {
  132. // https://github.com/google/snappy/blob/master/format_description.txt
  133. // says that "the stream starts with the uncompressed length [as a
  134. // varint] (up to a maximum of 2^32 - 1)".
  135. "valid varint (as uint64), but value overflows uint32",
  136. "\x80\x80\x80\x80\x10",
  137. }}
  138. for _, tc := range testCases {
  139. input := []byte(tc.input)
  140. if _, err := DecodedLen(input); err != ErrCorrupt {
  141. t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
  142. }
  143. if _, err := Decode(nil, input); err != ErrCorrupt {
  144. t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
  145. }
  146. }
  147. }
  148. func TestDecode(t *testing.T) {
  149. lit40Bytes := make([]byte, 40)
  150. for i := range lit40Bytes {
  151. lit40Bytes[i] = byte(i)
  152. }
  153. lit40 := string(lit40Bytes)
  154. testCases := []struct {
  155. desc string
  156. input string
  157. want string
  158. wantErr error
  159. }{{
  160. `decodedLen=0; valid input`,
  161. "\x00",
  162. "",
  163. nil,
  164. }, {
  165. `decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
  166. "\x03" + "\x08\xff\xff\xff",
  167. "\xff\xff\xff",
  168. nil,
  169. }, {
  170. `decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
  171. "\x02" + "\x08\xff\xff\xff",
  172. "",
  173. ErrCorrupt,
  174. }, {
  175. `decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
  176. "\x03" + "\x08\xff\xff",
  177. "",
  178. ErrCorrupt,
  179. }, {
  180. `decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
  181. "\x28" + "\x9c" + lit40,
  182. lit40,
  183. nil,
  184. }, {
  185. `decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
  186. "\x01" + "\xf0",
  187. "",
  188. ErrCorrupt,
  189. }, {
  190. `decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
  191. "\x03" + "\xf0\x02\xff\xff\xff",
  192. "\xff\xff\xff",
  193. nil,
  194. }, {
  195. `decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
  196. "\x01" + "\xf4\x00",
  197. "",
  198. ErrCorrupt,
  199. }, {
  200. `decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
  201. "\x03" + "\xf4\x02\x00\xff\xff\xff",
  202. "\xff\xff\xff",
  203. nil,
  204. }, {
  205. `decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
  206. "\x01" + "\xf8\x00\x00",
  207. "",
  208. ErrCorrupt,
  209. }, {
  210. `decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
  211. "\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
  212. "\xff\xff\xff",
  213. nil,
  214. }, {
  215. `decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
  216. "\x01" + "\xfc\x00\x00\x00",
  217. "",
  218. ErrCorrupt,
  219. }, {
  220. `decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
  221. "\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
  222. "",
  223. ErrCorrupt,
  224. }, {
  225. `decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
  226. "\x04" + "\xfc\x02\x00\x00\x00\xff",
  227. "",
  228. ErrCorrupt,
  229. }, {
  230. `decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
  231. "\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
  232. "\xff\xff\xff",
  233. nil,
  234. }, {
  235. `decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
  236. "\x04" + "\x01",
  237. "",
  238. ErrCorrupt,
  239. }, {
  240. `decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
  241. "\x04" + "\x02\x00",
  242. "",
  243. ErrCorrupt,
  244. }, {
  245. `decodedLen=4; tagCopy4; unsupported COPY_4 tag`,
  246. "\x04" + "\x03\x00\x00\x00\x00",
  247. "",
  248. errUnsupportedCopy4Tag,
  249. }, {
  250. `decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
  251. "\x04" + "\x0cabcd",
  252. "abcd",
  253. nil,
  254. }, {
  255. `decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
  256. "\x0d" + "\x0cabcd" + "\x15\x04",
  257. "abcdabcdabcda",
  258. nil,
  259. }, {
  260. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
  261. "\x08" + "\x0cabcd" + "\x01\x04",
  262. "abcdabcd",
  263. nil,
  264. }, {
  265. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
  266. "\x08" + "\x0cabcd" + "\x01\x02",
  267. "abcdcdcd",
  268. nil,
  269. }, {
  270. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
  271. "\x08" + "\x0cabcd" + "\x01\x01",
  272. "abcddddd",
  273. nil,
  274. }, {
  275. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
  276. "\x08" + "\x0cabcd" + "\x01\x00",
  277. "",
  278. ErrCorrupt,
  279. }, {
  280. `decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
  281. "\x09" + "\x0cabcd" + "\x01\x04",
  282. "",
  283. ErrCorrupt,
  284. }, {
  285. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
  286. "\x08" + "\x0cabcd" + "\x01\x05",
  287. "",
  288. ErrCorrupt,
  289. }, {
  290. `decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
  291. "\x07" + "\x0cabcd" + "\x01\x04",
  292. "",
  293. ErrCorrupt,
  294. }, {
  295. `decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
  296. "\x06" + "\x0cabcd" + "\x06\x03\x00",
  297. "abcdbc",
  298. nil,
  299. }}
  300. const (
  301. // notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
  302. // not present in either the input or the output. It is written to dBuf
  303. // to check that Decode does not write bytes past the end of
  304. // dBuf[:dLen].
  305. //
  306. // The magic number 37 was chosen because it is prime. A more 'natural'
  307. // number like 32 might lead to a false negative if, for example, a
  308. // byte was incorrectly copied 4*8 bytes later.
  309. notPresentBase = 0xa0
  310. notPresentLen = 37
  311. )
  312. var dBuf [100]byte
  313. loop:
  314. for i, tc := range testCases {
  315. input := []byte(tc.input)
  316. for _, x := range input {
  317. if notPresentBase <= x && x < notPresentBase+notPresentLen {
  318. t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
  319. continue loop
  320. }
  321. }
  322. dLen, n := binary.Uvarint(input)
  323. if n <= 0 {
  324. t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
  325. continue
  326. }
  327. if dLen > uint64(len(dBuf)) {
  328. t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
  329. continue
  330. }
  331. for j := range dBuf {
  332. dBuf[j] = byte(notPresentBase + j%notPresentLen)
  333. }
  334. g, gotErr := Decode(dBuf[:], input)
  335. if got := string(g); got != tc.want || gotErr != tc.wantErr {
  336. t.Errorf("#%d (%s):\ngot %q, %v\nwant %q, %v",
  337. i, tc.desc, got, gotErr, tc.want, tc.wantErr)
  338. continue
  339. }
  340. for j, x := range dBuf {
  341. if uint64(j) < dLen {
  342. continue
  343. }
  344. if w := byte(notPresentBase + j%notPresentLen); x != w {
  345. t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
  346. i, tc.desc, j, x, w, dBuf)
  347. continue loop
  348. }
  349. }
  350. }
  351. }
  352. // TestDecodeLengthOffset tests decoding an encoding of the form literal +
  353. // copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
  354. func TestDecodeLengthOffset(t *testing.T) {
  355. const (
  356. prefix = "abcdefghijklmnopqr"
  357. suffix = "ABCDEFGHIJKLMNOPQR"
  358. // notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
  359. // not present in either the input or the output. It is written to
  360. // gotBuf to check that Decode does not write bytes past the end of
  361. // gotBuf[:totalLen].
  362. //
  363. // The magic number 37 was chosen because it is prime. A more 'natural'
  364. // number like 32 might lead to a false negative if, for example, a
  365. // byte was incorrectly copied 4*8 bytes later.
  366. notPresentBase = 0xa0
  367. notPresentLen = 37
  368. )
  369. var gotBuf, wantBuf, inputBuf [128]byte
  370. for length := 1; length <= 18; length++ {
  371. for offset := 1; offset <= 18; offset++ {
  372. loop:
  373. for suffixLen := 0; suffixLen <= 18; suffixLen++ {
  374. totalLen := len(prefix) + length + suffixLen
  375. inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
  376. inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
  377. inputLen++
  378. inputLen += copy(inputBuf[inputLen:], prefix)
  379. inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
  380. inputBuf[inputLen+1] = byte(offset)
  381. inputBuf[inputLen+2] = 0x00
  382. inputLen += 3
  383. if suffixLen > 0 {
  384. inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
  385. inputLen++
  386. inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
  387. }
  388. input := inputBuf[:inputLen]
  389. for i := range gotBuf {
  390. gotBuf[i] = byte(notPresentBase + i%notPresentLen)
  391. }
  392. got, err := Decode(gotBuf[:], input)
  393. if err != nil {
  394. t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
  395. continue
  396. }
  397. wantLen := 0
  398. wantLen += copy(wantBuf[wantLen:], prefix)
  399. for i := 0; i < length; i++ {
  400. wantBuf[wantLen] = wantBuf[wantLen-offset]
  401. wantLen++
  402. }
  403. wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
  404. want := wantBuf[:wantLen]
  405. for _, x := range input {
  406. if notPresentBase <= x && x < notPresentBase+notPresentLen {
  407. t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
  408. length, offset, suffixLen, x, input)
  409. continue loop
  410. }
  411. }
  412. for i, x := range gotBuf {
  413. if i < totalLen {
  414. continue
  415. }
  416. if w := byte(notPresentBase + i%notPresentLen); x != w {
  417. t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
  418. "Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
  419. length, offset, suffixLen, totalLen, i, x, w, gotBuf)
  420. continue loop
  421. }
  422. }
  423. for _, x := range want {
  424. if notPresentBase <= x && x < notPresentBase+notPresentLen {
  425. t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
  426. length, offset, suffixLen, x, want)
  427. continue loop
  428. }
  429. }
  430. if !bytes.Equal(got, want) {
  431. t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot % x\nwant % x",
  432. length, offset, suffixLen, input, got, want)
  433. continue
  434. }
  435. }
  436. }
  437. }
  438. }
  439. const (
  440. goldenText = "Mark.Twain-Tom.Sawyer.txt"
  441. goldenCompressed = goldenText + ".rawsnappy"
  442. )
  443. func TestDecodeGoldenInput(t *testing.T) {
  444. tDir := filepath.FromSlash(*testdataDir)
  445. src, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
  446. if err != nil {
  447. t.Fatalf("ReadFile: %v", err)
  448. }
  449. got, err := Decode(nil, src)
  450. if err != nil {
  451. t.Fatalf("Decode: %v", err)
  452. }
  453. want, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
  454. if err != nil {
  455. t.Fatalf("ReadFile: %v", err)
  456. }
  457. if err := cmp(got, want); err != nil {
  458. t.Fatal(err)
  459. }
  460. }
  461. func TestEncodeGoldenInput(t *testing.T) {
  462. tDir := filepath.FromSlash(*testdataDir)
  463. src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
  464. if err != nil {
  465. t.Fatalf("ReadFile: %v", err)
  466. }
  467. got := Encode(nil, src)
  468. want, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
  469. if err != nil {
  470. t.Fatalf("ReadFile: %v", err)
  471. }
  472. if err := cmp(got, want); err != nil {
  473. t.Fatal(err)
  474. }
  475. }
  476. func TestExtendMatchGoldenInput(t *testing.T) {
  477. tDir := filepath.FromSlash(*testdataDir)
  478. src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
  479. if err != nil {
  480. t.Fatalf("ReadFile: %v", err)
  481. }
  482. for i, tc := range extendMatchGoldenTestCases {
  483. got := extendMatch(src, tc.i, tc.j)
  484. if got != tc.want {
  485. t.Errorf("test #%d: i, j = %5d, %5d: got %5d (= j + %6d), want %5d (= j + %6d)",
  486. i, tc.i, tc.j, got, got-tc.j, tc.want, tc.want-tc.j)
  487. }
  488. }
  489. }
  490. func TestExtendMatch(t *testing.T) {
  491. // ref is a simple, reference implementation of extendMatch.
  492. ref := func(src []byte, i, j int) int {
  493. for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
  494. }
  495. return j
  496. }
  497. nums := []int{0, 1, 2, 7, 8, 9, 29, 30, 31, 32, 33, 34, 38, 39, 40}
  498. for yIndex := 40; yIndex > 30; yIndex-- {
  499. xxx := bytes.Repeat([]byte("x"), 40)
  500. if yIndex < len(xxx) {
  501. xxx[yIndex] = 'y'
  502. }
  503. for _, i := range nums {
  504. for _, j := range nums {
  505. if i >= j {
  506. continue
  507. }
  508. got := extendMatch(xxx, i, j)
  509. want := ref(xxx, i, j)
  510. if got != want {
  511. t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
  512. }
  513. }
  514. }
  515. }
  516. }
  517. const snappytoolCmdName = "cmd/snappytool/snappytool"
  518. func skipTestSameEncodingAsCpp() (msg string) {
  519. if !goEncoderShouldMatchCppEncoder {
  520. return fmt.Sprintf("skipping testing that the encoding is byte-for-byte identical to C++: GOARCH=%s", runtime.GOARCH)
  521. }
  522. if _, err := os.Stat(snappytoolCmdName); err != nil {
  523. return fmt.Sprintf("could not find snappytool: %v", err)
  524. }
  525. return ""
  526. }
  527. func runTestSameEncodingAsCpp(src []byte) error {
  528. got := Encode(nil, src)
  529. cmd := exec.Command(snappytoolCmdName, "-e")
  530. cmd.Stdin = bytes.NewReader(src)
  531. want, err := cmd.Output()
  532. if err != nil {
  533. return fmt.Errorf("could not run snappytool: %v", err)
  534. }
  535. return cmp(got, want)
  536. }
  537. func TestSameEncodingAsCppShortCopies(t *testing.T) {
  538. if msg := skipTestSameEncodingAsCpp(); msg != "" {
  539. t.Skip(msg)
  540. }
  541. src := bytes.Repeat([]byte{'a'}, 20)
  542. for i := 0; i <= len(src); i++ {
  543. if err := runTestSameEncodingAsCpp(src[:i]); err != nil {
  544. t.Errorf("i=%d: %v", i, err)
  545. }
  546. }
  547. }
  548. func TestSameEncodingAsCppLongFiles(t *testing.T) {
  549. if msg := skipTestSameEncodingAsCpp(); msg != "" {
  550. t.Skip(msg)
  551. }
  552. bDir := filepath.FromSlash(*benchdataDir)
  553. failed := false
  554. for i, tf := range testFiles {
  555. if err := downloadBenchmarkFiles(t, tf.filename); err != nil {
  556. t.Fatalf("failed to download testdata: %s", err)
  557. }
  558. data := readFile(t, filepath.Join(bDir, tf.filename))
  559. if n := tf.sizeLimit; 0 < n && n < len(data) {
  560. data = data[:n]
  561. }
  562. if err := runTestSameEncodingAsCpp(data); err != nil {
  563. t.Errorf("i=%d: %v", i, err)
  564. failed = true
  565. }
  566. }
  567. if failed {
  568. t.Errorf("was the snappytool program built against the C++ snappy library version " +
  569. "d53de187 or later, commited on 2016-04-05? See " +
  570. "https://github.com/google/snappy/commit/d53de18799418e113e44444252a39b12a0e4e0cc")
  571. }
  572. }
  573. // TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
  574. // described in decode_amd64.s and its claim of a 10 byte overrun worst case.
  575. func TestSlowForwardCopyOverrun(t *testing.T) {
  576. const base = 100
  577. for length := 1; length < 18; length++ {
  578. for offset := 1; offset < 18; offset++ {
  579. highWaterMark := base
  580. d := base
  581. l := length
  582. o := offset
  583. // makeOffsetAtLeast8
  584. for o < 8 {
  585. if end := d + 8; highWaterMark < end {
  586. highWaterMark = end
  587. }
  588. l -= o
  589. d += o
  590. o += o
  591. }
  592. // fixUpSlowForwardCopy
  593. a := d
  594. d += l
  595. // finishSlowForwardCopy
  596. for l > 0 {
  597. if end := a + 8; highWaterMark < end {
  598. highWaterMark = end
  599. }
  600. a += 8
  601. l -= 8
  602. }
  603. dWant := base + length
  604. overrun := highWaterMark - dWant
  605. if d != dWant || overrun < 0 || 10 < overrun {
  606. t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
  607. length, offset, d, overrun, dWant)
  608. }
  609. }
  610. }
  611. }
  612. // TestEncodeNoiseThenRepeats encodes input for which the first half is very
  613. // incompressible and the second half is very compressible. The encoded form's
  614. // length should be closer to 50% of the original length than 100%.
  615. func TestEncodeNoiseThenRepeats(t *testing.T) {
  616. for _, origLen := range []int{256 * 1024, 2048 * 1024} {
  617. src := make([]byte, origLen)
  618. rng := rand.New(rand.NewSource(1))
  619. firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
  620. for i := range firstHalf {
  621. firstHalf[i] = uint8(rng.Intn(256))
  622. }
  623. for i := range secondHalf {
  624. secondHalf[i] = uint8(i >> 8)
  625. }
  626. dst := Encode(nil, src)
  627. if got, want := len(dst), origLen*3/4; got >= want {
  628. t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
  629. }
  630. }
  631. }
  632. func TestFramingFormat(t *testing.T) {
  633. // src is comprised of alternating 1e5-sized sequences of random
  634. // (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
  635. // because it is larger than maxBlockSize (64k).
  636. src := make([]byte, 1e6)
  637. rng := rand.New(rand.NewSource(1))
  638. for i := 0; i < 10; i++ {
  639. if i%2 == 0 {
  640. for j := 0; j < 1e5; j++ {
  641. src[1e5*i+j] = uint8(rng.Intn(256))
  642. }
  643. } else {
  644. for j := 0; j < 1e5; j++ {
  645. src[1e5*i+j] = uint8(i)
  646. }
  647. }
  648. }
  649. buf := new(bytes.Buffer)
  650. if _, err := NewWriter(buf).Write(src); err != nil {
  651. t.Fatalf("Write: encoding: %v", err)
  652. }
  653. dst, err := ioutil.ReadAll(NewReader(buf))
  654. if err != nil {
  655. t.Fatalf("ReadAll: decoding: %v", err)
  656. }
  657. if err := cmp(dst, src); err != nil {
  658. t.Fatal(err)
  659. }
  660. }
  661. func TestWriterGoldenOutput(t *testing.T) {
  662. buf := new(bytes.Buffer)
  663. w := NewBufferedWriter(buf)
  664. defer w.Close()
  665. w.Write([]byte("abcd")) // Not compressible.
  666. w.Flush()
  667. w.Write(bytes.Repeat([]byte{'A'}, 150)) // Compressible.
  668. w.Flush()
  669. // The next chunk is also compressible, but a naive, greedy encoding of the
  670. // overall length 67 copy as a length 64 copy (the longest expressible as a
  671. // tagCopy1 or tagCopy2) plus a length 3 remainder would be two 3-byte
  672. // tagCopy2 tags (6 bytes), since the minimum length for a tagCopy1 is 4
  673. // bytes. Instead, we could do it shorter, in 5 bytes: a 3-byte tagCopy2
  674. // (of length 60) and a 2-byte tagCopy1 (of length 7).
  675. w.Write(bytes.Repeat([]byte{'B'}, 68))
  676. w.Write([]byte("efC")) // Not compressible.
  677. w.Write(bytes.Repeat([]byte{'C'}, 20)) // Compressible.
  678. w.Write(bytes.Repeat([]byte{'B'}, 20)) // Compressible.
  679. w.Write([]byte("g")) // Not compressible.
  680. w.Flush()
  681. got := buf.String()
  682. want := strings.Join([]string{
  683. magicChunk,
  684. "\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
  685. "\x68\x10\xe6\xb6", // Checksum.
  686. "\x61\x62\x63\x64", // Uncompressed payload: "abcd".
  687. "\x00\x11\x00\x00", // Compressed chunk, 17 bytes long (including 4 byte checksum).
  688. "\x5f\xeb\xf2\x10", // Checksum.
  689. "\x96\x01", // Compressed payload: Uncompressed length (varint encoded): 150.
  690. "\x00\x41", // Compressed payload: tagLiteral, length=1, "A".
  691. "\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
  692. "\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
  693. "\x52\x01\x00", // Compressed payload: tagCopy2, length=21, offset=1.
  694. "\x00\x18\x00\x00", // Compressed chunk, 24 bytes long (including 4 byte checksum).
  695. "\x30\x85\x69\xeb", // Checksum.
  696. "\x70", // Compressed payload: Uncompressed length (varint encoded): 112.
  697. "\x00\x42", // Compressed payload: tagLiteral, length=1, "B".
  698. "\xee\x01\x00", // Compressed payload: tagCopy2, length=60, offset=1.
  699. "\x0d\x01", // Compressed payload: tagCopy1, length=7, offset=1.
  700. "\x08\x65\x66\x43", // Compressed payload: tagLiteral, length=3, "efC".
  701. "\x4e\x01\x00", // Compressed payload: tagCopy2, length=20, offset=1.
  702. "\x4e\x5a\x00", // Compressed payload: tagCopy2, length=20, offset=90.
  703. "\x00\x67", // Compressed payload: tagLiteral, length=1, "g".
  704. }, "")
  705. if got != want {
  706. t.Fatalf("\ngot: % x\nwant: % x", got, want)
  707. }
  708. }
  709. func TestEmitLiteral(t *testing.T) {
  710. testCases := []struct {
  711. length int
  712. want string
  713. }{
  714. {1, "\x00"},
  715. {2, "\x04"},
  716. {59, "\xe8"},
  717. {60, "\xec"},
  718. {61, "\xf0\x3c"},
  719. {62, "\xf0\x3d"},
  720. {254, "\xf0\xfd"},
  721. {255, "\xf0\xfe"},
  722. {256, "\xf0\xff"},
  723. {257, "\xf4\x00\x01"},
  724. {65534, "\xf4\xfd\xff"},
  725. {65535, "\xf4\xfe\xff"},
  726. {65536, "\xf4\xff\xff"},
  727. }
  728. dst := make([]byte, 70000)
  729. nines := bytes.Repeat([]byte{0x99}, 65536)
  730. for _, tc := range testCases {
  731. lit := nines[:tc.length]
  732. n := emitLiteral(dst, lit)
  733. if !bytes.HasSuffix(dst[:n], lit) {
  734. t.Errorf("length=%d: did not end with that many literal bytes", tc.length)
  735. continue
  736. }
  737. got := string(dst[:n-tc.length])
  738. if got != tc.want {
  739. t.Errorf("length=%d:\ngot % x\nwant % x", tc.length, got, tc.want)
  740. continue
  741. }
  742. }
  743. }
  744. func TestEmitCopy(t *testing.T) {
  745. testCases := []struct {
  746. offset int
  747. length int
  748. want string
  749. }{
  750. {8, 04, "\x01\x08"},
  751. {8, 11, "\x1d\x08"},
  752. {8, 12, "\x2e\x08\x00"},
  753. {8, 13, "\x32\x08\x00"},
  754. {8, 59, "\xea\x08\x00"},
  755. {8, 60, "\xee\x08\x00"},
  756. {8, 61, "\xf2\x08\x00"},
  757. {8, 62, "\xf6\x08\x00"},
  758. {8, 63, "\xfa\x08\x00"},
  759. {8, 64, "\xfe\x08\x00"},
  760. {8, 65, "\xee\x08\x00\x05\x08"},
  761. {8, 66, "\xee\x08\x00\x09\x08"},
  762. {8, 67, "\xee\x08\x00\x0d\x08"},
  763. {8, 68, "\xfe\x08\x00\x01\x08"},
  764. {8, 69, "\xfe\x08\x00\x05\x08"},
  765. {8, 80, "\xfe\x08\x00\x3e\x08\x00"},
  766. {256, 04, "\x21\x00"},
  767. {256, 11, "\x3d\x00"},
  768. {256, 12, "\x2e\x00\x01"},
  769. {256, 13, "\x32\x00\x01"},
  770. {256, 59, "\xea\x00\x01"},
  771. {256, 60, "\xee\x00\x01"},
  772. {256, 61, "\xf2\x00\x01"},
  773. {256, 62, "\xf6\x00\x01"},
  774. {256, 63, "\xfa\x00\x01"},
  775. {256, 64, "\xfe\x00\x01"},
  776. {256, 65, "\xee\x00\x01\x25\x00"},
  777. {256, 66, "\xee\x00\x01\x29\x00"},
  778. {256, 67, "\xee\x00\x01\x2d\x00"},
  779. {256, 68, "\xfe\x00\x01\x21\x00"},
  780. {256, 69, "\xfe\x00\x01\x25\x00"},
  781. {256, 80, "\xfe\x00\x01\x3e\x00\x01"},
  782. {2048, 04, "\x0e\x00\x08"},
  783. {2048, 11, "\x2a\x00\x08"},
  784. {2048, 12, "\x2e\x00\x08"},
  785. {2048, 13, "\x32\x00\x08"},
  786. {2048, 59, "\xea\x00\x08"},
  787. {2048, 60, "\xee\x00\x08"},
  788. {2048, 61, "\xf2\x00\x08"},
  789. {2048, 62, "\xf6\x00\x08"},
  790. {2048, 63, "\xfa\x00\x08"},
  791. {2048, 64, "\xfe\x00\x08"},
  792. {2048, 65, "\xee\x00\x08\x12\x00\x08"},
  793. {2048, 66, "\xee\x00\x08\x16\x00\x08"},
  794. {2048, 67, "\xee\x00\x08\x1a\x00\x08"},
  795. {2048, 68, "\xfe\x00\x08\x0e\x00\x08"},
  796. {2048, 69, "\xfe\x00\x08\x12\x00\x08"},
  797. {2048, 80, "\xfe\x00\x08\x3e\x00\x08"},
  798. }
  799. dst := make([]byte, 1024)
  800. for _, tc := range testCases {
  801. n := emitCopy(dst, tc.offset, tc.length)
  802. got := string(dst[:n])
  803. if got != tc.want {
  804. t.Errorf("offset=%d, length=%d:\ngot % x\nwant % x", tc.offset, tc.length, got, tc.want)
  805. }
  806. }
  807. }
  808. func TestNewBufferedWriter(t *testing.T) {
  809. // Test all 32 possible sub-sequences of these 5 input slices.
  810. //
  811. // Their lengths sum to 400,000, which is over 6 times the Writer ibuf
  812. // capacity: 6 * maxBlockSize is 393,216.
  813. inputs := [][]byte{
  814. bytes.Repeat([]byte{'a'}, 40000),
  815. bytes.Repeat([]byte{'b'}, 150000),
  816. bytes.Repeat([]byte{'c'}, 60000),
  817. bytes.Repeat([]byte{'d'}, 120000),
  818. bytes.Repeat([]byte{'e'}, 30000),
  819. }
  820. loop:
  821. for i := 0; i < 1<<uint(len(inputs)); i++ {
  822. var want []byte
  823. buf := new(bytes.Buffer)
  824. w := NewBufferedWriter(buf)
  825. for j, input := range inputs {
  826. if i&(1<<uint(j)) == 0 {
  827. continue
  828. }
  829. if _, err := w.Write(input); err != nil {
  830. t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
  831. continue loop
  832. }
  833. want = append(want, input...)
  834. }
  835. if err := w.Close(); err != nil {
  836. t.Errorf("i=%#02x: Close: %v", i, err)
  837. continue
  838. }
  839. got, err := ioutil.ReadAll(NewReader(buf))
  840. if err != nil {
  841. t.Errorf("i=%#02x: ReadAll: %v", i, err)
  842. continue
  843. }
  844. if err := cmp(got, want); err != nil {
  845. t.Errorf("i=%#02x: %v", i, err)
  846. continue
  847. }
  848. }
  849. }
  850. func TestFlush(t *testing.T) {
  851. buf := new(bytes.Buffer)
  852. w := NewBufferedWriter(buf)
  853. defer w.Close()
  854. if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
  855. t.Fatalf("Write: %v", err)
  856. }
  857. if n := buf.Len(); n != 0 {
  858. t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
  859. }
  860. if err := w.Flush(); err != nil {
  861. t.Fatalf("Flush: %v", err)
  862. }
  863. if n := buf.Len(); n == 0 {
  864. t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
  865. }
  866. }
  867. func TestReaderUncompressedDataOK(t *testing.T) {
  868. r := NewReader(strings.NewReader(magicChunk +
  869. "\x01\x08\x00\x00" + // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
  870. "\x68\x10\xe6\xb6" + // Checksum.
  871. "\x61\x62\x63\x64", // Uncompressed payload: "abcd".
  872. ))
  873. g, err := ioutil.ReadAll(r)
  874. if err != nil {
  875. t.Fatal(err)
  876. }
  877. if got, want := string(g), "abcd"; got != want {
  878. t.Fatalf("got %q, want %q", got, want)
  879. }
  880. }
  881. func TestReaderUncompressedDataNoPayload(t *testing.T) {
  882. r := NewReader(strings.NewReader(magicChunk +
  883. "\x01\x04\x00\x00" + // Uncompressed chunk, 4 bytes long.
  884. "", // No payload; corrupt input.
  885. ))
  886. if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
  887. t.Fatalf("got %v, want %v", err, ErrCorrupt)
  888. }
  889. }
  890. func TestReaderUncompressedDataTooLong(t *testing.T) {
  891. // https://github.com/google/snappy/blob/master/framing_format.txt section
  892. // 4.3 says that "the maximum legal chunk length... is 65540", or 0x10004.
  893. const n = 0x10005
  894. r := NewReader(strings.NewReader(magicChunk +
  895. "\x01\x05\x00\x01" + // Uncompressed chunk, n bytes long.
  896. strings.Repeat("\x00", n),
  897. ))
  898. if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
  899. t.Fatalf("got %v, want %v", err, ErrCorrupt)
  900. }
  901. }
  902. func TestReaderReset(t *testing.T) {
  903. gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
  904. buf := new(bytes.Buffer)
  905. if _, err := NewWriter(buf).Write(gold); err != nil {
  906. t.Fatalf("Write: %v", err)
  907. }
  908. encoded, invalid, partial := buf.String(), "invalid", "partial"
  909. r := NewReader(nil)
  910. for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
  911. if s == partial {
  912. r.Reset(strings.NewReader(encoded))
  913. if _, err := r.Read(make([]byte, 101)); err != nil {
  914. t.Errorf("#%d: %v", i, err)
  915. continue
  916. }
  917. continue
  918. }
  919. r.Reset(strings.NewReader(s))
  920. got, err := ioutil.ReadAll(r)
  921. switch s {
  922. case encoded:
  923. if err != nil {
  924. t.Errorf("#%d: %v", i, err)
  925. continue
  926. }
  927. if err := cmp(got, gold); err != nil {
  928. t.Errorf("#%d: %v", i, err)
  929. continue
  930. }
  931. case invalid:
  932. if err == nil {
  933. t.Errorf("#%d: got nil error, want non-nil", i)
  934. continue
  935. }
  936. }
  937. }
  938. }
  939. func TestWriterReset(t *testing.T) {
  940. gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
  941. const n = 20
  942. for _, buffered := range []bool{false, true} {
  943. var w *Writer
  944. if buffered {
  945. w = NewBufferedWriter(nil)
  946. defer w.Close()
  947. } else {
  948. w = NewWriter(nil)
  949. }
  950. var gots, wants [][]byte
  951. failed := false
  952. for i := 0; i <= n; i++ {
  953. buf := new(bytes.Buffer)
  954. w.Reset(buf)
  955. want := gold[:len(gold)*i/n]
  956. if _, err := w.Write(want); err != nil {
  957. t.Errorf("#%d: Write: %v", i, err)
  958. failed = true
  959. continue
  960. }
  961. if buffered {
  962. if err := w.Flush(); err != nil {
  963. t.Errorf("#%d: Flush: %v", i, err)
  964. failed = true
  965. continue
  966. }
  967. }
  968. got, err := ioutil.ReadAll(NewReader(buf))
  969. if err != nil {
  970. t.Errorf("#%d: ReadAll: %v", i, err)
  971. failed = true
  972. continue
  973. }
  974. gots = append(gots, got)
  975. wants = append(wants, want)
  976. }
  977. if failed {
  978. continue
  979. }
  980. for i := range gots {
  981. if err := cmp(gots[i], wants[i]); err != nil {
  982. t.Errorf("#%d: %v", i, err)
  983. }
  984. }
  985. }
  986. }
  987. func TestWriterResetWithoutFlush(t *testing.T) {
  988. buf0 := new(bytes.Buffer)
  989. buf1 := new(bytes.Buffer)
  990. w := NewBufferedWriter(buf0)
  991. if _, err := w.Write([]byte("xxx")); err != nil {
  992. t.Fatalf("Write #0: %v", err)
  993. }
  994. // Note that we don't Flush the Writer before calling Reset.
  995. w.Reset(buf1)
  996. if _, err := w.Write([]byte("yyy")); err != nil {
  997. t.Fatalf("Write #1: %v", err)
  998. }
  999. if err := w.Flush(); err != nil {
  1000. t.Fatalf("Flush: %v", err)
  1001. }
  1002. got, err := ioutil.ReadAll(NewReader(buf1))
  1003. if err != nil {
  1004. t.Fatalf("ReadAll: %v", err)
  1005. }
  1006. if err := cmp(got, []byte("yyy")); err != nil {
  1007. t.Fatal(err)
  1008. }
  1009. }
  1010. type writeCounter int
  1011. func (c *writeCounter) Write(p []byte) (int, error) {
  1012. *c++
  1013. return len(p), nil
  1014. }
  1015. // TestNumUnderlyingWrites tests that each Writer flush only makes one or two
  1016. // Write calls on its underlying io.Writer, depending on whether or not the
  1017. // flushed buffer was compressible.
  1018. func TestNumUnderlyingWrites(t *testing.T) {
  1019. testCases := []struct {
  1020. input []byte
  1021. want int
  1022. }{
  1023. {bytes.Repeat([]byte{'x'}, 100), 1},
  1024. {bytes.Repeat([]byte{'y'}, 100), 1},
  1025. {[]byte("ABCDEFGHIJKLMNOPQRST"), 2},
  1026. }
  1027. var c writeCounter
  1028. w := NewBufferedWriter(&c)
  1029. defer w.Close()
  1030. for i, tc := range testCases {
  1031. c = 0
  1032. if _, err := w.Write(tc.input); err != nil {
  1033. t.Errorf("#%d: Write: %v", i, err)
  1034. continue
  1035. }
  1036. if err := w.Flush(); err != nil {
  1037. t.Errorf("#%d: Flush: %v", i, err)
  1038. continue
  1039. }
  1040. if int(c) != tc.want {
  1041. t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
  1042. continue
  1043. }
  1044. }
  1045. }
  1046. func benchDecode(b *testing.B, src []byte) {
  1047. encoded := Encode(nil, src)
  1048. // Bandwidth is in amount of uncompressed data.
  1049. b.SetBytes(int64(len(src)))
  1050. b.ResetTimer()
  1051. for i := 0; i < b.N; i++ {
  1052. Decode(src, encoded)
  1053. }
  1054. }
  1055. func benchEncode(b *testing.B, src []byte) {
  1056. // Bandwidth is in amount of uncompressed data.
  1057. b.SetBytes(int64(len(src)))
  1058. dst := make([]byte, MaxEncodedLen(len(src)))
  1059. b.ResetTimer()
  1060. for i := 0; i < b.N; i++ {
  1061. Encode(dst, src)
  1062. }
  1063. }
  1064. func testOrBenchmark(b testing.TB) string {
  1065. if _, ok := b.(*testing.B); ok {
  1066. return "benchmark"
  1067. }
  1068. return "test"
  1069. }
  1070. func readFile(b testing.TB, filename string) []byte {
  1071. src, err := ioutil.ReadFile(filename)
  1072. if err != nil {
  1073. b.Skipf("skipping %s: %v", testOrBenchmark(b), err)
  1074. }
  1075. if len(src) == 0 {
  1076. b.Fatalf("%s has zero length", filename)
  1077. }
  1078. return src
  1079. }
  1080. // expand returns a slice of length n containing repeated copies of src.
  1081. func expand(src []byte, n int) []byte {
  1082. dst := make([]byte, n)
  1083. for x := dst; len(x) > 0; {
  1084. i := copy(x, src)
  1085. x = x[i:]
  1086. }
  1087. return dst
  1088. }
  1089. func benchWords(b *testing.B, n int, decode bool) {
  1090. // Note: the file is OS-language dependent so the resulting values are not
  1091. // directly comparable for non-US-English OS installations.
  1092. data := expand(readFile(b, "/usr/share/dict/words"), n)
  1093. if decode {
  1094. benchDecode(b, data)
  1095. } else {
  1096. benchEncode(b, data)
  1097. }
  1098. }
  1099. func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
  1100. func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
  1101. func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
  1102. func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
  1103. func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
  1104. func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
  1105. func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
  1106. func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
  1107. func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
  1108. func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
  1109. func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
  1110. func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
  1111. func BenchmarkRandomEncode(b *testing.B) {
  1112. rng := rand.New(rand.NewSource(1))
  1113. data := make([]byte, 1<<20)
  1114. for i := range data {
  1115. data[i] = uint8(rng.Intn(256))
  1116. }
  1117. benchEncode(b, data)
  1118. }
  1119. // testFiles' values are copied directly from
  1120. // https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
  1121. // The label field is unused in snappy-go.
  1122. var testFiles = []struct {
  1123. label string
  1124. filename string
  1125. sizeLimit int
  1126. }{
  1127. {"html", "html", 0},
  1128. {"urls", "urls.10K", 0},
  1129. {"jpg", "fireworks.jpeg", 0},
  1130. {"jpg_200", "fireworks.jpeg", 200},
  1131. {"pdf", "paper-100k.pdf", 0},
  1132. {"html4", "html_x_4", 0},
  1133. {"txt1", "alice29.txt", 0},
  1134. {"txt2", "asyoulik.txt", 0},
  1135. {"txt3", "lcet10.txt", 0},
  1136. {"txt4", "plrabn12.txt", 0},
  1137. {"pb", "geo.protodata", 0},
  1138. {"gaviota", "kppkn.gtb", 0},
  1139. }
  1140. const (
  1141. // The benchmark data files are at this canonical URL.
  1142. benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
  1143. )
  1144. func downloadBenchmarkFiles(b testing.TB, basename string) (errRet error) {
  1145. bDir := filepath.FromSlash(*benchdataDir)
  1146. filename := filepath.Join(bDir, basename)
  1147. if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
  1148. return nil
  1149. }
  1150. if !*download {
  1151. b.Skipf("test data not found; skipping %s without the -download flag", testOrBenchmark(b))
  1152. }
  1153. // Download the official snappy C++ implementation reference test data
  1154. // files for benchmarking.
  1155. if err := os.MkdirAll(bDir, 0777); err != nil && !os.IsExist(err) {
  1156. return fmt.Errorf("failed to create %s: %s", bDir, err)
  1157. }
  1158. f, err := os.Create(filename)
  1159. if err != nil {
  1160. return fmt.Errorf("failed to create %s: %s", filename, err)
  1161. }
  1162. defer f.Close()
  1163. defer func() {
  1164. if errRet != nil {
  1165. os.Remove(filename)
  1166. }
  1167. }()
  1168. url := benchURL + basename
  1169. resp, err := http.Get(url)
  1170. if err != nil {
  1171. return fmt.Errorf("failed to download %s: %s", url, err)
  1172. }
  1173. defer resp.Body.Close()
  1174. if s := resp.StatusCode; s != http.StatusOK {
  1175. return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
  1176. }
  1177. _, err = io.Copy(f, resp.Body)
  1178. if err != nil {
  1179. return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
  1180. }
  1181. return nil
  1182. }
  1183. func benchFile(b *testing.B, i int, decode bool) {
  1184. if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil {
  1185. b.Fatalf("failed to download testdata: %s", err)
  1186. }
  1187. bDir := filepath.FromSlash(*benchdataDir)
  1188. data := readFile(b, filepath.Join(bDir, testFiles[i].filename))
  1189. if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
  1190. data = data[:n]
  1191. }
  1192. if decode {
  1193. benchDecode(b, data)
  1194. } else {
  1195. benchEncode(b, data)
  1196. }
  1197. }
  1198. // Naming convention is kept similar to what snappy's C++ implementation uses.
  1199. func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) }
  1200. func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) }
  1201. func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) }
  1202. func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) }
  1203. func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) }
  1204. func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) }
  1205. func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) }
  1206. func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) }
  1207. func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) }
  1208. func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) }
  1209. func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
  1210. func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }
  1211. func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) }
  1212. func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) }
  1213. func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) }
  1214. func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) }
  1215. func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) }
  1216. func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) }
  1217. func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) }
  1218. func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) }
  1219. func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) }
  1220. func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) }
  1221. func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
  1222. func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }
  1223. func BenchmarkExtendMatch(b *testing.B) {
  1224. src, err := ioutil.ReadFile(goldenText)
  1225. if err != nil {
  1226. b.Fatalf("ReadFile: %v", err)
  1227. }
  1228. b.ResetTimer()
  1229. for i := 0; i < b.N; i++ {
  1230. for _, tc := range extendMatchGoldenTestCases {
  1231. extendMatch(src, tc.i, tc.j)
  1232. }
  1233. }
  1234. }