snappy_test.go 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316
  1. // Copyright 2011 The Snappy-Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package snappy
  5. import (
  6. "bytes"
  7. "encoding/binary"
  8. "flag"
  9. "fmt"
  10. "io"
  11. "io/ioutil"
  12. "math/rand"
  13. "net/http"
  14. "os"
  15. "os/exec"
  16. "path/filepath"
  17. "runtime"
  18. "strings"
  19. "testing"
  20. )
  21. var (
  22. download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
  23. testdata = flag.String("testdata", "testdata", "Directory containing the test data")
  24. )
  25. // goEncoderShouldMatchCppEncoder is whether to test that the algorithm used by
  26. // Go's encoder matches byte-for-byte what the C++ snappy encoder produces, on
  27. // this GOARCH. There is more than one valid encoding of any given input, and
  28. // there is more than one good algorithm along the frontier of trading off
  29. // throughput for output size. Nonetheless, we presume that the C++ encoder's
  30. // algorithm is a good one and has been tested on a wide range of inputs, so
  31. // matching that exactly should mean that the Go encoder's algorithm is also
  32. // good, without needing to gather our own corpus of test data.
  33. //
  34. // The exact algorithm used by the C++ code is potentially endian dependent, as
  35. // it puns a byte pointer to a uint32 pointer to load, hash and compare 4 bytes
  36. // at a time. The Go implementation is endian agnostic, in that its output is
  37. // the same (as little-endian C++ code), regardless of the CPU's endianness.
  38. //
  39. // Thus, when comparing Go's output to C++ output generated beforehand, such as
  40. // the "testdata/pi.txt.rawsnappy" file generated by C++ code on a little-
  41. // endian system, we can run that test regardless of the runtime.GOARCH value.
  42. //
  43. // When comparing Go's output to dynamically generated C++ output, i.e. the
  44. // result of fork/exec'ing a C++ program, we can run that test only on
  45. // little-endian systems, because the C++ output might be different on
  46. // big-endian systems. The runtime package doesn't export endianness per se,
  47. // but we can restrict this match-C++ test to common little-endian systems.
  48. const goEncoderShouldMatchCppEncoder = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "arm"
  49. func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
  50. got := maxEncodedLenOfMaxBlockSize
  51. want := MaxEncodedLen(maxBlockSize)
  52. if got != want {
  53. t.Fatalf("got %d, want %d", got, want)
  54. }
  55. }
  56. func cmp(a, b []byte) error {
  57. if bytes.Equal(a, b) {
  58. return nil
  59. }
  60. if len(a) != len(b) {
  61. return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
  62. }
  63. for i := range a {
  64. if a[i] != b[i] {
  65. return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i])
  66. }
  67. }
  68. return nil
  69. }
  70. func roundtrip(b, ebuf, dbuf []byte) error {
  71. d, err := Decode(dbuf, Encode(ebuf, b))
  72. if err != nil {
  73. return fmt.Errorf("decoding error: %v", err)
  74. }
  75. if err := cmp(d, b); err != nil {
  76. return fmt.Errorf("roundtrip mismatch: %v", err)
  77. }
  78. return nil
  79. }
  80. func TestEmpty(t *testing.T) {
  81. if err := roundtrip(nil, nil, nil); err != nil {
  82. t.Fatal(err)
  83. }
  84. }
  85. func TestSmallCopy(t *testing.T) {
  86. for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
  87. for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
  88. for i := 0; i < 32; i++ {
  89. s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
  90. if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
  91. t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
  92. }
  93. }
  94. }
  95. }
  96. }
  97. func TestSmallRand(t *testing.T) {
  98. rng := rand.New(rand.NewSource(1))
  99. for n := 1; n < 20000; n += 23 {
  100. b := make([]byte, n)
  101. for i := range b {
  102. b[i] = uint8(rng.Intn(256))
  103. }
  104. if err := roundtrip(b, nil, nil); err != nil {
  105. t.Fatal(err)
  106. }
  107. }
  108. }
  109. func TestSmallRegular(t *testing.T) {
  110. for n := 1; n < 20000; n += 23 {
  111. b := make([]byte, n)
  112. for i := range b {
  113. b[i] = uint8(i%10 + 'a')
  114. }
  115. if err := roundtrip(b, nil, nil); err != nil {
  116. t.Fatal(err)
  117. }
  118. }
  119. }
  120. func TestInvalidVarint(t *testing.T) {
  121. testCases := []struct {
  122. desc string
  123. input string
  124. }{{
  125. "invalid varint, final byte has continuation bit set",
  126. "\xff",
  127. }, {
  128. "invalid varint, value overflows uint64",
  129. "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
  130. }, {
  131. // https://github.com/google/snappy/blob/master/format_description.txt
  132. // says that "the stream starts with the uncompressed length [as a
  133. // varint] (up to a maximum of 2^32 - 1)".
  134. "valid varint (as uint64), but value overflows uint32",
  135. "\x80\x80\x80\x80\x10",
  136. }}
  137. for _, tc := range testCases {
  138. input := []byte(tc.input)
  139. if _, err := DecodedLen(input); err != ErrCorrupt {
  140. t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
  141. }
  142. if _, err := Decode(nil, input); err != ErrCorrupt {
  143. t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
  144. }
  145. }
  146. }
  147. func TestDecode(t *testing.T) {
  148. lit40Bytes := make([]byte, 40)
  149. for i := range lit40Bytes {
  150. lit40Bytes[i] = byte(i)
  151. }
  152. lit40 := string(lit40Bytes)
  153. testCases := []struct {
  154. desc string
  155. input string
  156. want string
  157. wantErr error
  158. }{{
  159. `decodedLen=0; valid input`,
  160. "\x00",
  161. "",
  162. nil,
  163. }, {
  164. `decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
  165. "\x03" + "\x08\xff\xff\xff",
  166. "\xff\xff\xff",
  167. nil,
  168. }, {
  169. `decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
  170. "\x02" + "\x08\xff\xff\xff",
  171. "",
  172. ErrCorrupt,
  173. }, {
  174. `decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
  175. "\x03" + "\x08\xff\xff",
  176. "",
  177. ErrCorrupt,
  178. }, {
  179. `decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
  180. "\x28" + "\x9c" + lit40,
  181. lit40,
  182. nil,
  183. }, {
  184. `decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
  185. "\x01" + "\xf0",
  186. "",
  187. ErrCorrupt,
  188. }, {
  189. `decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
  190. "\x03" + "\xf0\x02\xff\xff\xff",
  191. "\xff\xff\xff",
  192. nil,
  193. }, {
  194. `decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
  195. "\x01" + "\xf4\x00",
  196. "",
  197. ErrCorrupt,
  198. }, {
  199. `decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
  200. "\x03" + "\xf4\x02\x00\xff\xff\xff",
  201. "\xff\xff\xff",
  202. nil,
  203. }, {
  204. `decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
  205. "\x01" + "\xf8\x00\x00",
  206. "",
  207. ErrCorrupt,
  208. }, {
  209. `decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
  210. "\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
  211. "\xff\xff\xff",
  212. nil,
  213. }, {
  214. `decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
  215. "\x01" + "\xfc\x00\x00\x00",
  216. "",
  217. ErrCorrupt,
  218. }, {
  219. `decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
  220. "\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
  221. "",
  222. ErrCorrupt,
  223. }, {
  224. `decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
  225. "\x04" + "\xfc\x02\x00\x00\x00\xff",
  226. "",
  227. ErrCorrupt,
  228. }, {
  229. `decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
  230. "\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
  231. "\xff\xff\xff",
  232. nil,
  233. }, {
  234. `decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
  235. "\x04" + "\x01",
  236. "",
  237. ErrCorrupt,
  238. }, {
  239. `decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
  240. "\x04" + "\x02\x00",
  241. "",
  242. ErrCorrupt,
  243. }, {
  244. `decodedLen=4; tagCopy4; unsupported COPY_4 tag`,
  245. "\x04" + "\x03\x00\x00\x00\x00",
  246. "",
  247. errUnsupportedCopy4Tag,
  248. }, {
  249. `decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
  250. "\x04" + "\x0cabcd",
  251. "abcd",
  252. nil,
  253. }, {
  254. `decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
  255. "\x0d" + "\x0cabcd" + "\x15\x04",
  256. "abcdabcdabcda",
  257. nil,
  258. }, {
  259. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
  260. "\x08" + "\x0cabcd" + "\x01\x04",
  261. "abcdabcd",
  262. nil,
  263. }, {
  264. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
  265. "\x08" + "\x0cabcd" + "\x01\x02",
  266. "abcdcdcd",
  267. nil,
  268. }, {
  269. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
  270. "\x08" + "\x0cabcd" + "\x01\x01",
  271. "abcddddd",
  272. nil,
  273. }, {
  274. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
  275. "\x08" + "\x0cabcd" + "\x01\x00",
  276. "",
  277. ErrCorrupt,
  278. }, {
  279. `decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
  280. "\x09" + "\x0cabcd" + "\x01\x04",
  281. "",
  282. ErrCorrupt,
  283. }, {
  284. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
  285. "\x08" + "\x0cabcd" + "\x01\x05",
  286. "",
  287. ErrCorrupt,
  288. }, {
  289. `decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
  290. "\x07" + "\x0cabcd" + "\x01\x04",
  291. "",
  292. ErrCorrupt,
  293. }, {
  294. `decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
  295. "\x06" + "\x0cabcd" + "\x06\x03\x00",
  296. "abcdbc",
  297. nil,
  298. }}
  299. const (
  300. // notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
  301. // not present in either the input or the output. It is written to dBuf
  302. // to check that Decode does not write bytes past the end of
  303. // dBuf[:dLen].
  304. //
  305. // The magic number 37 was chosen because it is prime. A more 'natural'
  306. // number like 32 might lead to a false negative if, for example, a
  307. // byte was incorrectly copied 4*8 bytes later.
  308. notPresentBase = 0xa0
  309. notPresentLen = 37
  310. )
  311. var dBuf [100]byte
  312. loop:
  313. for i, tc := range testCases {
  314. input := []byte(tc.input)
  315. for _, x := range input {
  316. if notPresentBase <= x && x < notPresentBase+notPresentLen {
  317. t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
  318. continue loop
  319. }
  320. }
  321. dLen, n := binary.Uvarint(input)
  322. if n <= 0 {
  323. t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
  324. continue
  325. }
  326. if dLen > uint64(len(dBuf)) {
  327. t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
  328. continue
  329. }
  330. for j := range dBuf {
  331. dBuf[j] = byte(notPresentBase + j%notPresentLen)
  332. }
  333. g, gotErr := Decode(dBuf[:], input)
  334. if got := string(g); got != tc.want || gotErr != tc.wantErr {
  335. t.Errorf("#%d (%s):\ngot %q, %v\nwant %q, %v",
  336. i, tc.desc, got, gotErr, tc.want, tc.wantErr)
  337. continue
  338. }
  339. for j, x := range dBuf {
  340. if uint64(j) < dLen {
  341. continue
  342. }
  343. if w := byte(notPresentBase + j%notPresentLen); x != w {
  344. t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
  345. i, tc.desc, j, x, w, dBuf)
  346. continue loop
  347. }
  348. }
  349. }
  350. }
  351. // TestDecodeLengthOffset tests decoding an encoding of the form literal +
  352. // copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
  353. func TestDecodeLengthOffset(t *testing.T) {
  354. const (
  355. prefix = "abcdefghijklmnopqr"
  356. suffix = "ABCDEFGHIJKLMNOPQR"
  357. // notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
  358. // not present in either the input or the output. It is written to
  359. // gotBuf to check that Decode does not write bytes past the end of
  360. // gotBuf[:totalLen].
  361. //
  362. // The magic number 37 was chosen because it is prime. A more 'natural'
  363. // number like 32 might lead to a false negative if, for example, a
  364. // byte was incorrectly copied 4*8 bytes later.
  365. notPresentBase = 0xa0
  366. notPresentLen = 37
  367. )
  368. var gotBuf, wantBuf, inputBuf [128]byte
  369. for length := 1; length <= 18; length++ {
  370. for offset := 1; offset <= 18; offset++ {
  371. loop:
  372. for suffixLen := 0; suffixLen <= 18; suffixLen++ {
  373. totalLen := len(prefix) + length + suffixLen
  374. inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
  375. inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
  376. inputLen++
  377. inputLen += copy(inputBuf[inputLen:], prefix)
  378. inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
  379. inputBuf[inputLen+1] = byte(offset)
  380. inputBuf[inputLen+2] = 0x00
  381. inputLen += 3
  382. if suffixLen > 0 {
  383. inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
  384. inputLen++
  385. inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
  386. }
  387. input := inputBuf[:inputLen]
  388. for i := range gotBuf {
  389. gotBuf[i] = byte(notPresentBase + i%notPresentLen)
  390. }
  391. got, err := Decode(gotBuf[:], input)
  392. if err != nil {
  393. t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
  394. continue
  395. }
  396. wantLen := 0
  397. wantLen += copy(wantBuf[wantLen:], prefix)
  398. for i := 0; i < length; i++ {
  399. wantBuf[wantLen] = wantBuf[wantLen-offset]
  400. wantLen++
  401. }
  402. wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
  403. want := wantBuf[:wantLen]
  404. for _, x := range input {
  405. if notPresentBase <= x && x < notPresentBase+notPresentLen {
  406. t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
  407. length, offset, suffixLen, x, input)
  408. continue loop
  409. }
  410. }
  411. for i, x := range gotBuf {
  412. if i < totalLen {
  413. continue
  414. }
  415. if w := byte(notPresentBase + i%notPresentLen); x != w {
  416. t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
  417. "Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
  418. length, offset, suffixLen, totalLen, i, x, w, gotBuf)
  419. continue loop
  420. }
  421. }
  422. for _, x := range want {
  423. if notPresentBase <= x && x < notPresentBase+notPresentLen {
  424. t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
  425. length, offset, suffixLen, x, want)
  426. continue loop
  427. }
  428. }
  429. if !bytes.Equal(got, want) {
  430. t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot % x\nwant % x",
  431. length, offset, suffixLen, input, got, want)
  432. continue
  433. }
  434. }
  435. }
  436. }
  437. }
  438. const (
  439. goldenText = "Mark.Twain-Tom.Sawyer.txt"
  440. goldenCompressed = goldenText + ".rawsnappy"
  441. )
  442. func TestDecodeGoldenInput(t *testing.T) {
  443. src, err := ioutil.ReadFile(filepath.Join(*testdata, goldenCompressed))
  444. if err != nil {
  445. t.Fatalf("ReadFile: %v", err)
  446. }
  447. got, err := Decode(nil, src)
  448. if err != nil {
  449. t.Fatalf("Decode: %v", err)
  450. }
  451. want, err := ioutil.ReadFile(filepath.Join(*testdata, goldenText))
  452. if err != nil {
  453. t.Fatalf("ReadFile: %v", err)
  454. }
  455. if err := cmp(got, want); err != nil {
  456. t.Fatal(err)
  457. }
  458. }
  459. func TestEncodeGoldenInput(t *testing.T) {
  460. src, err := ioutil.ReadFile(filepath.Join(*testdata, goldenText))
  461. if err != nil {
  462. t.Fatalf("ReadFile: %v", err)
  463. }
  464. got := Encode(nil, src)
  465. want, err := ioutil.ReadFile(filepath.Join(*testdata, goldenCompressed))
  466. if err != nil {
  467. t.Fatalf("ReadFile: %v", err)
  468. }
  469. if err := cmp(got, want); err != nil {
  470. t.Fatal(err)
  471. }
  472. }
  473. func TestExtendMatchGoldenInput(t *testing.T) {
  474. src, err := ioutil.ReadFile(filepath.Join(*testdata, goldenText))
  475. if err != nil {
  476. t.Fatalf("ReadFile: %v", err)
  477. }
  478. for i, tc := range extendMatchGoldenTestCases {
  479. got := extendMatch(src, tc.i, tc.j)
  480. if got != tc.want {
  481. t.Errorf("test #%d: i, j = %5d, %5d: got %5d (= j + %6d), want %5d (= j + %6d)",
  482. i, tc.i, tc.j, got, got-tc.j, tc.want, tc.want-tc.j)
  483. }
  484. }
  485. }
  486. func TestExtendMatch(t *testing.T) {
  487. // ref is a simple, reference implementation of extendMatch.
  488. ref := func(src []byte, i, j int) int {
  489. for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
  490. }
  491. return j
  492. }
  493. nums := []int{0, 1, 2, 7, 8, 9, 29, 30, 31, 32, 33, 34, 38, 39, 40}
  494. for yIndex := 40; yIndex > 30; yIndex-- {
  495. xxx := bytes.Repeat([]byte("x"), 40)
  496. if yIndex < len(xxx) {
  497. xxx[yIndex] = 'y'
  498. }
  499. for _, i := range nums {
  500. for _, j := range nums {
  501. if i >= j {
  502. continue
  503. }
  504. got := extendMatch(xxx, i, j)
  505. want := ref(xxx, i, j)
  506. if got != want {
  507. t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
  508. }
  509. }
  510. }
  511. }
  512. }
  513. const snappytoolCmdName = "cmd/snappytool/snappytool"
  514. func skipTestSameEncodingAsCpp() (msg string) {
  515. if !goEncoderShouldMatchCppEncoder {
  516. return fmt.Sprintf("skipping testing that the encoding is byte-for-byte identical to C++: GOARCH=%s", runtime.GOARCH)
  517. }
  518. if _, err := os.Stat(snappytoolCmdName); err != nil {
  519. return fmt.Sprintf("could not find snappytool: %v", err)
  520. }
  521. return ""
  522. }
  523. func runTestSameEncodingAsCpp(src []byte) error {
  524. got := Encode(nil, src)
  525. cmd := exec.Command(snappytoolCmdName, "-e")
  526. cmd.Stdin = bytes.NewReader(src)
  527. want, err := cmd.Output()
  528. if err != nil {
  529. return fmt.Errorf("could not run snappytool: %v", err)
  530. }
  531. return cmp(got, want)
  532. }
  533. func TestSameEncodingAsCppShortCopies(t *testing.T) {
  534. if msg := skipTestSameEncodingAsCpp(); msg != "" {
  535. t.Skip(msg)
  536. }
  537. src := bytes.Repeat([]byte{'a'}, 20)
  538. for i := 0; i <= len(src); i++ {
  539. if err := runTestSameEncodingAsCpp(src[:i]); err != nil {
  540. t.Errorf("i=%d: %v", i, err)
  541. }
  542. }
  543. }
  544. func TestSameEncodingAsCppLongFiles(t *testing.T) {
  545. if msg := skipTestSameEncodingAsCpp(); msg != "" {
  546. t.Skip(msg)
  547. }
  548. failed := false
  549. for i, tf := range testFiles {
  550. if err := downloadBenchmarkFiles(t, tf.filename); err != nil {
  551. t.Fatalf("failed to download testdata: %s", err)
  552. }
  553. data := readFile(t, filepath.Join(*testdata, benchDir, tf.filename))
  554. if n := tf.sizeLimit; 0 < n && n < len(data) {
  555. data = data[:n]
  556. }
  557. if err := runTestSameEncodingAsCpp(data); err != nil {
  558. t.Errorf("i=%d: %v", i, err)
  559. failed = true
  560. }
  561. }
  562. if failed {
  563. t.Errorf("was the snappytool program built against the C++ snappy library version " +
  564. "d53de187 or later, commited on 2016-04-05? See " +
  565. "https://github.com/google/snappy/commit/d53de18799418e113e44444252a39b12a0e4e0cc")
  566. }
  567. }
  568. // TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
  569. // described in decode_amd64.s and its claim of a 10 byte overrun worst case.
  570. func TestSlowForwardCopyOverrun(t *testing.T) {
  571. const base = 100
  572. for length := 1; length < 18; length++ {
  573. for offset := 1; offset < 18; offset++ {
  574. highWaterMark := base
  575. d := base
  576. l := length
  577. o := offset
  578. // makeOffsetAtLeast8
  579. for o < 8 {
  580. if end := d + 8; highWaterMark < end {
  581. highWaterMark = end
  582. }
  583. l -= o
  584. d += o
  585. o += o
  586. }
  587. // fixUpSlowForwardCopy
  588. a := d
  589. d += l
  590. // finishSlowForwardCopy
  591. for l > 0 {
  592. if end := a + 8; highWaterMark < end {
  593. highWaterMark = end
  594. }
  595. a += 8
  596. l -= 8
  597. }
  598. dWant := base + length
  599. overrun := highWaterMark - dWant
  600. if d != dWant || overrun < 0 || 10 < overrun {
  601. t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
  602. length, offset, d, overrun, dWant)
  603. }
  604. }
  605. }
  606. }
  607. // TestEncodeNoiseThenRepeats encodes input for which the first half is very
  608. // incompressible and the second half is very compressible. The encoded form's
  609. // length should be closer to 50% of the original length than 100%.
  610. func TestEncodeNoiseThenRepeats(t *testing.T) {
  611. for _, origLen := range []int{256 * 1024, 2048 * 1024} {
  612. src := make([]byte, origLen)
  613. rng := rand.New(rand.NewSource(1))
  614. firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
  615. for i := range firstHalf {
  616. firstHalf[i] = uint8(rng.Intn(256))
  617. }
  618. for i := range secondHalf {
  619. secondHalf[i] = uint8(i >> 8)
  620. }
  621. dst := Encode(nil, src)
  622. if got, want := len(dst), origLen*3/4; got >= want {
  623. t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
  624. }
  625. }
  626. }
  627. func TestFramingFormat(t *testing.T) {
  628. // src is comprised of alternating 1e5-sized sequences of random
  629. // (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
  630. // because it is larger than maxBlockSize (64k).
  631. src := make([]byte, 1e6)
  632. rng := rand.New(rand.NewSource(1))
  633. for i := 0; i < 10; i++ {
  634. if i%2 == 0 {
  635. for j := 0; j < 1e5; j++ {
  636. src[1e5*i+j] = uint8(rng.Intn(256))
  637. }
  638. } else {
  639. for j := 0; j < 1e5; j++ {
  640. src[1e5*i+j] = uint8(i)
  641. }
  642. }
  643. }
  644. buf := new(bytes.Buffer)
  645. if _, err := NewWriter(buf).Write(src); err != nil {
  646. t.Fatalf("Write: encoding: %v", err)
  647. }
  648. dst, err := ioutil.ReadAll(NewReader(buf))
  649. if err != nil {
  650. t.Fatalf("ReadAll: decoding: %v", err)
  651. }
  652. if err := cmp(dst, src); err != nil {
  653. t.Fatal(err)
  654. }
  655. }
  656. func TestWriterGoldenOutput(t *testing.T) {
  657. buf := new(bytes.Buffer)
  658. w := NewBufferedWriter(buf)
  659. defer w.Close()
  660. w.Write([]byte("abcd")) // Not compressible.
  661. w.Flush()
  662. w.Write(bytes.Repeat([]byte{'A'}, 150)) // Compressible.
  663. w.Flush()
  664. // The next chunk is also compressible, but a naive, greedy encoding of the
  665. // overall length 67 copy as a length 64 copy (the longest expressible as a
  666. // tagCopy1 or tagCopy2) plus a length 3 remainder would be two 3-byte
  667. // tagCopy2 tags (6 bytes), since the minimum length for a tagCopy1 is 4
  668. // bytes. Instead, we could do it shorter, in 5 bytes: a 3-byte tagCopy2
  669. // (of length 60) and a 2-byte tagCopy1 (of length 7).
  670. w.Write(bytes.Repeat([]byte{'B'}, 68))
  671. w.Write([]byte("efC")) // Not compressible.
  672. w.Write(bytes.Repeat([]byte{'C'}, 20)) // Compressible.
  673. w.Write(bytes.Repeat([]byte{'B'}, 20)) // Compressible.
  674. w.Write([]byte("g")) // Not compressible.
  675. w.Flush()
  676. got := buf.String()
  677. want := strings.Join([]string{
  678. magicChunk,
  679. "\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
  680. "\x68\x10\xe6\xb6", // Checksum.
  681. "\x61\x62\x63\x64", // Uncompressed payload: "abcd".
  682. "\x00\x11\x00\x00", // Compressed chunk, 17 bytes long (including 4 byte checksum).
  683. "\x5f\xeb\xf2\x10", // Checksum.
  684. "\x96\x01", // Compressed payload: Uncompressed length (varint encoded): 150.
  685. "\x00\x41", // Compressed payload: tagLiteral, length=1, "A".
  686. "\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
  687. "\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
  688. "\x52\x01\x00", // Compressed payload: tagCopy2, length=21, offset=1.
  689. "\x00\x18\x00\x00", // Compressed chunk, 24 bytes long (including 4 byte checksum).
  690. "\x30\x85\x69\xeb", // Checksum.
  691. "\x70", // Compressed payload: Uncompressed length (varint encoded): 112.
  692. "\x00\x42", // Compressed payload: tagLiteral, length=1, "B".
  693. "\xee\x01\x00", // Compressed payload: tagCopy2, length=60, offset=1.
  694. "\x0d\x01", // Compressed payload: tagCopy1, length=7, offset=1.
  695. "\x08\x65\x66\x43", // Compressed payload: tagLiteral, length=3, "efC".
  696. "\x4e\x01\x00", // Compressed payload: tagCopy2, length=20, offset=1.
  697. "\x4e\x5a\x00", // Compressed payload: tagCopy2, length=20, offset=90.
  698. "\x00\x67", // Compressed payload: tagLiteral, length=1, "g".
  699. }, "")
  700. if got != want {
  701. t.Fatalf("\ngot: % x\nwant: % x", got, want)
  702. }
  703. }
  704. func TestEmitLiteral(t *testing.T) {
  705. testCases := []struct {
  706. length int
  707. want string
  708. }{
  709. {1, "\x00"},
  710. {2, "\x04"},
  711. {59, "\xe8"},
  712. {60, "\xec"},
  713. {61, "\xf0\x3c"},
  714. {62, "\xf0\x3d"},
  715. {254, "\xf0\xfd"},
  716. {255, "\xf0\xfe"},
  717. {256, "\xf0\xff"},
  718. {257, "\xf4\x00\x01"},
  719. {65534, "\xf4\xfd\xff"},
  720. {65535, "\xf4\xfe\xff"},
  721. {65536, "\xf4\xff\xff"},
  722. }
  723. dst := make([]byte, 70000)
  724. nines := bytes.Repeat([]byte{0x99}, 65536)
  725. for _, tc := range testCases {
  726. lit := nines[:tc.length]
  727. n := emitLiteral(dst, lit)
  728. if !bytes.HasSuffix(dst[:n], lit) {
  729. t.Errorf("length=%d: did not end with that many literal bytes", tc.length)
  730. continue
  731. }
  732. got := string(dst[:n-tc.length])
  733. if got != tc.want {
  734. t.Errorf("length=%d:\ngot % x\nwant % x", tc.length, got, tc.want)
  735. continue
  736. }
  737. }
  738. }
  739. func TestEmitCopy(t *testing.T) {
  740. testCases := []struct {
  741. offset int
  742. length int
  743. want string
  744. }{
  745. {8, 04, "\x01\x08"},
  746. {8, 11, "\x1d\x08"},
  747. {8, 12, "\x2e\x08\x00"},
  748. {8, 13, "\x32\x08\x00"},
  749. {8, 59, "\xea\x08\x00"},
  750. {8, 60, "\xee\x08\x00"},
  751. {8, 61, "\xf2\x08\x00"},
  752. {8, 62, "\xf6\x08\x00"},
  753. {8, 63, "\xfa\x08\x00"},
  754. {8, 64, "\xfe\x08\x00"},
  755. {8, 65, "\xee\x08\x00\x05\x08"},
  756. {8, 66, "\xee\x08\x00\x09\x08"},
  757. {8, 67, "\xee\x08\x00\x0d\x08"},
  758. {8, 68, "\xfe\x08\x00\x01\x08"},
  759. {8, 69, "\xfe\x08\x00\x05\x08"},
  760. {8, 80, "\xfe\x08\x00\x3e\x08\x00"},
  761. {256, 04, "\x21\x00"},
  762. {256, 11, "\x3d\x00"},
  763. {256, 12, "\x2e\x00\x01"},
  764. {256, 13, "\x32\x00\x01"},
  765. {256, 59, "\xea\x00\x01"},
  766. {256, 60, "\xee\x00\x01"},
  767. {256, 61, "\xf2\x00\x01"},
  768. {256, 62, "\xf6\x00\x01"},
  769. {256, 63, "\xfa\x00\x01"},
  770. {256, 64, "\xfe\x00\x01"},
  771. {256, 65, "\xee\x00\x01\x25\x00"},
  772. {256, 66, "\xee\x00\x01\x29\x00"},
  773. {256, 67, "\xee\x00\x01\x2d\x00"},
  774. {256, 68, "\xfe\x00\x01\x21\x00"},
  775. {256, 69, "\xfe\x00\x01\x25\x00"},
  776. {256, 80, "\xfe\x00\x01\x3e\x00\x01"},
  777. {2048, 04, "\x0e\x00\x08"},
  778. {2048, 11, "\x2a\x00\x08"},
  779. {2048, 12, "\x2e\x00\x08"},
  780. {2048, 13, "\x32\x00\x08"},
  781. {2048, 59, "\xea\x00\x08"},
  782. {2048, 60, "\xee\x00\x08"},
  783. {2048, 61, "\xf2\x00\x08"},
  784. {2048, 62, "\xf6\x00\x08"},
  785. {2048, 63, "\xfa\x00\x08"},
  786. {2048, 64, "\xfe\x00\x08"},
  787. {2048, 65, "\xee\x00\x08\x12\x00\x08"},
  788. {2048, 66, "\xee\x00\x08\x16\x00\x08"},
  789. {2048, 67, "\xee\x00\x08\x1a\x00\x08"},
  790. {2048, 68, "\xfe\x00\x08\x0e\x00\x08"},
  791. {2048, 69, "\xfe\x00\x08\x12\x00\x08"},
  792. {2048, 80, "\xfe\x00\x08\x3e\x00\x08"},
  793. }
  794. dst := make([]byte, 1024)
  795. for _, tc := range testCases {
  796. n := emitCopy(dst, tc.offset, tc.length)
  797. got := string(dst[:n])
  798. if got != tc.want {
  799. t.Errorf("offset=%d, length=%d:\ngot % x\nwant % x", tc.offset, tc.length, got, tc.want)
  800. }
  801. }
  802. }
  803. func TestNewBufferedWriter(t *testing.T) {
  804. // Test all 32 possible sub-sequences of these 5 input slices.
  805. //
  806. // Their lengths sum to 400,000, which is over 6 times the Writer ibuf
  807. // capacity: 6 * maxBlockSize is 393,216.
  808. inputs := [][]byte{
  809. bytes.Repeat([]byte{'a'}, 40000),
  810. bytes.Repeat([]byte{'b'}, 150000),
  811. bytes.Repeat([]byte{'c'}, 60000),
  812. bytes.Repeat([]byte{'d'}, 120000),
  813. bytes.Repeat([]byte{'e'}, 30000),
  814. }
  815. loop:
  816. for i := 0; i < 1<<uint(len(inputs)); i++ {
  817. var want []byte
  818. buf := new(bytes.Buffer)
  819. w := NewBufferedWriter(buf)
  820. for j, input := range inputs {
  821. if i&(1<<uint(j)) == 0 {
  822. continue
  823. }
  824. if _, err := w.Write(input); err != nil {
  825. t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
  826. continue loop
  827. }
  828. want = append(want, input...)
  829. }
  830. if err := w.Close(); err != nil {
  831. t.Errorf("i=%#02x: Close: %v", i, err)
  832. continue
  833. }
  834. got, err := ioutil.ReadAll(NewReader(buf))
  835. if err != nil {
  836. t.Errorf("i=%#02x: ReadAll: %v", i, err)
  837. continue
  838. }
  839. if err := cmp(got, want); err != nil {
  840. t.Errorf("i=%#02x: %v", i, err)
  841. continue
  842. }
  843. }
  844. }
  845. func TestFlush(t *testing.T) {
  846. buf := new(bytes.Buffer)
  847. w := NewBufferedWriter(buf)
  848. defer w.Close()
  849. if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
  850. t.Fatalf("Write: %v", err)
  851. }
  852. if n := buf.Len(); n != 0 {
  853. t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
  854. }
  855. if err := w.Flush(); err != nil {
  856. t.Fatalf("Flush: %v", err)
  857. }
  858. if n := buf.Len(); n == 0 {
  859. t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
  860. }
  861. }
  862. func TestReaderUncompressedDataOK(t *testing.T) {
  863. r := NewReader(strings.NewReader(magicChunk +
  864. "\x01\x08\x00\x00" + // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
  865. "\x68\x10\xe6\xb6" + // Checksum.
  866. "\x61\x62\x63\x64", // Uncompressed payload: "abcd".
  867. ))
  868. g, err := ioutil.ReadAll(r)
  869. if err != nil {
  870. t.Fatal(err)
  871. }
  872. if got, want := string(g), "abcd"; got != want {
  873. t.Fatalf("got %q, want %q", got, want)
  874. }
  875. }
  876. func TestReaderUncompressedDataNoPayload(t *testing.T) {
  877. r := NewReader(strings.NewReader(magicChunk +
  878. "\x01\x04\x00\x00" + // Uncompressed chunk, 4 bytes long.
  879. "", // No payload; corrupt input.
  880. ))
  881. if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
  882. t.Fatalf("got %v, want %v", err, ErrCorrupt)
  883. }
  884. }
  885. func TestReaderUncompressedDataTooLong(t *testing.T) {
  886. // https://github.com/google/snappy/blob/master/framing_format.txt section
  887. // 4.3 says that "the maximum legal chunk length... is 65540", or 0x10004.
  888. const n = 0x10005
  889. r := NewReader(strings.NewReader(magicChunk +
  890. "\x01\x05\x00\x01" + // Uncompressed chunk, n bytes long.
  891. strings.Repeat("\x00", n),
  892. ))
  893. if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
  894. t.Fatalf("got %v, want %v", err, ErrCorrupt)
  895. }
  896. }
  897. func TestReaderReset(t *testing.T) {
  898. gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
  899. buf := new(bytes.Buffer)
  900. if _, err := NewWriter(buf).Write(gold); err != nil {
  901. t.Fatalf("Write: %v", err)
  902. }
  903. encoded, invalid, partial := buf.String(), "invalid", "partial"
  904. r := NewReader(nil)
  905. for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
  906. if s == partial {
  907. r.Reset(strings.NewReader(encoded))
  908. if _, err := r.Read(make([]byte, 101)); err != nil {
  909. t.Errorf("#%d: %v", i, err)
  910. continue
  911. }
  912. continue
  913. }
  914. r.Reset(strings.NewReader(s))
  915. got, err := ioutil.ReadAll(r)
  916. switch s {
  917. case encoded:
  918. if err != nil {
  919. t.Errorf("#%d: %v", i, err)
  920. continue
  921. }
  922. if err := cmp(got, gold); err != nil {
  923. t.Errorf("#%d: %v", i, err)
  924. continue
  925. }
  926. case invalid:
  927. if err == nil {
  928. t.Errorf("#%d: got nil error, want non-nil", i)
  929. continue
  930. }
  931. }
  932. }
  933. }
  934. func TestWriterReset(t *testing.T) {
  935. gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
  936. const n = 20
  937. for _, buffered := range []bool{false, true} {
  938. var w *Writer
  939. if buffered {
  940. w = NewBufferedWriter(nil)
  941. defer w.Close()
  942. } else {
  943. w = NewWriter(nil)
  944. }
  945. var gots, wants [][]byte
  946. failed := false
  947. for i := 0; i <= n; i++ {
  948. buf := new(bytes.Buffer)
  949. w.Reset(buf)
  950. want := gold[:len(gold)*i/n]
  951. if _, err := w.Write(want); err != nil {
  952. t.Errorf("#%d: Write: %v", i, err)
  953. failed = true
  954. continue
  955. }
  956. if buffered {
  957. if err := w.Flush(); err != nil {
  958. t.Errorf("#%d: Flush: %v", i, err)
  959. failed = true
  960. continue
  961. }
  962. }
  963. got, err := ioutil.ReadAll(NewReader(buf))
  964. if err != nil {
  965. t.Errorf("#%d: ReadAll: %v", i, err)
  966. failed = true
  967. continue
  968. }
  969. gots = append(gots, got)
  970. wants = append(wants, want)
  971. }
  972. if failed {
  973. continue
  974. }
  975. for i := range gots {
  976. if err := cmp(gots[i], wants[i]); err != nil {
  977. t.Errorf("#%d: %v", i, err)
  978. }
  979. }
  980. }
  981. }
  982. func TestWriterResetWithoutFlush(t *testing.T) {
  983. buf0 := new(bytes.Buffer)
  984. buf1 := new(bytes.Buffer)
  985. w := NewBufferedWriter(buf0)
  986. if _, err := w.Write([]byte("xxx")); err != nil {
  987. t.Fatalf("Write #0: %v", err)
  988. }
  989. // Note that we don't Flush the Writer before calling Reset.
  990. w.Reset(buf1)
  991. if _, err := w.Write([]byte("yyy")); err != nil {
  992. t.Fatalf("Write #1: %v", err)
  993. }
  994. if err := w.Flush(); err != nil {
  995. t.Fatalf("Flush: %v", err)
  996. }
  997. got, err := ioutil.ReadAll(NewReader(buf1))
  998. if err != nil {
  999. t.Fatalf("ReadAll: %v", err)
  1000. }
  1001. if err := cmp(got, []byte("yyy")); err != nil {
  1002. t.Fatal(err)
  1003. }
  1004. }
  1005. type writeCounter int
  1006. func (c *writeCounter) Write(p []byte) (int, error) {
  1007. *c++
  1008. return len(p), nil
  1009. }
  1010. // TestNumUnderlyingWrites tests that each Writer flush only makes one or two
  1011. // Write calls on its underlying io.Writer, depending on whether or not the
  1012. // flushed buffer was compressible.
  1013. func TestNumUnderlyingWrites(t *testing.T) {
  1014. testCases := []struct {
  1015. input []byte
  1016. want int
  1017. }{
  1018. {bytes.Repeat([]byte{'x'}, 100), 1},
  1019. {bytes.Repeat([]byte{'y'}, 100), 1},
  1020. {[]byte("ABCDEFGHIJKLMNOPQRST"), 2},
  1021. }
  1022. var c writeCounter
  1023. w := NewBufferedWriter(&c)
  1024. defer w.Close()
  1025. for i, tc := range testCases {
  1026. c = 0
  1027. if _, err := w.Write(tc.input); err != nil {
  1028. t.Errorf("#%d: Write: %v", i, err)
  1029. continue
  1030. }
  1031. if err := w.Flush(); err != nil {
  1032. t.Errorf("#%d: Flush: %v", i, err)
  1033. continue
  1034. }
  1035. if int(c) != tc.want {
  1036. t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
  1037. continue
  1038. }
  1039. }
  1040. }
  1041. func benchDecode(b *testing.B, src []byte) {
  1042. encoded := Encode(nil, src)
  1043. // Bandwidth is in amount of uncompressed data.
  1044. b.SetBytes(int64(len(src)))
  1045. b.ResetTimer()
  1046. for i := 0; i < b.N; i++ {
  1047. Decode(src, encoded)
  1048. }
  1049. }
  1050. func benchEncode(b *testing.B, src []byte) {
  1051. // Bandwidth is in amount of uncompressed data.
  1052. b.SetBytes(int64(len(src)))
  1053. dst := make([]byte, MaxEncodedLen(len(src)))
  1054. b.ResetTimer()
  1055. for i := 0; i < b.N; i++ {
  1056. Encode(dst, src)
  1057. }
  1058. }
  1059. func testOrBenchmark(b testing.TB) string {
  1060. if _, ok := b.(*testing.B); ok {
  1061. return "benchmark"
  1062. }
  1063. return "test"
  1064. }
  1065. func readFile(b testing.TB, filename string) []byte {
  1066. src, err := ioutil.ReadFile(filename)
  1067. if err != nil {
  1068. b.Skipf("skipping %s: %v", testOrBenchmark(b), err)
  1069. }
  1070. if len(src) == 0 {
  1071. b.Fatalf("%s has zero length", filename)
  1072. }
  1073. return src
  1074. }
  1075. // expand returns a slice of length n containing repeated copies of src.
  1076. func expand(src []byte, n int) []byte {
  1077. dst := make([]byte, n)
  1078. for x := dst; len(x) > 0; {
  1079. i := copy(x, src)
  1080. x = x[i:]
  1081. }
  1082. return dst
  1083. }
  1084. func benchWords(b *testing.B, n int, decode bool) {
  1085. // Note: the file is OS-language dependent so the resulting values are not
  1086. // directly comparable for non-US-English OS installations.
  1087. data := expand(readFile(b, "/usr/share/dict/words"), n)
  1088. if decode {
  1089. benchDecode(b, data)
  1090. } else {
  1091. benchEncode(b, data)
  1092. }
  1093. }
  1094. func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
  1095. func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
  1096. func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
  1097. func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
  1098. func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
  1099. func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
  1100. func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
  1101. func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
  1102. func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
  1103. func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
  1104. func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
  1105. func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
  1106. func BenchmarkRandomEncode(b *testing.B) {
  1107. rng := rand.New(rand.NewSource(1))
  1108. data := make([]byte, 1<<20)
  1109. for i := range data {
  1110. data[i] = uint8(rng.Intn(256))
  1111. }
  1112. benchEncode(b, data)
  1113. }
  1114. // testFiles' values are copied directly from
  1115. // https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
  1116. // The label field is unused in snappy-go.
  1117. var testFiles = []struct {
  1118. label string
  1119. filename string
  1120. sizeLimit int
  1121. }{
  1122. {"html", "html", 0},
  1123. {"urls", "urls.10K", 0},
  1124. {"jpg", "fireworks.jpeg", 0},
  1125. {"jpg_200", "fireworks.jpeg", 200},
  1126. {"pdf", "paper-100k.pdf", 0},
  1127. {"html4", "html_x_4", 0},
  1128. {"txt1", "alice29.txt", 0},
  1129. {"txt2", "asyoulik.txt", 0},
  1130. {"txt3", "lcet10.txt", 0},
  1131. {"txt4", "plrabn12.txt", 0},
  1132. {"pb", "geo.protodata", 0},
  1133. {"gaviota", "kppkn.gtb", 0},
  1134. }
  1135. const (
  1136. // The benchmark data files are at this canonical URL.
  1137. benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
  1138. // They are copied to this local directory under testdata.
  1139. benchDir = "bench"
  1140. )
  1141. func downloadBenchmarkFiles(b testing.TB, basename string) (errRet error) {
  1142. tdBenchDir := filepath.Join(*testdata, benchDir)
  1143. filename := filepath.Join(tdBenchDir, basename)
  1144. if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
  1145. return nil
  1146. }
  1147. if !*download {
  1148. b.Skipf("test data not found; skipping %s without the -download flag", testOrBenchmark(b))
  1149. }
  1150. // Download the official snappy C++ implementation reference test data
  1151. // files for benchmarking.
  1152. if err := os.MkdirAll(tdBenchDir, 0777); err != nil && !os.IsExist(err) {
  1153. return fmt.Errorf("failed to create %s: %s", tdBenchDir, err)
  1154. }
  1155. f, err := os.Create(filename)
  1156. if err != nil {
  1157. return fmt.Errorf("failed to create %s: %s", filename, err)
  1158. }
  1159. defer f.Close()
  1160. defer func() {
  1161. if errRet != nil {
  1162. os.Remove(filename)
  1163. }
  1164. }()
  1165. url := benchURL + basename
  1166. resp, err := http.Get(url)
  1167. if err != nil {
  1168. return fmt.Errorf("failed to download %s: %s", url, err)
  1169. }
  1170. defer resp.Body.Close()
  1171. if s := resp.StatusCode; s != http.StatusOK {
  1172. return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
  1173. }
  1174. _, err = io.Copy(f, resp.Body)
  1175. if err != nil {
  1176. return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
  1177. }
  1178. return nil
  1179. }
  1180. func benchFile(b *testing.B, i int, decode bool) {
  1181. if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil {
  1182. b.Fatalf("failed to download testdata: %s", err)
  1183. }
  1184. data := readFile(b, filepath.Join(*testdata, benchDir, testFiles[i].filename))
  1185. if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
  1186. data = data[:n]
  1187. }
  1188. if decode {
  1189. benchDecode(b, data)
  1190. } else {
  1191. benchEncode(b, data)
  1192. }
  1193. }
  1194. // Naming convention is kept similar to what snappy's C++ implementation uses.
  1195. func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) }
  1196. func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) }
  1197. func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) }
  1198. func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) }
  1199. func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) }
  1200. func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) }
  1201. func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) }
  1202. func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) }
  1203. func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) }
  1204. func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) }
  1205. func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
  1206. func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }
  1207. func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) }
  1208. func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) }
  1209. func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) }
  1210. func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) }
  1211. func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) }
  1212. func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) }
  1213. func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) }
  1214. func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) }
  1215. func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) }
  1216. func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) }
  1217. func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
  1218. func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }
  1219. func BenchmarkExtendMatch(b *testing.B) {
  1220. src, err := ioutil.ReadFile(goldenText)
  1221. if err != nil {
  1222. b.Fatalf("ReadFile: %v", err)
  1223. }
  1224. b.ResetTimer()
  1225. for i := 0; i < b.N; i++ {
  1226. for _, tc := range extendMatchGoldenTestCases {
  1227. extendMatch(src, tc.i, tc.j)
  1228. }
  1229. }
  1230. }