// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
  5. package s2
  6. import (
  7. "encoding/binary"
  8. "errors"
  9. "io"
  10. )
  11. var (
  12. // ErrCorrupt reports that the input is invalid.
  13. ErrCorrupt = errors.New("s2: corrupt input")
  14. // ErrCRC reports that the input failed CRC validation (streams only)
  15. ErrCRC = errors.New("s2: corrupt input, crc mismatch")
  16. // ErrTooLarge reports that the uncompressed length is too large.
  17. ErrTooLarge = errors.New("s2: decoded block is too large")
  18. // ErrUnsupported reports that the input isn't supported.
  19. ErrUnsupported = errors.New("s2: unsupported input")
  20. errUnsupportedLiteralLength = errors.New("s2: unsupported literal length")
  21. )
  22. // DecodedLen returns the length of the decoded block.
  23. func DecodedLen(src []byte) (int, error) {
  24. v, _, err := decodedLen(src)
  25. return v, err
  26. }
  27. // decodedLen returns the length of the decoded block and the number of bytes
  28. // that the length header occupied.
  29. func decodedLen(src []byte) (blockLen, headerLen int, err error) {
  30. v, n := binary.Uvarint(src)
  31. if n <= 0 || v > 0xffffffff {
  32. return 0, 0, ErrCorrupt
  33. }
  34. const wordSize = 32 << (^uint(0) >> 32 & 1)
  35. if wordSize == 32 && v > 0x7fffffff {
  36. return 0, 0, ErrTooLarge
  37. }
  38. return int(v), n, nil
  39. }
  40. const (
  41. decodeErrCodeCorrupt = 1
  42. decodeErrCodeUnsupportedLiteralLength = 2
  43. )
  44. // Decode returns the decoded form of src. The returned slice may be a sub-
  45. // slice of dst if dst was large enough to hold the entire decoded block.
  46. // Otherwise, a newly allocated slice will be returned.
  47. //
  48. // The dst and src must not overlap. It is valid to pass a nil dst.
  49. func Decode(dst, src []byte) ([]byte, error) {
  50. dLen, s, err := decodedLen(src)
  51. if err != nil {
  52. return nil, err
  53. }
  54. if dLen <= cap(dst) {
  55. dst = dst[:dLen]
  56. } else {
  57. dst = make([]byte, dLen)
  58. }
  59. switch s2Decode(dst, src[s:]) {
  60. case 0:
  61. return dst, nil
  62. case decodeErrCodeUnsupportedLiteralLength:
  63. return nil, errUnsupportedLiteralLength
  64. }
  65. return nil, ErrCorrupt
  66. }
  67. // NewReader returns a new Reader that decompresses from r, using the framing
  68. // format described at
  69. // https://github.com/google/snappy/blob/master/framing_format.txt with S2 changes.
  70. func NewReader(r io.Reader) *Reader {
  71. return &Reader{
  72. r: r,
  73. buf: make([]byte, MaxEncodedLen(maxBlockSize)+checksumSize),
  74. }
  75. }
  76. // Reader is an io.Reader that can read Snappy-compressed bytes.
  77. type Reader struct {
  78. r io.Reader
  79. err error
  80. decoded []byte
  81. buf []byte
  82. // decoded[i:j] contains decoded bytes that have not yet been passed on.
  83. i, j int
  84. readHeader bool
  85. }
  86. // Reset discards any buffered data, resets all state, and switches the Snappy
  87. // reader to read from r. This permits reusing a Reader rather than allocating
  88. // a new one.
  89. func (r *Reader) Reset(reader io.Reader) {
  90. r.r = reader
  91. r.err = nil
  92. r.i = 0
  93. r.j = 0
  94. r.readHeader = false
  95. }
  96. func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
  97. if _, r.err = io.ReadFull(r.r, p); r.err != nil {
  98. if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
  99. r.err = ErrCorrupt
  100. }
  101. return false
  102. }
  103. return true
  104. }
  105. // Read satisfies the io.Reader interface.
  106. func (r *Reader) Read(p []byte) (int, error) {
  107. if r.err != nil {
  108. return 0, r.err
  109. }
  110. for {
  111. if r.i < r.j {
  112. n := copy(p, r.decoded[r.i:r.j])
  113. r.i += n
  114. return n, nil
  115. }
  116. if !r.readFull(r.buf[:4], true) {
  117. return 0, r.err
  118. }
  119. chunkType := r.buf[0]
  120. if !r.readHeader {
  121. if chunkType != chunkTypeStreamIdentifier {
  122. r.err = ErrCorrupt
  123. return 0, r.err
  124. }
  125. r.readHeader = true
  126. }
  127. chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
  128. if chunkLen > len(r.buf) {
  129. r.err = ErrUnsupported
  130. return 0, r.err
  131. }
  132. // The chunk types are specified at
  133. // https://github.com/google/snappy/blob/master/framing_format.txt
  134. switch chunkType {
  135. case chunkTypeCompressedData:
  136. // Section 4.2. Compressed data (chunk type 0x00).
  137. if chunkLen < checksumSize {
  138. r.err = ErrCorrupt
  139. return 0, r.err
  140. }
  141. buf := r.buf[:chunkLen]
  142. if !r.readFull(buf, false) {
  143. return 0, r.err
  144. }
  145. checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
  146. buf = buf[checksumSize:]
  147. n, err := DecodedLen(buf)
  148. if err != nil {
  149. r.err = err
  150. return 0, r.err
  151. }
  152. if n > len(r.decoded) {
  153. if n > maxBlockSize {
  154. r.err = ErrCorrupt
  155. return 0, r.err
  156. }
  157. r.decoded = make([]byte, n)
  158. }
  159. if _, err := Decode(r.decoded, buf); err != nil {
  160. r.err = err
  161. return 0, r.err
  162. }
  163. if crc(r.decoded[:n]) != checksum {
  164. r.err = ErrCRC
  165. return 0, r.err
  166. }
  167. r.i, r.j = 0, n
  168. continue
  169. case chunkTypeUncompressedData:
  170. // Section 4.3. Uncompressed data (chunk type 0x01).
  171. if chunkLen < checksumSize {
  172. r.err = ErrCorrupt
  173. return 0, r.err
  174. }
  175. buf := r.buf[:checksumSize]
  176. if !r.readFull(buf, false) {
  177. return 0, r.err
  178. }
  179. checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
  180. // Read directly into r.decoded instead of via r.buf.
  181. n := chunkLen - checksumSize
  182. if n > len(r.decoded) {
  183. if n > maxBlockSize {
  184. r.err = ErrCorrupt
  185. return 0, r.err
  186. }
  187. r.decoded = make([]byte, n)
  188. }
  189. if !r.readFull(r.decoded[:n], false) {
  190. return 0, r.err
  191. }
  192. if crc(r.decoded[:n]) != checksum {
  193. r.err = ErrCRC
  194. return 0, r.err
  195. }
  196. r.i, r.j = 0, n
  197. continue
  198. case chunkTypeStreamIdentifier:
  199. // Section 4.1. Stream identifier (chunk type 0xff).
  200. if chunkLen != len(magicBody) {
  201. r.err = ErrCorrupt
  202. return 0, r.err
  203. }
  204. if !r.readFull(r.buf[:len(magicBody)], false) {
  205. return 0, r.err
  206. }
  207. if string(r.buf[:len(magicBody)]) != magicBody {
  208. if string(r.buf[:len(magicBody)]) != magicBodySnappy {
  209. r.err = ErrCorrupt
  210. return 0, r.err
  211. }
  212. }
  213. continue
  214. }
  215. if chunkType <= 0x7f {
  216. // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
  217. r.err = ErrUnsupported
  218. return 0, r.err
  219. }
  220. // Section 4.4 Padding (chunk type 0xfe).
  221. // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
  222. if !r.readFull(r.buf[:chunkLen], false) {
  223. return 0, r.err
  224. }
  225. }
  226. }
  227. // Skip will skip n bytes forward in the decompressed output.
  228. // For larger skips this consumes less CPU and is faster than reading output and discarding it.
  229. // CRC is not checked on skipped blocks.
  230. // io.ErrUnexpectedEOF is returned if the stream ends before all bytes have been skipped.
  231. // If a decoding error is encountered subsequent calls to Read will also fail.
  232. func (r *Reader) Skip(n int64) error {
  233. if n < 0 {
  234. return errors.New("attempted negative skip")
  235. }
  236. if r.err != nil {
  237. return r.err
  238. }
  239. for n > 0 {
  240. if r.i < r.j {
  241. // Skip in buffer.
  242. // decoded[i:j] contains decoded bytes that have not yet been passed on.
  243. left := int64(r.j - r.i)
  244. if left >= n {
  245. r.i += int(n)
  246. return nil
  247. }
  248. n -= int64(r.j - r.i)
  249. r.i, r.j = 0, 0
  250. }
  251. // Buffer empty; read blocks until we have content.
  252. if !r.readFull(r.buf[:4], true) {
  253. if r.err == io.EOF {
  254. r.err = io.ErrUnexpectedEOF
  255. }
  256. return r.err
  257. }
  258. chunkType := r.buf[0]
  259. if !r.readHeader {
  260. if chunkType != chunkTypeStreamIdentifier {
  261. r.err = ErrCorrupt
  262. return r.err
  263. }
  264. r.readHeader = true
  265. }
  266. chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
  267. if chunkLen > len(r.buf) {
  268. r.err = ErrUnsupported
  269. return r.err
  270. }
  271. // The chunk types are specified at
  272. // https://github.com/google/snappy/blob/master/framing_format.txt
  273. switch chunkType {
  274. case chunkTypeCompressedData:
  275. // Section 4.2. Compressed data (chunk type 0x00).
  276. if chunkLen < checksumSize {
  277. r.err = ErrCorrupt
  278. return r.err
  279. }
  280. buf := r.buf[:chunkLen]
  281. if !r.readFull(buf, false) {
  282. return r.err
  283. }
  284. checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
  285. buf = buf[checksumSize:]
  286. dLen, err := DecodedLen(buf)
  287. if err != nil {
  288. r.err = err
  289. return r.err
  290. }
  291. if dLen > maxBlockSize {
  292. r.err = ErrCorrupt
  293. return r.err
  294. }
  295. // Check if destination is within this block
  296. if int64(dLen) > n {
  297. if len(r.decoded) < dLen {
  298. r.decoded = make([]byte, dLen)
  299. }
  300. if _, err := Decode(r.decoded, buf); err != nil {
  301. r.err = err
  302. return r.err
  303. }
  304. if crc(r.decoded[:dLen]) != checksum {
  305. r.err = ErrCorrupt
  306. return r.err
  307. }
  308. } else {
  309. // Skip block completely
  310. n -= int64(dLen)
  311. dLen = 0
  312. }
  313. r.i, r.j = 0, dLen
  314. continue
  315. case chunkTypeUncompressedData:
  316. // Section 4.3. Uncompressed data (chunk type 0x01).
  317. if chunkLen < checksumSize {
  318. r.err = ErrCorrupt
  319. return r.err
  320. }
  321. buf := r.buf[:checksumSize]
  322. if !r.readFull(buf, false) {
  323. return r.err
  324. }
  325. checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
  326. // Read directly into r.decoded instead of via r.buf.
  327. n2 := chunkLen - checksumSize
  328. if n2 > len(r.decoded) {
  329. if n2 > maxBlockSize {
  330. r.err = ErrCorrupt
  331. return r.err
  332. }
  333. r.decoded = make([]byte, n2)
  334. }
  335. if !r.readFull(r.decoded[:n2], false) {
  336. return r.err
  337. }
  338. if int64(n2) < n {
  339. if crc(r.decoded[:n2]) != checksum {
  340. r.err = ErrCorrupt
  341. return r.err
  342. }
  343. }
  344. r.i, r.j = 0, n2
  345. continue
  346. case chunkTypeStreamIdentifier:
  347. // Section 4.1. Stream identifier (chunk type 0xff).
  348. if chunkLen != len(magicBody) {
  349. r.err = ErrCorrupt
  350. return r.err
  351. }
  352. if !r.readFull(r.buf[:len(magicBody)], false) {
  353. return r.err
  354. }
  355. if string(r.buf[:len(magicBody)]) != magicBody {
  356. if string(r.buf[:len(magicBody)]) != magicBodySnappy {
  357. r.err = ErrCorrupt
  358. return r.err
  359. }
  360. }
  361. continue
  362. }
  363. if chunkType <= 0x7f {
  364. // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
  365. r.err = ErrUnsupported
  366. return r.err
  367. }
  368. // Section 4.4 Padding (chunk type 0xfe).
  369. // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
  370. if !r.readFull(r.buf[:chunkLen], false) {
  371. return r.err
  372. }
  373. return io.ErrUnexpectedEOF
  374. }
  375. return nil
  376. }