123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623 |
- // Copyright 2010 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package zip
- import (
- "bufio"
- "encoding/binary"
- "errors"
- "fmt"
- "hash"
- "hash/crc32"
- "io"
- "os"
- "time"
- )
// ErrFormat is returned when an expected zip signature or record layout
// is not found in the input.
var ErrFormat = errors.New("zip: not a valid zip file")

// ErrAlgorithm is returned when no decompressor is available for the
// compression method of a file.
var ErrAlgorithm = errors.New("zip: unsupported compression algorithm")

// ErrChecksum is returned when the CRC-32 of the data read does not match
// the recorded CRC-32.
var ErrChecksum = errors.New("zip: checksum error")
- type Reader struct {
- r io.ReaderAt
- File []*File
- Comment string
- decompressors map[uint16]Decompressor
- }
- type ReadCloser struct {
- f *os.File
- Reader
- }
- type File struct {
- FileHeader
- zip *Reader
- zipr io.ReaderAt
- zipsize int64
- headerOffset int64
- }
- func (f *File) hasDataDescriptor() bool {
- return f.Flags&0x8 != 0
- }
- // OpenReader will open the Zip file specified by name and return a ReadCloser.
- func OpenReader(name string) (*ReadCloser, error) {
- f, err := os.Open(name)
- if err != nil {
- return nil, err
- }
- fi, err := f.Stat()
- if err != nil {
- f.Close()
- return nil, err
- }
- r := new(ReadCloser)
- if err := r.init(f, fi.Size()); err != nil {
- f.Close()
- return nil, err
- }
- r.f = f
- return r, nil
- }
- // NewReader returns a new Reader reading from r, which is assumed to
- // have the given size in bytes.
- func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
- if size < 0 {
- return nil, errors.New("zip: size cannot be negative")
- }
- zr := new(Reader)
- if err := zr.init(r, size); err != nil {
- return nil, err
- }
- return zr, nil
- }
- func (z *Reader) init(r io.ReaderAt, size int64) error {
- end, err := readDirectoryEnd(r, size)
- if err != nil {
- return err
- }
- if end.directoryRecords > uint64(size)/fileHeaderLen {
- return fmt.Errorf("archive/zip: TOC declares impossible %d files in %d byte zip", end.directoryRecords, size)
- }
- z.r = r
- z.File = make([]*File, 0, end.directoryRecords)
- z.Comment = end.comment
- rs := io.NewSectionReader(r, 0, size)
- if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
- return err
- }
- buf := bufio.NewReader(rs)
- // The count of files inside a zip is truncated to fit in a uint16.
- // Gloss over this by reading headers until we encounter
- // a bad one, and then only report an ErrFormat or UnexpectedEOF if
- // the file count modulo 65536 is incorrect.
- for {
- f := &File{zip: z, zipr: r, zipsize: size}
- err = readDirectoryHeader(f, buf)
- if err == ErrFormat || err == io.ErrUnexpectedEOF {
- break
- }
- if err != nil {
- return err
- }
- z.File = append(z.File, f)
- }
- if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here
- // Return the readDirectoryHeader error if we read
- // the wrong number of directory entries.
- return err
- }
- return nil
- }
- // RegisterDecompressor registers or overrides a custom decompressor for a
- // specific method ID. If a decompressor for a given method is not found,
- // Reader will default to looking up the decompressor at the package level.
- func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) {
- if z.decompressors == nil {
- z.decompressors = make(map[uint16]Decompressor)
- }
- z.decompressors[method] = dcomp
- }
- func (z *Reader) decompressor(method uint16) Decompressor {
- dcomp := z.decompressors[method]
- if dcomp == nil {
- dcomp = decompressor(method)
- }
- return dcomp
- }
- // Close closes the Zip file, rendering it unusable for I/O.
- func (rc *ReadCloser) Close() error {
- return rc.f.Close()
- }
- // DataOffset returns the offset of the file's possibly-compressed
- // data, relative to the beginning of the zip file.
- //
- // Most callers should instead use Open, which transparently
- // decompresses data and verifies checksums.
- func (f *File) DataOffset() (offset int64, err error) {
- bodyOffset, err := f.findBodyOffset()
- if err != nil {
- return
- }
- return f.headerOffset + bodyOffset, nil
- }
- // Open returns a ReadCloser that provides access to the File's contents.
- // Multiple files may be read concurrently.
- func (f *File) Open() (io.ReadCloser, error) {
- bodyOffset, err := f.findBodyOffset()
- if err != nil {
- return nil, err
- }
- size := int64(f.CompressedSize64)
- r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
- dcomp := f.zip.decompressor(f.Method)
- if dcomp == nil {
- return nil, ErrAlgorithm
- }
- var rc io.ReadCloser = dcomp(r)
- var desr io.Reader
- if f.hasDataDescriptor() {
- desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
- }
- rc = &checksumReader{
- rc: rc,
- hash: crc32.NewIEEE(),
- f: f,
- desr: desr,
- }
- return rc, nil
- }
- // OpenRaw returns a Reader that returns the *compressed* output of the file.
- func (f *File) OpenRaw() (io.Reader, error) {
- bodyOffset, err := f.findBodyOffset()
- if err != nil {
- return nil, err
- }
- size := int64(f.CompressedSize64)
- return io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size), nil
- }
- type checksumReader struct {
- rc io.ReadCloser
- hash hash.Hash32
- nread uint64 // number of bytes read so far
- f *File
- desr io.Reader // if non-nil, where to read the data descriptor
- err error // sticky error
- }
- func (r *checksumReader) Read(b []byte) (n int, err error) {
- if r.err != nil {
- return 0, r.err
- }
- n, err = r.rc.Read(b)
- r.hash.Write(b[:n])
- r.nread += uint64(n)
- if err == nil {
- return
- }
- if err == io.EOF {
- if r.nread != r.f.UncompressedSize64 {
- return 0, io.ErrUnexpectedEOF
- }
- if r.desr != nil {
- if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
- if err1 == io.EOF {
- err = io.ErrUnexpectedEOF
- } else {
- err = err1
- }
- } else if r.hash.Sum32() != r.f.CRC32 {
- err = ErrChecksum
- }
- } else {
- // If there's not a data descriptor, we still compare
- // the CRC32 of what we've read against the file header
- // or TOC's CRC32, if it seems like it was set.
- if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
- err = ErrChecksum
- }
- }
- }
- r.err = err
- return
- }
- func (r *checksumReader) Close() error { return r.rc.Close() }
- // findBodyOffset does the minimum work to verify the file has a header
- // and returns the file body offset.
- func (f *File) findBodyOffset() (int64, error) {
- var buf [fileHeaderLen]byte
- if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
- return 0, err
- }
- b := readBuf(buf[:])
- if sig := b.uint32(); sig != fileHeaderSignature {
- return 0, ErrFormat
- }
- b = b[22:] // skip over most of the header
- filenameLen := int(b.uint16())
- extraLen := int(b.uint16())
- return int64(fileHeaderLen + filenameLen + extraLen), nil
- }
- // readDirectoryHeader attempts to read a directory header from r.
- // It returns io.ErrUnexpectedEOF if it cannot read a complete header,
- // and ErrFormat if it doesn't find a valid header signature.
- func readDirectoryHeader(f *File, r io.Reader) error {
- var buf [directoryHeaderLen]byte
- if _, err := io.ReadFull(r, buf[:]); err != nil {
- return err
- }
- b := readBuf(buf[:])
- if sig := b.uint32(); sig != directoryHeaderSignature {
- return ErrFormat
- }
- f.CreatorVersion = b.uint16()
- f.ReaderVersion = b.uint16()
- f.Flags = b.uint16()
- f.Method = b.uint16()
- f.ModifiedTime = b.uint16()
- f.ModifiedDate = b.uint16()
- f.CRC32 = b.uint32()
- f.CompressedSize = b.uint32()
- f.UncompressedSize = b.uint32()
- f.CompressedSize64 = uint64(f.CompressedSize)
- f.UncompressedSize64 = uint64(f.UncompressedSize)
- filenameLen := int(b.uint16())
- extraLen := int(b.uint16())
- commentLen := int(b.uint16())
- b = b[4:] // skipped start disk number and internal attributes (2x uint16)
- f.ExternalAttrs = b.uint32()
- f.headerOffset = int64(b.uint32())
- d := make([]byte, filenameLen+extraLen+commentLen)
- if _, err := io.ReadFull(r, d); err != nil {
- return err
- }
- f.Name = string(d[:filenameLen])
- f.Extra = d[filenameLen : filenameLen+extraLen]
- f.Comment = string(d[filenameLen+extraLen:])
- // Determine the character encoding.
- utf8Valid1, utf8Require1 := detectUTF8(f.Name)
- utf8Valid2, utf8Require2 := detectUTF8(f.Comment)
- switch {
- case !utf8Valid1 || !utf8Valid2:
- // Name and Comment definitely not UTF-8.
- f.NonUTF8 = true
- case !utf8Require1 && !utf8Require2:
- // Name and Comment use only single-byte runes that overlap with UTF-8.
- f.NonUTF8 = false
- default:
- // Might be UTF-8, might be some other encoding; preserve existing flag.
- // Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag.
- // Since it is impossible to always distinguish valid UTF-8 from some
- // other encoding (e.g., GBK or Shift-JIS), we trust the flag.
- f.NonUTF8 = f.Flags&0x800 == 0
- }
- needUSize := f.UncompressedSize == ^uint32(0)
- needCSize := f.CompressedSize == ^uint32(0)
- needHeaderOffset := f.headerOffset == int64(^uint32(0))
- // Best effort to find what we need.
- // Other zip authors might not even follow the basic format,
- // and we'll just ignore the Extra content in that case.
- var modified time.Time
- parseExtras:
- for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
- fieldTag := extra.uint16()
- fieldSize := int(extra.uint16())
- if len(extra) < fieldSize {
- break
- }
- fieldBuf := extra.sub(fieldSize)
- switch fieldTag {
- case zip64ExtraID:
- // update directory values from the zip64 extra block.
- // They should only be consulted if the sizes read earlier
- // are maxed out.
- // See golang.org/issue/13367.
- if needUSize {
- needUSize = false
- if len(fieldBuf) < 8 {
- return ErrFormat
- }
- f.UncompressedSize64 = fieldBuf.uint64()
- }
- if needCSize {
- needCSize = false
- if len(fieldBuf) < 8 {
- return ErrFormat
- }
- f.CompressedSize64 = fieldBuf.uint64()
- }
- if needHeaderOffset {
- needHeaderOffset = false
- if len(fieldBuf) < 8 {
- return ErrFormat
- }
- f.headerOffset = int64(fieldBuf.uint64())
- }
- case ntfsExtraID:
- if len(fieldBuf) < 4 {
- continue parseExtras
- }
- fieldBuf.uint32() // reserved (ignored)
- for len(fieldBuf) >= 4 { // need at least tag and size
- attrTag := fieldBuf.uint16()
- attrSize := int(fieldBuf.uint16())
- if len(fieldBuf) < attrSize {
- continue parseExtras
- }
- attrBuf := fieldBuf.sub(attrSize)
- if attrTag != 1 || attrSize != 24 {
- continue // Ignore irrelevant attributes
- }
- const ticksPerSecond = 1e7 // Windows timestamp resolution
- ts := int64(attrBuf.uint64()) // ModTime since Windows epoch
- secs := int64(ts / ticksPerSecond)
- nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond)
- epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
- modified = time.Unix(epoch.Unix()+secs, nsecs)
- }
- case unixExtraID, infoZipUnixExtraID:
- if len(fieldBuf) < 8 {
- continue parseExtras
- }
- fieldBuf.uint32() // AcTime (ignored)
- ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
- modified = time.Unix(ts, 0)
- case extTimeExtraID:
- if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
- continue parseExtras
- }
- ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
- modified = time.Unix(ts, 0)
- }
- }
- msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
- f.Modified = msdosModified
- if !modified.IsZero() {
- f.Modified = modified.UTC()
- // If legacy MS-DOS timestamps are set, we can use the delta between
- // the legacy and extended versions to estimate timezone offset.
- //
- // A non-UTC timezone is always used (even if offset is zero).
- // Thus, FileHeader.Modified.Location() == time.UTC is useful for
- // determining whether extended timestamps are present.
- // This is necessary for users that need to do additional time
- // calculations when dealing with legacy ZIP formats.
- if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
- f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
- }
- }
- // Assume that uncompressed size 2³²-1 could plausibly happen in
- // an old zip32 file that was sharding inputs into the largest chunks
- // possible (or is just malicious; search the web for 42.zip).
- // If needUSize is true still, it means we didn't see a zip64 extension.
- // As long as the compressed size is not also 2³²-1 (implausible)
- // and the header is not also 2³²-1 (equally implausible),
- // accept the uncompressed size 2³²-1 as valid.
- // If nothing else, this keeps archive/zip working with 42.zip.
- _ = needUSize
- if needCSize || needHeaderOffset {
- return ErrFormat
- }
- return nil
- }
- func readDataDescriptor(r io.Reader, f *File) error {
- var buf [dataDescriptorLen]byte
- // The spec says: "Although not originally assigned a
- // signature, the value 0x08074b50 has commonly been adopted
- // as a signature value for the data descriptor record.
- // Implementers should be aware that ZIP files may be
- // encountered with or without this signature marking data
- // descriptors and should account for either case when reading
- // ZIP files to ensure compatibility."
- //
- // dataDescriptorLen includes the size of the signature but
- // first read just those 4 bytes to see if it exists.
- if _, err := io.ReadFull(r, buf[:4]); err != nil {
- return err
- }
- off := 0
- maybeSig := readBuf(buf[:4])
- if maybeSig.uint32() != dataDescriptorSignature {
- // No data descriptor signature. Keep these four
- // bytes.
- off += 4
- }
- if _, err := io.ReadFull(r, buf[off:12]); err != nil {
- return err
- }
- b := readBuf(buf[:12])
- if b.uint32() != f.CRC32 {
- return ErrChecksum
- }
- // The two sizes that follow here can be either 32 bits or 64 bits
- // but the spec is not very clear on this and different
- // interpretations has been made causing incompatibilities. We
- // already have the sizes from the central directory so we can
- // just ignore these.
- return nil
- }
- func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {
- // look for directoryEndSignature in the last 1k, then in the last 65k
- var buf []byte
- var directoryEndOffset int64
- for i, bLen := range []int64{1024, 65 * 1024} {
- if bLen > size {
- bLen = size
- }
- buf = make([]byte, int(bLen))
- if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
- return nil, err
- }
- if p := findSignatureInBlock(buf); p >= 0 {
- buf = buf[p:]
- directoryEndOffset = size - bLen + int64(p)
- break
- }
- if i == 1 || bLen == size {
- return nil, ErrFormat
- }
- }
- // read header into struct
- b := readBuf(buf[4:]) // skip signature
- d := &directoryEnd{
- diskNbr: uint32(b.uint16()),
- dirDiskNbr: uint32(b.uint16()),
- dirRecordsThisDisk: uint64(b.uint16()),
- directoryRecords: uint64(b.uint16()),
- directorySize: uint64(b.uint32()),
- directoryOffset: uint64(b.uint32()),
- commentLen: b.uint16(),
- }
- l := int(d.commentLen)
- if l > len(b) {
- return nil, errors.New("zip: invalid comment length")
- }
- d.comment = string(b[:l])
- // These values mean that the file can be a zip64 file
- if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
- p, err := findDirectory64End(r, directoryEndOffset)
- if err == nil && p >= 0 {
- err = readDirectory64End(r, p, d)
- }
- if err != nil {
- return nil, err
- }
- }
- // Make sure directoryOffset points to somewhere in our file.
- if o := int64(d.directoryOffset); o < 0 || o >= size {
- return nil, ErrFormat
- }
- return d, nil
- }
- // findDirectory64End tries to read the zip64 locator just before the
- // directory end and returns the offset of the zip64 directory end if
- // found.
- func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
- locOffset := directoryEndOffset - directory64LocLen
- if locOffset < 0 {
- return -1, nil // no need to look for a header outside the file
- }
- buf := make([]byte, directory64LocLen)
- if _, err := r.ReadAt(buf, locOffset); err != nil {
- return -1, err
- }
- b := readBuf(buf)
- if sig := b.uint32(); sig != directory64LocSignature {
- return -1, nil
- }
- if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
- return -1, nil // the file is not a valid zip64-file
- }
- p := b.uint64() // relative offset of the zip64 end of central directory record
- if b.uint32() != 1 { // total number of disks
- return -1, nil // the file is not a valid zip64-file
- }
- return int64(p), nil
- }
- // readDirectory64End reads the zip64 directory end and updates the
- // directory end with the zip64 directory end values.
- func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
- buf := make([]byte, directory64EndLen)
- if _, err := r.ReadAt(buf, offset); err != nil {
- return err
- }
- b := readBuf(buf)
- if sig := b.uint32(); sig != directory64EndSignature {
- return ErrFormat
- }
- b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16)
- d.diskNbr = b.uint32() // number of this disk
- d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory
- d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
- d.directoryRecords = b.uint64() // total number of entries in the central directory
- d.directorySize = b.uint64() // size of the central directory
- d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number
- return nil
- }
- func findSignatureInBlock(b []byte) int {
- for i := len(b) - directoryEndLen; i >= 0; i-- {
- // defined from directoryEndSignature in struct.go
- if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
- // n is length of comment
- n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
- if n+directoryEndLen+i <= len(b) {
- return i
- }
- }
- }
- return -1
- }
// readBuf is a byte slice consumed from the front: each method returns
// the leading value and advances the slice past it. Multi-byte integers
// are decoded as little-endian. The methods panic when the slice is too
// short for the requested value.
type readBuf []byte

// uint8 consumes and returns the first byte.
func (b *readBuf) uint8() uint8 {
	cur := *b
	v := cur[0]
	*b = cur[1:]
	return v
}

// uint16 consumes two bytes and returns them as a little-endian uint16.
func (b *readBuf) uint16() uint16 {
	cur := *b
	v := binary.LittleEndian.Uint16(cur)
	*b = cur[2:]
	return v
}

// uint32 consumes four bytes and returns them as a little-endian uint32.
func (b *readBuf) uint32() uint32 {
	cur := *b
	v := binary.LittleEndian.Uint32(cur)
	*b = cur[4:]
	return v
}

// uint64 consumes eight bytes and returns them as a little-endian uint64.
func (b *readBuf) uint64() uint64 {
	cur := *b
	v := binary.LittleEndian.Uint64(cur)
	*b = cur[8:]
	return v
}

// sub consumes the first n bytes and returns them as their own readBuf,
// which shares storage with b.
func (b *readBuf) sub(n int) readBuf {
	head, rest := (*b)[:n], (*b)[n:]
	*b = rest
	return head
}
|