123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534 |
- // Copyright 2013 Richard Lehane. All rights reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package mscfb
- import (
- "encoding/binary"
- "io"
- "os"
- "time"
- "unicode"
- "unicode/utf16"
- "github.com/richardlehane/msoleps/types"
- )
//objectType types: the kind of node a directory entry represents
const (
	unknown     uint8 = 0x0 // this means unallocated - typically zeroed dir entries
	storage     uint8 = 0x1 // this means dir
	stream      uint8 = 0x2 // this means file
	rootStorage uint8 = 0x5 // this means root
)

// color flags for the red-black tree of directory entries
const (
	red   uint8 = 0x0
	black uint8 = 0x1
)

// lenDirEntry is the fixed serialized size of a directory entry (128 bytes):
// 64-byte name + nameLength/objectType/color/sibling/child IDs (16) +
// CLSID (16) + state bits (4) + two FILETIMEs (16) + starting sector (4) +
// stream size (8)
const lenDirEntry int = 64 + 4*4 + 16 + 4 + 8*2 + 4 + 8
// directoryEntryFields is the fixed 128-byte on-disk layout of a compound
// file directory entry (see the MS-CFB specification). Fields are listed in
// serialization order; makeDirEntry decodes them from raw bytes.
type directoryEntryFields struct {
	rawName           [32]uint16     //64 bytes, unicode string encoded in UTF-16. If root, "Root Entry\0" w
	nameLength        uint16         //2 bytes, length in bytes including the terminating null
	objectType        uint8          //1 byte Must be one of the types specified above
	color             uint8          //1 byte Must be 0x00 RED or 0x01 BLACK
	leftSibID         uint32         //4 bytes, Dir? Stream ID of left sibling, if none set to NOSTREAM
	rightSibID        uint32         //4 bytes, Dir? Stream ID of right sibling, if none set to NOSTREAM
	childID           uint32         //4 bytes, Dir? Stream ID of child object, if none set to NOSTREAM
	clsid             types.Guid     // Contains an object class GUID (must be set to zeroes for stream object)
	stateBits         [4]byte        // user-defined flags for storage object
	create            types.FileTime // Windows FILETIME structure
	modify            types.FileTime // Windows FILETIME structure
	startingSectorLoc uint32         // if a stream object, first sector location. If root, first sector of ministream
	streamSize        [8]byte        // if a stream, size of user-defined data. If root, size of ministream
}
- func makeDirEntry(b []byte) *directoryEntryFields {
- d := &directoryEntryFields{}
- for i := range d.rawName {
- d.rawName[i] = binary.LittleEndian.Uint16(b[i*2 : i*2+2])
- }
- d.nameLength = binary.LittleEndian.Uint16(b[64:66])
- d.objectType = uint8(b[66])
- d.color = uint8(b[67])
- d.leftSibID = binary.LittleEndian.Uint32(b[68:72])
- d.rightSibID = binary.LittleEndian.Uint32(b[72:76])
- d.childID = binary.LittleEndian.Uint32(b[76:80])
- d.clsid = types.MustGuid(b[80:96])
- copy(d.stateBits[:], b[96:100])
- d.create = types.MustFileTime(b[100:108])
- d.modify = types.MustFileTime(b[108:116])
- d.startingSectorLoc = binary.LittleEndian.Uint32(b[116:120])
- copy(d.streamSize[:], b[120:128])
- return d
- }
// setDirEntries reads the directory sector chain and parses each 128-byte
// entry into a *File, storing the result in r.direntries.
// Malformed files are guarded against by detecting cycles in the chain.
func (r *Reader) setDirEntries() error {
	// default capacity guess when the header doesn't record a sector count
	c := 20
	if r.header.numDirectorySectors > 0 {
		c = int(r.header.numDirectorySectors)
	}
	de := make([]*File, 0, c)
	// records sectors reached by backward jumps so a looping chain can't
	// spin forever (see the nsn <= sn check below)
	cycles := make(map[uint32]bool)
	num := int(r.sectorSize / 128) // directory entries per sector
	sn := r.header.directorySectorLoc
	for sn != endOfChain {
		buf, err := r.readAt(fileOffset(r.sectorSize, sn), int(r.sectorSize))
		if err != nil {
			return Error{ErrRead, "directory entries read error (" + err.Error() + ")", fileOffset(r.sectorSize, sn)}
		}
		for i := 0; i < num; i++ {
			f := &File{r: r}
			f.directoryEntryFields = makeDirEntry(buf[i*128:])
			fixFile(r.header.majorVersion, f)
			// position the read/write cursor at the stream's first sector
			f.curSector = f.startingSectorLoc
			de = append(de, f)
		}
		nsn, err := r.findNext(sn, false)
		if err != nil {
			return Error{ErrRead, "directory entries error finding sector (" + err.Error() + ")", int64(nsn)}
		}
		// forward jumps can't revisit a sector, so only backward jumps are
		// recorded and checked for repeats
		if nsn <= sn {
			if nsn == sn || cycles[nsn] {
				return Error{ErrRead, "directory entries sector cycle", int64(nsn)}
			}
			cycles[nsn] = true
		}
		sn = nsn
	}
	r.direntries = de
	return nil
}
- func fixFile(v uint16, f *File) {
- fixName(f)
- if f.objectType != stream {
- return
- }
- // if the MSCFB major version is 4, then this can be a uint64 otherwise is a uint32 and the least signficant bits can contain junk
- if v > 3 {
- f.Size = int64(binary.LittleEndian.Uint64(f.streamSize[:]))
- } else {
- f.Size = int64(binary.LittleEndian.Uint32(f.streamSize[:4]))
- }
- }
- func fixName(f *File) {
- // From the spec:
- // "The length [name] MUST be a multiple of 2, and include the terminating null character in the count.
- // This length MUST NOT exceed 64, the maximum size of the Directory Entry Name field."
- if f.nameLength < 4 || f.nameLength > 64 {
- return
- }
- nlen := int(f.nameLength/2 - 1)
- f.Initial = f.rawName[0]
- var slen int
- if !unicode.IsPrint(rune(f.Initial)) {
- slen = 1
- }
- f.Name = string(utf16.Decode(f.rawName[slen:nlen]))
- }
- func (r *Reader) traverse() error {
- r.File = make([]*File, 0, len(r.direntries))
- var (
- recurse func(int, []string)
- err error
- counter int
- )
- recurse = func(i int, path []string) {
- // prevent cycles, number of recurse calls can't exceed number of directory entries
- counter++
- if counter > len(r.direntries) {
- err = Error{ErrTraverse, "traversal counter overflow", int64(i)}
- return
- }
- if i < 0 || i >= len(r.direntries) {
- err = Error{ErrTraverse, "illegal traversal index", int64(i)}
- return
- }
- file := r.direntries[i]
- if file.leftSibID != noStream {
- recurse(int(file.leftSibID), path)
- }
- r.File = append(r.File, file)
- file.Path = path
- if file.childID != noStream {
- if i > 0 {
- recurse(int(file.childID), append(path, file.Name))
- } else {
- recurse(int(file.childID), path)
- }
- }
- if file.rightSibID != noStream {
- recurse(int(file.rightSibID), path)
- }
- return
- }
- recurse(0, []string{})
- return err
- }
// File represents a MSCFB directory entry: either a stream (a file, which
// can be read/written/seeked) or a storage object (a directory). It embeds
// the raw on-disk fields and carries the cursor state for Read/Write/Seek.
type File struct {
	Name      string   // stream or directory name
	Initial   uint16   // the first character in the name (identifies special streams such as MSOLEPS property sets)
	Path      []string // file path
	Size      int64    // size of stream
	i         int64    // bytes read (current logical offset within the stream)
	curSector uint32   // next sector for Read | Write
	rem       int64    // offset in current sector remaining previous Read | Write
	*directoryEntryFields
	r *Reader // back-reference to the parent Reader for sector lookups and I/O
}
// fileInfo adapts a *File to the os.FileInfo interface.
type fileInfo struct{ *File }

func (fi fileInfo) Name() string { return fi.File.Name }

// Size reports the stream length; storage objects (directories) report 0.
func (fi fileInfo) Size() int64 {
	if fi.objectType != stream {
		return 0
	}
	return fi.File.Size
}
func (fi fileInfo) IsDir() bool        { return fi.mode().IsDir() }
func (fi fileInfo) ModTime() time.Time { return fi.Modified() }
func (fi fileInfo) Mode() os.FileMode  { return fi.File.mode() }

// Sys returns nil: there is no underlying OS data source for an entry.
func (fi fileInfo) Sys() interface{} { return nil }
- func (f *File) mode() os.FileMode {
- if f.objectType != stream {
- return os.ModeDir | 0777
- }
- return 0666
- }
// FileInfo for this directory entry. Useful for IsDir() (whether a directory entry is a stream (file) or a storage object (dir))
func (f *File) FileInfo() os.FileInfo {
	return fileInfo{f}
}
// ID returns this directory entry's CLSID field as a string.
func (f *File) ID() string {
	return f.clsid.String()
}
// Created returns this directory entry's created field as a time.Time.
func (f *File) Created() time.Time {
	return f.create.Time()
}
// Modified returns this directory entry's modified field as a time.Time.
func (f *File) Modified() time.Time {
	return f.modify.Time()
}
// Read this directory entry
// Returns 0, io.EOF if no stream is available (i.e. for a storage object)
func (f *File) Read(b []byte) (int, error) {
	// storage objects have Size 0, so they always report EOF
	if f.Size < 1 || f.i >= f.Size {
		return 0, io.EOF
	}
	// clamp the read to the bytes remaining in the stream
	sz := len(b)
	if int64(sz) > f.Size-f.i {
		sz = int(f.Size - f.i)
	}
	// get sectors and lengths for reads
	str, err := f.stream(sz)
	if err != nil {
		return 0, err
	}
	// now read each {offset, length} segment into the corresponding
	// subslice of b
	var idx, i int
	for _, v := range str {
		jdx := idx + int(v[1])
		// guard against int overflow or a segment length past the clamped size
		if jdx < idx || jdx > sz {
			return 0, Error{ErrRead, "bad read length", int64(jdx)}
		}
		j, err := f.r.ra.ReadAt(b[idx:jdx], v[0])
		i = i + j
		if err != nil {
			// record the partial progress before reporting the failure
			f.i += int64(i)
			return i, Error{ErrRead, "underlying reader fail (" + err.Error() + ")", int64(idx)}
		}
		idx = jdx
	}
	f.i += int64(i)
	if i != sz {
		err = Error{ErrRead, "bytes read do not match expected read size", int64(i)}
	} else if i < len(b) {
		// a successful short read means the end of the stream was reached
		err = io.EOF
	}
	return i, err
}
// Write to this directory entry
// Depends on the io.ReaderAt supplied to mscfb.New() being a WriterAt too
// Returns 0, io.EOF if no stream is available (i.e. for a storage object)
func (f *File) Write(b []byte) (int, error) {
	if f.Size < 1 || f.i >= f.Size {
		return 0, io.EOF
	}
	// lazily derive a WriterAt from the ReaderAt on first write
	if f.r.wa == nil {
		wa, ok := f.r.ra.(io.WriterAt)
		if !ok {
			return 0, Error{ErrWrite, "mscfb.New must be given ReaderAt convertible to a io.WriterAt in order to write", 0}
		}
		f.r.wa = wa
	}
	// clamp the write to the bytes remaining in the stream; writes cannot
	// grow a stream
	sz := len(b)
	if int64(sz) > f.Size-f.i {
		sz = int(f.Size - f.i)
	}
	// get sectors and lengths for writes
	str, err := f.stream(sz)
	if err != nil {
		return 0, err
	}
	// now write each segment of b at the corresponding file offset
	var idx, i int
	for _, v := range str {
		jdx := idx + int(v[1])
		// guard against int overflow or a segment length past the clamped size
		if jdx < idx || jdx > sz {
			return 0, Error{ErrWrite, "bad write length", int64(jdx)}
		}
		j, err := f.r.wa.WriteAt(b[idx:jdx], v[0])
		i = i + j
		if err != nil {
			// record the partial progress before reporting the failure
			f.i += int64(i)
			return i, Error{ErrWrite, "underlying writer fail (" + err.Error() + ")", int64(idx)}
		}
		idx = jdx
	}
	f.i += int64(i)
	if i != sz {
		err = Error{ErrWrite, "bytes written do not match expected write size", int64(i)}
	} else if i < len(b) {
		// a successful short write means the end of the stream was reached
		err = io.EOF
	}
	return i, err
}
- // ReadAt reads p bytes at offset off from start of file. Does not affect seek place for other reads/writes.
- func (f *File) ReadAt(p []byte, off int64) (n int, err error) {
- // memorize place
- mi, mrem, mcur := f.i, f.rem, f.curSector
- _, err = f.Seek(off, 0)
- if err == nil {
- n, err = f.Read(p)
- }
- f.i, f.rem, f.curSector = mi, mrem, mcur
- return n, err
- }
// WriteAt writes p bytes at offset off from start of file. Does not affect seek place for other reads/writes.
func (f *File) WriteAt(p []byte, off int64) (n int, err error) {
	// memorize place so the cursor can be restored afterwards
	mi, mrem, mcur := f.i, f.rem, f.curSector
	_, err = f.Seek(off, 0)
	if err == nil {
		n, err = f.Write(p)
	}
	f.i, f.rem, f.curSector = mi, mrem, mcur
	return n, err
}
// Seek sets the offset for the next Read or Write to offset, interpreted according to whence: 0 means relative to the
// start of the file, 1 means relative to the current offset, and 2 means relative to the end. Seek returns the new
// offset relative to the start of the file and an error, if any.
//
// NOTE(review): for whence == 2 the target is computed as Size - offset rather
// than the io.Seeker convention of Size + offset, and seeking to exactly Size
// (EOF) is rejected. Existing callers may depend on this; confirm before changing.
func (f *File) Seek(offset int64, whence int) (int64, error) {
	var abs int64
	switch whence {
	default:
		return 0, Error{ErrSeek, "invalid whence", int64(whence)}
	case 0:
		abs = offset
	case 1:
		abs = f.i + offset
	case 2:
		abs = f.Size - offset
	}
	switch {
	case abs < 0:
		return f.i, Error{ErrSeek, "can't seek before start of File", abs}
	case abs >= f.Size:
		return f.i, Error{ErrSeek, "can't seek past File length", abs}
	case abs == f.i:
		// already there; nothing to do
		return abs, nil
	case abs > f.i:
		// seeking forward: advance the sector cursor by the difference
		t := f.i
		f.i = abs
		return f.i, f.seek(abs - t)
	}
	// seeking backward: if the target still lies within the current sector,
	// just shrink the in-sector remainder
	if f.rem >= f.i-abs {
		f.rem = f.rem - (f.i - abs)
		f.i = abs
		return f.i, nil
	}
	// otherwise rewind to the start of the chain and walk forward to abs
	f.rem = 0
	f.curSector = f.startingSectorLoc
	f.i = abs
	return f.i, f.seek(abs)
}
// seek advances the sector cursor (f.curSector, f.rem) forward by sz bytes
// from its current position, following the FAT (or mini FAT) chain.
func (f *File) seek(sz int64) error {
	// calculate ministream and sector size: streams below the cutoff live in
	// the ministream with fixed 64-byte sectors
	var mini bool
	var ss int64
	if f.Size < miniStreamCutoffSize {
		mini = true
		ss = 64
	} else {
		ss = int64(f.r.sectorSize)
	}
	var j int64 // bytes consumed so far
	var err error
	// if we have a remainder in the current sector, use it first
	if f.rem > 0 {
		if ss-f.rem <= sz {
			// target lies beyond this sector: consume the rest of it and
			// step to the next sector in the chain
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return err
			}
			j += ss - f.rem
			f.rem = 0
			if j == sz {
				return nil
			}
		} else {
			// target is within the current sector: just bump the remainder
			f.rem += sz
			return nil
		}
		if f.curSector == endOfChain {
			return Error{ErrRead, "unexpected early end of chain", int64(f.curSector)}
		}
	}
	for {
		// check if we are at the last sector
		if sz-j < ss {
			f.rem = sz - j
			return nil
		} else {
			j += ss
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return err
			}
			// we might be at the last sector if there is no remainder, if so can return
			if j == sz {
				return nil
			}
		}
	}
}
// return offsets and lengths for read or write:
// stream maps the next sz bytes of the file onto a slice of
// {file offset, length} pairs by following the FAT (or mini FAT) chain from
// the current sector, advancing the cursor past them.
func (f *File) stream(sz int) ([][2]int64, error) {
	// calculate ministream, cap for sector slice, and sector size
	var mini bool
	var l int
	var ss int64
	if f.Size < miniStreamCutoffSize {
		mini = true
		l = sz/64 + 2
		ss = 64
	} else {
		l = sz/int(f.r.sectorSize) + 2
		ss = int64(f.r.sectorSize)
	}
	sectors := make([][2]int64, 0, l)
	var i, j int // i: segments appended; j: bytes mapped so far
	// if we have a remainder from a previous read, use it first
	if f.rem > 0 {
		offset, err := f.r.getOffset(f.curSector, mini)
		if err != nil {
			return nil, err
		}
		// take the whole request or the rest of this sector, whichever is smaller
		if ss-f.rem >= int64(sz) {
			sectors = append(sectors, [2]int64{offset + f.rem, int64(sz)})
		} else {
			sectors = append(sectors, [2]int64{offset + f.rem, ss - f.rem})
		}
		// if this sector is now exhausted, step to the next one in the chain
		if ss-f.rem <= int64(sz) {
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return nil, err
			}
			j += int(ss - f.rem)
			f.rem = 0
		} else {
			f.rem += int64(sz)
		}
		// request satisfied entirely from the remainder
		if sectors[0][1] == int64(sz) {
			return sectors, nil
		}
		if f.curSector == endOfChain {
			return nil, Error{ErrRead, "unexpected early end of chain", int64(f.curSector)}
		}
		i++
	}
	for {
		// emergency brake! the segment count can never legitimately exceed
		// the capacity computed above
		if i >= cap(sectors) {
			return nil, Error{ErrRead, "index overruns sector length", int64(i)}
		}
		// grab the next offset
		offset, err := f.r.getOffset(f.curSector, mini)
		if err != nil {
			return nil, err
		}
		// check if we are at the last sector
		if sz-j < int(ss) {
			sectors = append(sectors, [2]int64{offset, int64(sz - j)})
			f.rem = int64(sz - j)
			return compressChain(sectors), nil
		} else {
			sectors = append(sectors, [2]int64{offset, ss})
			j += int(ss)
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return nil, err
			}
			// we might be at the last sector if there is no remainder, if so can return
			if j == sz {
				return compressChain(sectors), nil
			}
		}
		i++
	}
}
// compressChain coalesces physically adjacent {offset, length} segments in
// place, so runs of contiguous sectors become a single larger read or write.
// It returns the (possibly shortened) slice.
func compressChain(locs [][2]int64) [][2]int64 {
	if len(locs) < 2 {
		return locs
	}
	// w indexes the last segment written; each later segment either extends
	// it (when contiguous) or is copied into the next slot.
	w := 0
	for _, seg := range locs[1:] {
		if locs[w][0]+locs[w][1] == seg[0] {
			locs[w][1] += seg[1]
		} else {
			w++
			locs[w] = seg
		}
	}
	return locs[:w+1]
}
|