// Copyright 2013 Richard Lehane. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mscfb

import (
	"encoding/binary"
	"io"
	"os"
	"time"
	"unicode"
	"unicode/utf16"

	"github.com/richardlehane/msoleps/types"
)

// objectType types
const (
	unknown     uint8 = 0x0 // this means unallocated - typically zeroed dir entries
	storage     uint8 = 0x1 // this means dir
	stream      uint8 = 0x2 // this means file
	rootStorage uint8 = 0x5 // this means root
)

// color flags
const (
	red   uint8 = 0x0
	black uint8 = 0x1
)

const lenDirEntry int = 64 + 4*4 + 16 + 4 + 8*2 + 4 + 8

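// directoryEntryFields is the raw 128-byte (lenDirEntry) directory entry as it is laid out on disk.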
type directoryEntryFields struct {
	rawName           [32]uint16     //64 bytes, unicode string encoded in UTF-16. If root, "Root Entry\0" w
	nameLength        uint16         //2 bytes
	objectType        uint8          //1 byte Must be one of the types specified above
	color             uint8          //1 byte Must be 0x00 RED or 0x01 BLACK
	leftSibID         uint32         //4 bytes, Dir? Stream ID of left sibling, if none set to NOSTREAM
	rightSibID        uint32         //4 bytes, Dir? Stream ID of right sibling, if none set to NOSTREAM
	childID           uint32         //4 bytes, Dir? Stream ID of child object, if none set to NOSTREAM
	clsid             types.Guid     // Contains an object class GUID (must be set to zeroes for stream object)
	stateBits         [4]byte        // user-defined flags for storage object
	create            types.FileTime // Windows FILETIME structure
	modify            types.FileTime // Windows FILETIME structure
	startingSectorLoc uint32         // if a stream object, first sector location. If root, first sector of ministream
	streamSize        [8]byte        // if a stream, size of user-defined data. If root, size of ministream
}

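// makeDirEntry unpacks a single 128-byte directory entry from a raw sector buffer.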
func makeDirEntry(b []byte) *directoryEntryFields {
	d := &directoryEntryFields{}
	for i := range d.rawName {
		d.rawName[i] = binary.LittleEndian.Uint16(b[i*2 : i*2+2])
	}
	d.nameLength = binary.LittleEndian.Uint16(b[64:66])
	d.objectType = uint8(b[66])
	d.color = uint8(b[67])
	d.leftSibID = binary.LittleEndian.Uint32(b[68:72])
	d.rightSibID = binary.LittleEndian.Uint32(b[72:76])
	d.childID = binary.LittleEndian.Uint32(b[76:80])
	d.clsid = types.MustGuid(b[80:96])
	copy(d.stateBits[:], b[96:100])
	d.create = types.MustFileTime(b[100:108])
	d.modify = types.MustFileTime(b[108:116])
	d.startingSectorLoc = binary.LittleEndian.Uint32(b[116:120])
	copy(d.streamSize[:], b[120:128])
	return d
}

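// setDirEntries walks the directory sector chain, decoding each 128-byte entry
// into a File and guarding against cycles in the chain.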
func (r *Reader) setDirEntries() error {
	c := 20
	if r.header.numDirectorySectors > 0 {
		c = int(r.header.numDirectorySectors)
	}
	de := make([]*File, 0, c)
	cycles := make(map[uint32]bool)
	num := int(r.sectorSize / 128)
	sn := r.header.directorySectorLoc
	for sn != endOfChain {
		buf, err := r.readAt(fileOffset(r.sectorSize, sn), int(r.sectorSize))
		if err != nil {
			return Error{ErrRead, "directory entries read error (" + err.Error() + ")", fileOffset(r.sectorSize, sn)}
		}
		for i := 0; i < num; i++ {
			f := &File{r: r}
			f.directoryEntryFields = makeDirEntry(buf[i*128:])
			fixFile(r.header.majorVersion, f)
			f.curSector = f.startingSectorLoc
			de = append(de, f)
		}
		nsn, err := r.findNext(sn, false)
		if err != nil {
			return Error{ErrRead, "directory entries error finding sector (" + err.Error() + ")", int64(nsn)}
		}
		if nsn <= sn {
			if nsn == sn || cycles[nsn] {
				return Error{ErrRead, "directory entries sector cycle", int64(nsn)}
			}
			cycles[nsn] = true
		}
		sn = nsn
	}
	r.direntries = de
	return nil
}

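// fixFile sets the exported Name and Size fields from the raw directory entry fields.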
func fixFile(v uint16, f *File) {
	fixName(f)
	if f.objectType != stream {
		return
	}
	// if the MSCFB major version is 4, this can be a uint64; otherwise it is a uint32 and the most significant bits can contain junk
	if v > 3 {
		f.Size = int64(binary.LittleEndian.Uint64(f.streamSize[:]))
	} else {
		f.Size = int64(binary.LittleEndian.Uint32(f.streamSize[:4]))
	}
}

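// fixName decodes the UTF-16 raw name, records the initial character (which can
// identify special streams), and omits it from Name if it is not printable.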
func fixName(f *File) {
	// From the spec:
	// "The length [name] MUST be a multiple of 2, and include the terminating null character in the count.
	// This length MUST NOT exceed 64, the maximum size of the Directory Entry Name field."
	if f.nameLength < 4 || f.nameLength > 64 {
		return
	}
	nlen := int(f.nameLength/2 - 1)
	f.Initial = f.rawName[0]
	var slen int
	if !unicode.IsPrint(rune(f.Initial)) {
		slen = 1
	}
	f.Name = string(utf16.Decode(f.rawName[slen:nlen]))
}

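// traverse walks the sibling trees depth-first (left sibling, entry, children,
// right sibling), populating r.File and each File's Path.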
func (r *Reader) traverse() error {
	r.File = make([]*File, 0, len(r.direntries))
	var (
		recurse func(int, []string)
		err     error
		counter int
	)
	recurse = func(i int, path []string) {
		// prevent cycles, number of recurse calls can't exceed number of directory entries
		counter++
		if counter > len(r.direntries) {
			err = Error{ErrTraverse, "traversal counter overflow", int64(i)}
			return
		}
		if i < 0 || i >= len(r.direntries) {
			err = Error{ErrTraverse, "illegal traversal index", int64(i)}
			return
		}
		file := r.direntries[i]
		if file.leftSibID != noStream {
			recurse(int(file.leftSibID), path)
		}
		r.File = append(r.File, file)
		file.Path = path
		if file.childID != noStream {
			if i > 0 {
				recurse(int(file.childID), append(path, file.Name))
			} else {
				recurse(int(file.childID), path)
			}
		}
		if file.rightSibID != noStream {
			recurse(int(file.rightSibID), path)
		}
		return
	}
	recurse(0, []string{})
	return err
}

// File represents a MSCFB directory entry
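//
// A minimal usage sketch (assuming the package's New constructor and the
// Reader.Next iterator defined elsewhere in this package; error handling
// omitted):
//
//	doc, _ := mscfb.New(ra) // ra is an io.ReaderAt holding a compound file
//	for entry, err := doc.Next(); err == nil; entry, err = doc.Next() {
//		buf := make([]byte, entry.Size)
//		entry.Read(buf)
//	}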
type File struct {
	Name      string   // stream or directory name
	Initial   uint16   // the first character in the name (identifies special streams such as MSOLEPS property sets)
	Path      []string // file path
	Size      int64    // size of stream
	i         int64    // bytes read
	curSector uint32   // next sector for Read | Write
	rem       int64    // offset in current sector remaining from a previous Read | Write
	*directoryEntryFields
	r *Reader
}

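// fileInfo adapts a *File so that it satisfies the os.FileInfo interface.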
type fileInfo struct{ *File }

func (fi fileInfo) Name() string { return fi.File.Name }
func (fi fileInfo) Size() int64 {
	if fi.objectType != stream {
		return 0
	}
	return fi.File.Size
}
func (fi fileInfo) IsDir() bool        { return fi.mode().IsDir() }
func (fi fileInfo) ModTime() time.Time { return fi.Modified() }
func (fi fileInfo) Mode() os.FileMode  { return fi.File.mode() }
func (fi fileInfo) Sys() interface{}   { return nil }

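// mode reports a directory mode for storage objects and a regular file mode for streams.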
func (f *File) mode() os.FileMode {
	if f.objectType != stream {
		return os.ModeDir | 0777
	}
	return 0666
}

// FileInfo for this directory entry. Useful for IsDir() (whether a directory entry is a stream (file) or a storage object (dir))
func (f *File) FileInfo() os.FileInfo {
	return fileInfo{f}
}

// ID returns this directory entry's CLSID field
func (f *File) ID() string {
	return f.clsid.String()
}

// Created returns this directory entry's created field
func (f *File) Created() time.Time {
	return f.create.Time()
}

// Modified returns this directory entry's modified field
func (f *File) Modified() time.Time {
	return f.modify.Time()
}

// Read this directory entry
// Returns 0, io.EOF if no stream is available (i.e. for a storage object)
func (f *File) Read(b []byte) (int, error) {
	if f.Size < 1 || f.i >= f.Size {
		return 0, io.EOF
	}
	sz := len(b)
	if int64(sz) > f.Size-f.i {
		sz = int(f.Size - f.i)
	}
	// get sectors and lengths for reads
	str, err := f.stream(sz)
	if err != nil {
		return 0, err
	}
	// now read
	var idx, i int
	for _, v := range str {
		jdx := idx + int(v[1])
		if jdx < idx || jdx > sz {
			return 0, Error{ErrRead, "bad read length", int64(jdx)}
		}
		j, err := f.r.ra.ReadAt(b[idx:jdx], v[0])
		i = i + j
		if err != nil {
			f.i += int64(i)
			return i, Error{ErrRead, "underlying reader fail (" + err.Error() + ")", int64(idx)}
		}
		idx = jdx
	}
	f.i += int64(i)
	if i != sz {
		err = Error{ErrRead, "bytes read do not match expected read size", int64(i)}
	} else if i < len(b) {
		err = io.EOF
	}
	return i, err
}

// Write to this directory entry
// Depends on the io.ReaderAt supplied to mscfb.New() being an io.WriterAt too
// Returns 0, io.EOF if no stream is available (i.e. for a storage object)
func (f *File) Write(b []byte) (int, error) {
	if f.Size < 1 || f.i >= f.Size {
		return 0, io.EOF
	}
	if f.r.wa == nil {
		wa, ok := f.r.ra.(io.WriterAt)
		if !ok {
			return 0, Error{ErrWrite, "mscfb.New must be given a ReaderAt convertible to an io.WriterAt in order to write", 0}
		}
		f.r.wa = wa
	}
	sz := len(b)
	if int64(sz) > f.Size-f.i {
		sz = int(f.Size - f.i)
	}
	// get sectors and lengths for writes
	str, err := f.stream(sz)
	if err != nil {
		return 0, err
	}
	// now write
	var idx, i int
	for _, v := range str {
		jdx := idx + int(v[1])
		if jdx < idx || jdx > sz {
			return 0, Error{ErrWrite, "bad write length", int64(jdx)}
		}
		j, err := f.r.wa.WriteAt(b[idx:jdx], v[0])
		i = i + j
		if err != nil {
			f.i += int64(i)
			return i, Error{ErrWrite, "underlying writer fail (" + err.Error() + ")", int64(idx)}
		}
		idx = jdx
	}
	f.i += int64(i)
	if i != sz {
		err = Error{ErrWrite, "bytes written do not match expected write size", int64(i)}
	} else if i < len(b) {
		err = io.EOF
	}
	return i, err
}

// ReadAt reads up to len(p) bytes at offset off from the start of the file. It does not affect the seek position used by other reads/writes.
func (f *File) ReadAt(p []byte, off int64) (n int, err error) {
	// memorize place
	mi, mrem, mcur := f.i, f.rem, f.curSector
	_, err = f.Seek(off, 0)
	if err == nil {
		n, err = f.Read(p)
	}
	f.i, f.rem, f.curSector = mi, mrem, mcur
	return n, err
}

// WriteAt writes up to len(p) bytes at offset off from the start of the file. It does not affect the seek position used by other reads/writes.
func (f *File) WriteAt(p []byte, off int64) (n int, err error) {
	// memorize place
	mi, mrem, mcur := f.i, f.rem, f.curSector
	_, err = f.Seek(off, 0)
	if err == nil {
		n, err = f.Write(p)
	}
	f.i, f.rem, f.curSector = mi, mrem, mcur
	return n, err
}

// Seek sets the offset for the next Read or Write to offset, interpreted according to whence: 0 means relative to the
// start of the file, 1 means relative to the current offset, and 2 means relative to the end. Seek returns the new
// offset relative to the start of the file and an error, if any.
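//
// Note that with whence 2 the offset is subtracted from the file size, so a
// positive offset seeks backwards from the end of the stream. A brief sketch:
//
//	f.Seek(0, 0)  // rewind to the start of the stream
//	f.Seek(16, 2) // position the next Read 16 bytes before the end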
func (f *File) Seek(offset int64, whence int) (int64, error) {
	var abs int64
	switch whence {
	default:
		return 0, Error{ErrSeek, "invalid whence", int64(whence)}
	case 0:
		abs = offset
	case 1:
		abs = f.i + offset
	case 2:
		abs = f.Size - offset
	}
	switch {
	case abs < 0:
		return f.i, Error{ErrSeek, "can't seek before start of File", abs}
	case abs >= f.Size:
		return f.i, Error{ErrSeek, "can't seek past File length", abs}
	case abs == f.i:
		return abs, nil
	case abs > f.i:
		t := f.i
		f.i = abs
		return f.i, f.seek(abs - t)
	}
	if f.rem >= f.i-abs {
		f.rem = f.rem - (f.i - abs)
		f.i = abs
		return f.i, nil
	}
	f.rem = 0
	f.curSector = f.startingSectorLoc
	f.i = abs
	return f.i, f.seek(abs)
}

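// seek advances curSector and rem by sz bytes from the current position,
// following the FAT (or the miniFAT for ministream entries) without
// recording sector offsets.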
func (f *File) seek(sz int64) error {
	// calculate ministream and sector size
	var mini bool
	var ss int64
	if f.Size < miniStreamCutoffSize {
		mini = true
		ss = 64
	} else {
		ss = int64(f.r.sectorSize)
	}
	var j int64
	var err error
	// if we have a remainder in the current sector, use it first
	if f.rem > 0 {
		if ss-f.rem <= sz {
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return err
			}
			j += ss - f.rem
			f.rem = 0
			if j == sz {
				return nil
			}
		} else {
			f.rem += sz
			return nil
		}
		if f.curSector == endOfChain {
			return Error{ErrRead, "unexpected early end of chain", int64(f.curSector)}
		}
	}
	for {
		// check if we are at the last sector
		if sz-j < ss {
			f.rem = sz - j
			return nil
		} else {
			j += ss
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return err
			}
			// we might be at the last sector if there is no remainder; if so, we can return
			if j == sz {
				return nil
			}
		}
	}
}

// return offsets and lengths for read or write
func (f *File) stream(sz int) ([][2]int64, error) {
	// calculate ministream, cap for sector slice, and sector size
	var mini bool
	var l int
	var ss int64
	if f.Size < miniStreamCutoffSize {
		mini = true
		l = sz/64 + 2
		ss = 64
	} else {
		l = sz/int(f.r.sectorSize) + 2
		ss = int64(f.r.sectorSize)
	}
	sectors := make([][2]int64, 0, l)
	var i, j int
	// if we have a remainder from a previous read, use it first
	if f.rem > 0 {
		offset, err := f.r.getOffset(f.curSector, mini)
		if err != nil {
			return nil, err
		}
		if ss-f.rem >= int64(sz) {
			sectors = append(sectors, [2]int64{offset + f.rem, int64(sz)})
		} else {
			sectors = append(sectors, [2]int64{offset + f.rem, ss - f.rem})
		}
		if ss-f.rem <= int64(sz) {
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return nil, err
			}
			j += int(ss - f.rem)
			f.rem = 0
		} else {
			f.rem += int64(sz)
		}
		if sectors[0][1] == int64(sz) {
			return sectors, nil
		}
		if f.curSector == endOfChain {
			return nil, Error{ErrRead, "unexpected early end of chain", int64(f.curSector)}
		}
		i++
	}
	for {
		// emergency brake!
		if i >= cap(sectors) {
			return nil, Error{ErrRead, "index overruns sector length", int64(i)}
		}
		// grab the next offset
		offset, err := f.r.getOffset(f.curSector, mini)
		if err != nil {
			return nil, err
		}
		// check if we are at the last sector
		if sz-j < int(ss) {
			sectors = append(sectors, [2]int64{offset, int64(sz - j)})
			f.rem = int64(sz - j)
			return compressChain(sectors), nil
		} else {
			sectors = append(sectors, [2]int64{offset, ss})
			j += int(ss)
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return nil, err
			}
			// we might be at the last sector if there is no remainder; if so, we can return
			if j == sz {
				return compressChain(sectors), nil
			}
		}
		i++
	}
}

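// compressChain merges adjacent offset/length pairs that describe contiguous
// byte ranges, e.g. [[0 512] [512 512]] becomes [[0 1024]].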
func compressChain(locs [][2]int64) [][2]int64 {
	l := len(locs)
	for i, x := 0, 0; i < l && x+1 < len(locs); i++ {
		if locs[x][0]+locs[x][1] == locs[x+1][0] {
			locs[x][1] = locs[x][1] + locs[x+1][1]
			for j := range locs[x+1 : len(locs)-1] {
				locs[x+1+j] = locs[j+x+2]
			}
			locs = locs[:len(locs)-1]
		} else {
			x += 1
		}
	}
	return locs
}