lib.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "math"
  9. "strconv"
  10. "strings"
  11. )
  12. // XLSXReaderError is the standard error type for otherwise undefined
  13. // errors in the XSLX reading process.
  14. type XLSXReaderError struct {
  15. Err string
  16. }
  17. // String() returns a string value from an XLSXReaderError struct in
  18. // order that it might comply with the os.Error interface.
  19. func (e *XLSXReaderError) Error() string {
  20. return e.Err
  21. }
  22. // Cell is a high level structure intended to provide user access to
  23. // the contents of Cell within an xlsx.Row.
  24. type Cell struct {
  25. Value string
  26. styleIndex int
  27. styles *xlsxStyles
  28. numFmtRefTable map[int]xlsxNumFmt
  29. date1904 bool
  30. }
  31. // CellInterface defines the public API of the Cell.
  32. type CellInterface interface {
  33. String() string
  34. }
  35. // String returns the value of a Cell as a string.
  36. func (c *Cell) String() string {
  37. return c.Value
  38. }
  39. // GetStyle returns the Style associated with a Cell
  40. func (c *Cell) GetStyle() *Style {
  41. style := &Style{}
  42. if c.styleIndex > 0 && c.styleIndex <= len(c.styles.CellXfs) {
  43. xf := c.styles.CellXfs[c.styleIndex-1]
  44. if xf.ApplyBorder {
  45. var border Border
  46. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  47. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  48. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  49. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  50. style.Border = border
  51. }
  52. if xf.ApplyFill {
  53. var fill Fill
  54. fill.PatternType = c.styles.Fills[xf.FillId].PatternFill.PatternType
  55. fill.BgColor = c.styles.Fills[xf.FillId].PatternFill.BgColor.RGB
  56. fill.FgColor = c.styles.Fills[xf.FillId].PatternFill.FgColor.RGB
  57. style.Fill = fill
  58. }
  59. if xf.ApplyFont {
  60. font := c.styles.Fonts[xf.FontId]
  61. style.Font = Font{}
  62. style.Font.Size, _ = strconv.Atoi(font.Sz.Val)
  63. style.Font.Name = font.Name.Val
  64. style.Font.Family, _ = strconv.Atoi(font.Family.Val)
  65. style.Font.Charset, _ = strconv.Atoi(font.Charset.Val)
  66. }
  67. }
  68. return style
  69. }
  70. // The number format string is returnable from a cell.
  71. func (c *Cell) GetNumberFormat() string {
  72. var numberFormat string = ""
  73. if c.styleIndex > 0 && c.styleIndex <= len(c.styles.CellXfs) {
  74. xf := c.styles.CellXfs[c.styleIndex-1]
  75. numFmt := c.numFmtRefTable[xf.NumFmtId]
  76. numberFormat = numFmt.FormatCode
  77. }
  78. return strings.ToLower(numberFormat)
  79. }
  80. func (c *Cell) formatToTime(format string) string {
  81. f, err := strconv.ParseFloat(c.Value, 64)
  82. if err != nil {
  83. return err.Error()
  84. }
  85. return TimeFromExcelTime(f, c.date1904).Format(format)
  86. }
  87. func (c *Cell) formatToFloat(format string) string {
  88. f, err := strconv.ParseFloat(c.Value, 64)
  89. if err != nil {
  90. return err.Error()
  91. }
  92. return fmt.Sprintf(format, f)
  93. }
  94. func (c *Cell) formatToInt(format string) string {
  95. f, err := strconv.ParseFloat(c.Value, 64)
  96. if err != nil {
  97. return err.Error()
  98. }
  99. return fmt.Sprintf(format, int(f))
  100. }
  101. // Return the formatted version of the value.
  102. func (c *Cell) FormattedValue() string {
  103. var numberFormat string = c.GetNumberFormat()
  104. switch numberFormat {
  105. case "general":
  106. return c.Value
  107. case "0", "#,##0":
  108. return c.formatToInt("%d")
  109. case "0.00", "#,##0.00", "@":
  110. return c.formatToFloat("%.2f")
  111. case "#,##0 ;(#,##0)", "#,##0 ;[red](#,##0)":
  112. f, err := strconv.ParseFloat(c.Value, 64)
  113. if err != nil {
  114. return err.Error()
  115. }
  116. if f < 0 {
  117. i := int(math.Abs(f))
  118. return fmt.Sprintf("(%d)", i)
  119. }
  120. i := int(f)
  121. return fmt.Sprintf("%d", i)
  122. case "#,##0.00;(#,##0.00)", "#,##0.00;[red](#,##0.00)":
  123. f, err := strconv.ParseFloat(c.Value, 64)
  124. if err != nil {
  125. return err.Error()
  126. }
  127. if f < 0 {
  128. return fmt.Sprintf("(%.2f)", f)
  129. }
  130. return fmt.Sprintf("%.2f", f)
  131. case "0%":
  132. f, err := strconv.ParseFloat(c.Value, 64)
  133. if err != nil {
  134. return err.Error()
  135. }
  136. f = f * 100
  137. return fmt.Sprintf("%d%%", int(f))
  138. case "0.00%":
  139. f, err := strconv.ParseFloat(c.Value, 64)
  140. if err != nil {
  141. return err.Error()
  142. }
  143. f = f * 100
  144. return fmt.Sprintf("%.2f%%", f)
  145. case "0.00e+00", "##0.0e+0":
  146. return c.formatToFloat("%e")
  147. case "mm-dd-yy":
  148. return c.formatToTime("01-02-06")
  149. case "d-mmm-yy":
  150. return c.formatToTime("2-Jan-06")
  151. case "d-mmm":
  152. return c.formatToTime("2-Jan")
  153. case "mmm-yy":
  154. return c.formatToTime("Jan-06")
  155. case "h:mm am/pm":
  156. return c.formatToTime("3:04 pm")
  157. case "h:mm:ss am/pm":
  158. return c.formatToTime("3:04:05 pm")
  159. case "h:mm":
  160. return c.formatToTime("15:04")
  161. case "h:mm:ss":
  162. return c.formatToTime("15:04:05")
  163. case "m/d/yy h:mm":
  164. return c.formatToTime("1/2/06 15:04")
  165. case "mm:ss":
  166. return c.formatToTime("04:05")
  167. case "[h]:mm:ss":
  168. f, err := strconv.ParseFloat(c.Value, 64)
  169. if err != nil {
  170. return err.Error()
  171. }
  172. t := TimeFromExcelTime(f, c.date1904)
  173. if t.Hour() > 0 {
  174. return t.Format("15:04:05")
  175. }
  176. return t.Format("04:05")
  177. case "mmss.0":
  178. f, err := strconv.ParseFloat(c.Value, 64)
  179. if err != nil {
  180. return err.Error()
  181. }
  182. t := TimeFromExcelTime(f, c.date1904)
  183. return fmt.Sprintf("%0d%0d.%d", t.Minute(), t.Second(), t.Nanosecond()/1000)
  184. case "yyyy\\-mm\\-dd":
  185. return c.formatToTime("2006\\-01\\-02")
  186. case "dd/mm/yy":
  187. return c.formatToTime("02/01/06")
  188. case "hh:mm:ss":
  189. return c.formatToTime("15:04:05")
  190. case "dd/mm/yy\\ hh:mm":
  191. return c.formatToTime("02/01/06\\ 15:04")
  192. case "dd/mm/yyyy hh:mm:ss":
  193. return c.formatToTime("02/01/2006 15:04:05")
  194. case "yy-mm-dd":
  195. return c.formatToTime("06-01-02")
  196. case "d-mmm-yyyy":
  197. return c.formatToTime("2-Jan-2006")
  198. case "m/d/yy":
  199. return c.formatToTime("1/2/06")
  200. case "m/d/yyyy":
  201. return c.formatToTime("1/2/2006")
  202. case "dd-mmm-yyyy":
  203. return c.formatToTime("02-Jan-2006")
  204. case "dd/mm/yyyy":
  205. return c.formatToTime("02/01/2006")
  206. case "mm/dd/yy hh:mm am/pm":
  207. return c.formatToTime("01/02/06 03:04 pm")
  208. case "mm/dd/yyyy hh:mm:ss":
  209. return c.formatToTime("01/02/2006 15:04:05")
  210. case "yyyy-mm-dd hh:mm:ss":
  211. return c.formatToTime("2006-01-02 15:04:05")
  212. }
  213. return c.Value
  214. }
  215. // Row is a high level structure indended to provide user access to a
  216. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  217. type Row struct {
  218. Cells []*Cell
  219. }
  220. // Sheet is a high level structure intended to provide user access to
  221. // the contents of a particular sheet within an XLSX file.
  222. type Sheet struct {
  223. Name string
  224. Rows []*Row
  225. MaxRow int
  226. MaxCol int
  227. }
  228. // Style is a high level structure intended to provide user access to
  229. // the contents of Style within an XLSX file.
  230. type Style struct {
  231. Border Border
  232. Fill Fill
  233. Font Font
  234. }
  235. // Border is a high level structure intended to provide user access to
  236. // the contents of Border Style within an Sheet.
  237. type Border struct {
  238. Left string
  239. Right string
  240. Top string
  241. Bottom string
  242. }
  243. // Fill is a high level structure intended to provide user access to
  244. // the contents of background and foreground color index within an Sheet.
  245. type Fill struct {
  246. PatternType string
  247. BgColor string
  248. FgColor string
  249. }
  250. type Font struct {
  251. Size int
  252. Name string
  253. Family int
  254. Charset int
  255. }
  256. // File is a high level structure providing a slice of Sheet structs
  257. // to the user.
  258. type File struct {
  259. worksheets map[string]*zip.File
  260. numFmtRefTable map[int]xlsxNumFmt
  261. referenceTable []string
  262. styles *xlsxStyles
  263. Sheets []*Sheet // sheet access by index
  264. Sheet map[string]*Sheet // sheet access by name
  265. Date1904 bool
  266. }
  267. // getRangeFromString is an internal helper function that converts
  268. // XLSX internal range syntax to a pair of integers. For example,
  269. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  270. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  271. var parts []string
  272. parts = strings.SplitN(rangeString, ":", 2)
  273. if parts[0] == "" {
  274. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  275. }
  276. if parts[1] == "" {
  277. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  278. }
  279. lower, error = strconv.Atoi(parts[0])
  280. if error != nil {
  281. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  282. }
  283. upper, error = strconv.Atoi(parts[1])
  284. if error != nil {
  285. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  286. }
  287. return lower, upper, error
  288. }
  289. // lettersToNumeric is used to convert a character based column
  290. // reference to a zero based numeric column identifier.
  291. func lettersToNumeric(letters string) int {
  292. sum, mul, n := 0, 1, 0
  293. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  294. c := letters[i]
  295. switch {
  296. case 'A' <= c && c <= 'Z':
  297. n += int(c - 'A')
  298. case 'a' <= c && c <= 'z':
  299. n += int(c - 'a')
  300. }
  301. sum += n * mul
  302. }
  303. return sum
  304. }
  305. // letterOnlyMapF is used in conjunction with strings.Map to return
  306. // only the characters A-Z and a-z in a string
  307. func letterOnlyMapF(rune rune) rune {
  308. switch {
  309. case 'A' <= rune && rune <= 'Z':
  310. return rune
  311. case 'a' <= rune && rune <= 'z':
  312. return rune - 32
  313. }
  314. return -1
  315. }
  316. // intOnlyMapF is used in conjunction with strings.Map to return only
  317. // the numeric portions of a string.
  318. func intOnlyMapF(rune rune) rune {
  319. if rune >= 48 && rune < 58 {
  320. return rune
  321. }
  322. return -1
  323. }
  324. // getCoordsFromCellIDString returns the zero based cartesian
  325. // coordinates from a cell name in Excel format, e.g. the cellIDString
  326. // "A1" returns 0, 0 and the "B3" return 1, 2.
  327. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  328. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  329. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  330. if error != nil {
  331. return x, y, error
  332. }
  333. y -= 1 // Zero based
  334. x = lettersToNumeric(letterPart)
  335. return x, y, error
  336. }
  337. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  338. // and minimum coordinates from the dimension reference embedded in a
  339. // XLSX worksheet. For example, the dimension reference "A1:B2"
  340. // returns "0,0", "1,1".
  341. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  342. var parts []string
  343. parts = strings.Split(ref, ":")
  344. minx, miny, err = getCoordsFromCellIDString(parts[0])
  345. if err != nil {
  346. return -1, -1, -1, -1, err
  347. }
  348. if len(parts) == 1 {
  349. maxx, maxy = minx, miny
  350. return
  351. }
  352. maxx, maxy, err = getCoordsFromCellIDString(parts[1])
  353. if err != nil {
  354. return -1, -1, -1, -1, err
  355. }
  356. return
  357. }
  358. // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet
  359. // that doesn't have a DimensionRef set. The only case currently
  360. // known where this is true is with XLSX exported from Google Docs.
  361. func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) {
  362. // Note, this method could be very slow for large spreadsheets.
  363. var x, y int
  364. minx = 0
  365. miny = 0
  366. maxy = 0
  367. maxx = 0
  368. for _, row := range worksheet.SheetData.Row {
  369. for _, cell := range row.C {
  370. x, y, err = getCoordsFromCellIDString(cell.R)
  371. if err != nil {
  372. return -1, -1, -1, -1, err
  373. }
  374. if x < minx {
  375. minx = x
  376. }
  377. if x > maxx {
  378. maxx = x
  379. }
  380. if y < miny {
  381. miny = y
  382. }
  383. if y > maxy {
  384. maxy = y
  385. }
  386. }
  387. }
  388. return
  389. }
  390. // makeRowFromSpan will, when given a span expressed as a string,
  391. // return an empty Row large enough to encompass that span and
  392. // populate it with empty cells. All rows start from cell 1 -
  393. // regardless of the lower bound of the span.
  394. func makeRowFromSpan(spans string) *Row {
  395. var error error
  396. var upper int
  397. var row *Row
  398. var cell *Cell
  399. row = new(Row)
  400. _, upper, error = getRangeFromString(spans)
  401. if error != nil {
  402. panic(error)
  403. }
  404. error = nil
  405. row.Cells = make([]*Cell, upper)
  406. for i := 0; i < upper; i++ {
  407. cell = new(Cell)
  408. cell.Value = ""
  409. row.Cells[i] = cell
  410. }
  411. return row
  412. }
  413. // makeRowFromRaw returns the Row representation of the xlsxRow.
  414. func makeRowFromRaw(rawrow xlsxRow) *Row {
  415. var upper int
  416. var row *Row
  417. var cell *Cell
  418. row = new(Row)
  419. upper = -1
  420. for _, rawcell := range rawrow.C {
  421. x, _, error := getCoordsFromCellIDString(rawcell.R)
  422. if error != nil {
  423. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  424. }
  425. if x > upper {
  426. upper = x
  427. }
  428. }
  429. upper++
  430. row.Cells = make([]*Cell, upper)
  431. for i := 0; i < upper; i++ {
  432. cell = new(Cell)
  433. cell.Value = ""
  434. row.Cells[i] = cell
  435. }
  436. return row
  437. }
  438. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  439. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  440. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  441. var value string = ""
  442. var data string = rawcell.V
  443. if len(data) > 0 {
  444. vval := strings.Trim(data, " \t\n\r")
  445. switch rawcell.T {
  446. case "s": // Shared String
  447. ref, error := strconv.Atoi(vval)
  448. if error != nil {
  449. panic(error)
  450. }
  451. value = reftable[ref]
  452. default:
  453. value = vval
  454. }
  455. }
  456. return value
  457. }
  458. // readRowsFromSheet is an internal helper function that extracts the
  459. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  460. // the value references from the reference table and stores them in
  461. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
  462. var rows []*Row
  463. var row *Row
  464. var minCol, maxCol, minRow, maxRow, colCount, rowCount int
  465. var reftable []string
  466. var err error
  467. var insertRowIndex, insertColIndex int
  468. if len(Worksheet.SheetData.Row) == 0 {
  469. return nil, 0, 0
  470. }
  471. reftable = file.referenceTable
  472. if len(Worksheet.Dimension.Ref) > 0 {
  473. minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
  474. } else {
  475. minCol, minRow, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet)
  476. }
  477. if err != nil {
  478. panic(err.Error())
  479. }
  480. rowCount = (maxRow - minRow) + 1
  481. colCount = (maxCol - minCol) + 1
  482. rows = make([]*Row, rowCount)
  483. insertRowIndex = minRow
  484. for rowIndex := 0; rowIndex < len(Worksheet.SheetData.Row); rowIndex++ {
  485. rawrow := Worksheet.SheetData.Row[rowIndex]
  486. // Some spreadsheets will omit blank rows from the
  487. // stored data
  488. for rawrow.R > (insertRowIndex + 1) {
  489. // Put an empty Row into the array
  490. rows[insertRowIndex-minRow] = new(Row)
  491. insertRowIndex++
  492. }
  493. // range is not empty and only one range exist
  494. if len(rawrow.Spans) != 0 && strings.Count(rawrow.Spans, ":") == 1 {
  495. row = makeRowFromSpan(rawrow.Spans)
  496. } else {
  497. row = makeRowFromRaw(rawrow)
  498. }
  499. insertColIndex = minCol
  500. for _, rawcell := range rawrow.C {
  501. x, _, _ := getCoordsFromCellIDString(rawcell.R)
  502. // Some spreadsheets will omit blank cells
  503. // from the data.
  504. for x > insertColIndex {
  505. // Put an empty Cell into the array
  506. row.Cells[insertColIndex-minCol] = new(Cell)
  507. insertColIndex++
  508. }
  509. cellX := insertColIndex - minCol
  510. row.Cells[cellX].Value = getValueFromCellData(rawcell, reftable)
  511. row.Cells[cellX].styleIndex = rawcell.S
  512. row.Cells[cellX].styles = file.styles
  513. row.Cells[cellX].numFmtRefTable = file.numFmtRefTable
  514. row.Cells[cellX].date1904 = file.Date1904
  515. insertColIndex++
  516. }
  517. rows[insertRowIndex-minRow] = row
  518. insertRowIndex++
  519. }
  520. return rows, colCount, rowCount
  521. }
  522. type indexedSheet struct {
  523. Index int
  524. Sheet *Sheet
  525. Error error
  526. }
  527. // readSheetFromFile is the logic of converting a xlsxSheet struct
  528. // into a Sheet struct. This work can be done in parallel and so
  529. // readSheetsFromZipFile will spawn an instance of this function per
  530. // sheet and get the results back on the provided channel.
  531. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) {
  532. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  533. worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
  534. if error != nil {
  535. result.Error = error
  536. sc <- result
  537. return
  538. }
  539. sheet := new(Sheet)
  540. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi)
  541. result.Sheet = sheet
  542. sc <- result
  543. }
  544. // readSheetsFromZipFile is an internal helper function that loops
  545. // over the Worksheets defined in the XSLXWorkbook and loads them into
  546. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  547. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) ([]*Sheet, error) {
  548. var workbook *xlsxWorkbook
  549. var error error
  550. var rc io.ReadCloser
  551. var decoder *xml.Decoder
  552. var sheetCount int
  553. workbook = new(xlsxWorkbook)
  554. rc, error = f.Open()
  555. if error != nil {
  556. return nil, error
  557. }
  558. decoder = xml.NewDecoder(rc)
  559. error = decoder.Decode(workbook)
  560. if error != nil {
  561. return nil, error
  562. }
  563. file.Date1904 = workbook.WorkbookPr.Date1904
  564. sheetCount = len(workbook.Sheets.Sheet)
  565. sheets := make([]*Sheet, sheetCount)
  566. sheetChan := make(chan *indexedSheet, sheetCount)
  567. for i, rawsheet := range workbook.Sheets.Sheet {
  568. go readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap)
  569. }
  570. for j := 0; j < sheetCount; j++ {
  571. sheet := <-sheetChan
  572. if sheet.Error != nil {
  573. return nil, sheet.Error
  574. }
  575. sheet.Sheet.Name = workbook.Sheets.Sheet[sheet.Index].Name
  576. sheets[sheet.Index] = sheet.Sheet
  577. }
  578. return sheets, nil
  579. }
  580. // readSharedStringsFromZipFile() is an internal helper function to
  581. // extract a reference table from the sharedStrings.xml file within
  582. // the XLSX zip file.
  583. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  584. var sst *xlsxSST
  585. var error error
  586. var rc io.ReadCloser
  587. var decoder *xml.Decoder
  588. var reftable []string
  589. rc, error = f.Open()
  590. if error != nil {
  591. return nil, error
  592. }
  593. sst = new(xlsxSST)
  594. decoder = xml.NewDecoder(rc)
  595. error = decoder.Decode(sst)
  596. if error != nil {
  597. return nil, error
  598. }
  599. reftable = MakeSharedStringRefTable(sst)
  600. return reftable, nil
  601. }
  602. // readStylesFromZipFile() is an internal helper function to
  603. // extract a style table from the style.xml file within
  604. // the XLSX zip file.
  605. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  606. var style *xlsxStyles
  607. var error error
  608. var rc io.ReadCloser
  609. var decoder *xml.Decoder
  610. rc, error = f.Open()
  611. if error != nil {
  612. return nil, error
  613. }
  614. style = new(xlsxStyles)
  615. decoder = xml.NewDecoder(rc)
  616. error = decoder.Decode(style)
  617. if error != nil {
  618. return nil, error
  619. }
  620. return style, nil
  621. }
  622. func buildNumFmtRefTable(style *xlsxStyles) map[int]xlsxNumFmt {
  623. refTable := make(map[int]xlsxNumFmt)
  624. for _, numFmt := range style.NumFmts {
  625. refTable[numFmt.NumFmtId] = numFmt
  626. }
  627. return refTable
  628. }
  629. // readWorkbookRelationsFromZipFile is an internal helper function to
  630. // extract a map of relationship ID strings to the name of the
  631. // worksheet.xml file they refer to. The resulting map can be used to
  632. // reliably derefence the worksheets in the XLSX file.
  633. func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (map[string]string, error) {
  634. var sheetXMLMap map[string]string
  635. var wbRelationships *xlsxWorkbookRels
  636. var rc io.ReadCloser
  637. var decoder *xml.Decoder
  638. var err error
  639. rc, err = workbookRels.Open()
  640. if err != nil {
  641. return nil, err
  642. }
  643. decoder = xml.NewDecoder(rc)
  644. wbRelationships = new(xlsxWorkbookRels)
  645. err = decoder.Decode(wbRelationships)
  646. if err != nil {
  647. return nil, err
  648. }
  649. sheetXMLMap = make(map[string]string)
  650. for _, rel := range wbRelationships.Relationships {
  651. if strings.HasSuffix(rel.Target, ".xml") && strings.HasPrefix(rel.Target, "worksheets/") {
  652. sheetXMLMap[rel.Id] = strings.Replace(rel.Target[len("worksheets/"):], ".xml", "", 1)
  653. }
  654. }
  655. return sheetXMLMap, nil
  656. }
  657. // OpenFile() take the name of an XLSX file and returns a populated
  658. // xlsx.File struct for it.
  659. func OpenFile(filename string) (*File, error) {
  660. var f *zip.ReadCloser
  661. f, err := zip.OpenReader(filename)
  662. if err != nil {
  663. return nil, err
  664. }
  665. return ReadZip(f)
  666. }
  667. // ReadZip() takes a pointer to a zip.ReadCloser and returns a
  668. // xlsx.File struct populated with its contents. In most cases
  669. // ReadZip is not used directly, but is called internally by OpenFile.
  670. func ReadZip(f *zip.ReadCloser) (*File, error) {
  671. defer f.Close()
  672. return ReadZipReader(&f.Reader)
  673. }
  674. // ReadZipReader() can be used to read xlsx in memory without touch filesystem.
  675. func ReadZipReader(r *zip.Reader) (*File, error) {
  676. var err error
  677. var file *File
  678. var reftable []string
  679. var sharedStrings *zip.File
  680. var sheetMap map[string]*Sheet
  681. var sheetXMLMap map[string]string
  682. var sheets []*Sheet
  683. var style *xlsxStyles
  684. var styles *zip.File
  685. var v *zip.File
  686. var workbook *zip.File
  687. var workbookRels *zip.File
  688. var worksheets map[string]*zip.File
  689. file = new(File)
  690. worksheets = make(map[string]*zip.File, len(r.File))
  691. for _, v = range r.File {
  692. switch v.Name {
  693. case "xl/sharedStrings.xml":
  694. sharedStrings = v
  695. case "xl/workbook.xml":
  696. workbook = v
  697. case "xl/_rels/workbook.xml.rels":
  698. workbookRels = v
  699. case "xl/styles.xml":
  700. styles = v
  701. default:
  702. if len(v.Name) > 14 {
  703. if v.Name[0:13] == "xl/worksheets" {
  704. worksheets[v.Name[14:len(v.Name)-4]] = v
  705. }
  706. }
  707. }
  708. }
  709. sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels)
  710. if err != nil {
  711. return nil, err
  712. }
  713. file.worksheets = worksheets
  714. reftable, err = readSharedStringsFromZipFile(sharedStrings)
  715. if err != nil {
  716. return nil, err
  717. }
  718. if reftable == nil {
  719. readerErr := new(XLSXReaderError)
  720. readerErr.Err = "No valid sharedStrings.xml found in XLSX file"
  721. return nil, readerErr
  722. }
  723. file.referenceTable = reftable
  724. style, err = readStylesFromZipFile(styles)
  725. if err != nil {
  726. return nil, err
  727. }
  728. file.styles = style
  729. file.numFmtRefTable = buildNumFmtRefTable(style)
  730. sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap)
  731. if err != nil {
  732. return nil, err
  733. }
  734. if sheets == nil {
  735. readerErr := new(XLSXReaderError)
  736. readerErr.Err = "No sheets found in XLSX File"
  737. return nil, readerErr
  738. }
  739. file.Sheets = sheets
  740. sheetMap = make(map[string]*Sheet, len(sheets))
  741. for i := 0; i < len(sheets); i++ {
  742. sheetMap[sheets[i].Name] = sheets[i]
  743. }
  744. file.Sheet = sheetMap
  745. return file, nil
  746. }
  747. func NewFile() *File {
  748. return &File{}
  749. }