lib.go 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "math"
  9. "path"
  10. "strconv"
  11. "strings"
  12. )
  13. // XLSXReaderError is the standard error type for otherwise undefined
  14. // errors in the XSLX reading process.
  15. type XLSXReaderError struct {
  16. Err string
  17. }
  18. // String() returns a string value from an XLSXReaderError struct in
  19. // order that it might comply with the os.Error interface.
  20. func (e *XLSXReaderError) Error() string {
  21. return e.Err
  22. }
  23. // Cell is a high level structure intended to provide user access to
  24. // the contents of Cell within an xlsx.Row.
  25. type Cell struct {
  26. Value string
  27. styleIndex int
  28. styles *xlsxStyles
  29. numFmtRefTable map[int]xlsxNumFmt
  30. date1904 bool
  31. }
  32. // CellInterface defines the public API of the Cell.
  33. type CellInterface interface {
  34. String() string
  35. }
  36. // String returns the value of a Cell as a string.
  37. func (c *Cell) String() string {
  38. return c.Value
  39. }
  40. // GetStyle returns the Style associated with a Cell
  41. func (c *Cell) GetStyle() *Style {
  42. style := &Style{}
  43. if c.styleIndex >= 0 && c.styleIndex < len(c.styles.CellXfs) {
  44. xf := c.styles.CellXfs[c.styleIndex]
  45. if xf.BorderId >= 0 && xf.BorderId < len(c.styles.Borders) {
  46. var border Border
  47. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  48. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  49. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  50. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  51. style.Border = border
  52. }
  53. if xf.FillId >= 0 && xf.FillId < len(c.styles.Fills) {
  54. var fill Fill
  55. fill.PatternType = c.styles.Fills[xf.FillId].PatternFill.PatternType
  56. fill.BgColor = c.styles.Fills[xf.FillId].PatternFill.BgColor.RGB
  57. fill.FgColor = c.styles.Fills[xf.FillId].PatternFill.FgColor.RGB
  58. style.Fill = fill
  59. }
  60. if xf.FontId >= 0 && xf.FontId < len(c.styles.Fonts) {
  61. font := c.styles.Fonts[xf.FontId]
  62. style.Font = Font{}
  63. style.Font.Size, _ = strconv.Atoi(font.Sz.Val)
  64. style.Font.Name = font.Name.Val
  65. style.Font.Family, _ = strconv.Atoi(font.Family.Val)
  66. style.Font.Charset, _ = strconv.Atoi(font.Charset.Val)
  67. }
  68. }
  69. return style
  70. }
  71. // The number format string is returnable from a cell.
  72. func (c *Cell) GetNumberFormat() string {
  73. var numberFormat string = ""
  74. if c.styleIndex > 0 && c.styleIndex <= len(c.styles.CellXfs) {
  75. xf := c.styles.CellXfs[c.styleIndex-1]
  76. numFmt := c.numFmtRefTable[xf.NumFmtId]
  77. numberFormat = numFmt.FormatCode
  78. }
  79. return strings.ToLower(numberFormat)
  80. }
  81. func (c *Cell) formatToTime(format string) string {
  82. f, err := strconv.ParseFloat(c.Value, 64)
  83. if err != nil {
  84. return err.Error()
  85. }
  86. return TimeFromExcelTime(f, c.date1904).Format(format)
  87. }
  88. func (c *Cell) formatToFloat(format string) string {
  89. f, err := strconv.ParseFloat(c.Value, 64)
  90. if err != nil {
  91. return err.Error()
  92. }
  93. return fmt.Sprintf(format, f)
  94. }
  95. func (c *Cell) formatToInt(format string) string {
  96. f, err := strconv.ParseFloat(c.Value, 64)
  97. if err != nil {
  98. return err.Error()
  99. }
  100. return fmt.Sprintf(format, int(f))
  101. }
  102. // Return the formatted version of the value.
  103. func (c *Cell) FormattedValue() string {
  104. var numberFormat string = c.GetNumberFormat()
  105. switch numberFormat {
  106. case "general":
  107. return c.Value
  108. case "0", "#,##0":
  109. return c.formatToInt("%d")
  110. case "0.00", "#,##0.00", "@":
  111. return c.formatToFloat("%.2f")
  112. case "#,##0 ;(#,##0)", "#,##0 ;[red](#,##0)":
  113. f, err := strconv.ParseFloat(c.Value, 64)
  114. if err != nil {
  115. return err.Error()
  116. }
  117. if f < 0 {
  118. i := int(math.Abs(f))
  119. return fmt.Sprintf("(%d)", i)
  120. }
  121. i := int(f)
  122. return fmt.Sprintf("%d", i)
  123. case "#,##0.00;(#,##0.00)", "#,##0.00;[red](#,##0.00)":
  124. f, err := strconv.ParseFloat(c.Value, 64)
  125. if err != nil {
  126. return err.Error()
  127. }
  128. if f < 0 {
  129. return fmt.Sprintf("(%.2f)", f)
  130. }
  131. return fmt.Sprintf("%.2f", f)
  132. case "0%":
  133. f, err := strconv.ParseFloat(c.Value, 64)
  134. if err != nil {
  135. return err.Error()
  136. }
  137. f = f * 100
  138. return fmt.Sprintf("%d%%", int(f))
  139. case "0.00%":
  140. f, err := strconv.ParseFloat(c.Value, 64)
  141. if err != nil {
  142. return err.Error()
  143. }
  144. f = f * 100
  145. return fmt.Sprintf("%.2f%%", f)
  146. case "0.00e+00", "##0.0e+0":
  147. return c.formatToFloat("%e")
  148. case "mm-dd-yy":
  149. return c.formatToTime("01-02-06")
  150. case "d-mmm-yy":
  151. return c.formatToTime("2-Jan-06")
  152. case "d-mmm":
  153. return c.formatToTime("2-Jan")
  154. case "mmm-yy":
  155. return c.formatToTime("Jan-06")
  156. case "h:mm am/pm":
  157. return c.formatToTime("3:04 pm")
  158. case "h:mm:ss am/pm":
  159. return c.formatToTime("3:04:05 pm")
  160. case "h:mm":
  161. return c.formatToTime("15:04")
  162. case "h:mm:ss":
  163. return c.formatToTime("15:04:05")
  164. case "m/d/yy h:mm":
  165. return c.formatToTime("1/2/06 15:04")
  166. case "mm:ss":
  167. return c.formatToTime("04:05")
  168. case "[h]:mm:ss":
  169. f, err := strconv.ParseFloat(c.Value, 64)
  170. if err != nil {
  171. return err.Error()
  172. }
  173. t := TimeFromExcelTime(f, c.date1904)
  174. if t.Hour() > 0 {
  175. return t.Format("15:04:05")
  176. }
  177. return t.Format("04:05")
  178. case "mmss.0":
  179. f, err := strconv.ParseFloat(c.Value, 64)
  180. if err != nil {
  181. return err.Error()
  182. }
  183. t := TimeFromExcelTime(f, c.date1904)
  184. return fmt.Sprintf("%0d%0d.%d", t.Minute(), t.Second(), t.Nanosecond()/1000)
  185. case "yyyy\\-mm\\-dd":
  186. return c.formatToTime("2006\\-01\\-02")
  187. case "dd/mm/yy":
  188. return c.formatToTime("02/01/06")
  189. case "hh:mm:ss":
  190. return c.formatToTime("15:04:05")
  191. case "dd/mm/yy\\ hh:mm":
  192. return c.formatToTime("02/01/06\\ 15:04")
  193. case "dd/mm/yyyy hh:mm:ss":
  194. return c.formatToTime("02/01/2006 15:04:05")
  195. case "yy-mm-dd":
  196. return c.formatToTime("06-01-02")
  197. case "d-mmm-yyyy":
  198. return c.formatToTime("2-Jan-2006")
  199. case "m/d/yy":
  200. return c.formatToTime("1/2/06")
  201. case "m/d/yyyy":
  202. return c.formatToTime("1/2/2006")
  203. case "dd-mmm-yyyy":
  204. return c.formatToTime("02-Jan-2006")
  205. case "dd/mm/yyyy":
  206. return c.formatToTime("02/01/2006")
  207. case "mm/dd/yy hh:mm am/pm":
  208. return c.formatToTime("01/02/06 03:04 pm")
  209. case "mm/dd/yyyy hh:mm:ss":
  210. return c.formatToTime("01/02/2006 15:04:05")
  211. case "yyyy-mm-dd hh:mm:ss":
  212. return c.formatToTime("2006-01-02 15:04:05")
  213. }
  214. return c.Value
  215. }
  216. // Row is a high level structure indended to provide user access to a
  217. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  218. type Row struct {
  219. Cells []*Cell
  220. }
  221. // Sheet is a high level structure intended to provide user access to
  222. // the contents of a particular sheet within an XLSX file.
  223. type Sheet struct {
  224. Name string
  225. Rows []*Row
  226. MaxRow int
  227. MaxCol int
  228. }
  229. // Style is a high level structure intended to provide user access to
  230. // the contents of Style within an XLSX file.
  231. type Style struct {
  232. Border Border
  233. Fill Fill
  234. Font Font
  235. }
  236. // Border is a high level structure intended to provide user access to
  237. // the contents of Border Style within an Sheet.
  238. type Border struct {
  239. Left string
  240. Right string
  241. Top string
  242. Bottom string
  243. }
  244. // Fill is a high level structure intended to provide user access to
  245. // the contents of background and foreground color index within an Sheet.
  246. type Fill struct {
  247. PatternType string
  248. BgColor string
  249. FgColor string
  250. }
  251. type Font struct {
  252. Size int
  253. Name string
  254. Family int
  255. Charset int
  256. }
  257. // File is a high level structure providing a slice of Sheet structs
  258. // to the user.
  259. type File struct {
  260. worksheets map[string]*zip.File
  261. numFmtRefTable map[int]xlsxNumFmt
  262. referenceTable []string
  263. styles *xlsxStyles
  264. Sheets []*Sheet // sheet access by index
  265. Sheet map[string]*Sheet // sheet access by name
  266. Date1904 bool
  267. }
  268. // getRangeFromString is an internal helper function that converts
  269. // XLSX internal range syntax to a pair of integers. For example,
  270. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  271. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  272. var parts []string
  273. parts = strings.SplitN(rangeString, ":", 2)
  274. if parts[0] == "" {
  275. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  276. }
  277. if parts[1] == "" {
  278. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  279. }
  280. lower, error = strconv.Atoi(parts[0])
  281. if error != nil {
  282. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  283. }
  284. upper, error = strconv.Atoi(parts[1])
  285. if error != nil {
  286. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  287. }
  288. return lower, upper, error
  289. }
  290. // lettersToNumeric is used to convert a character based column
  291. // reference to a zero based numeric column identifier.
  292. func lettersToNumeric(letters string) int {
  293. sum, mul, n := 0, 1, 0
  294. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  295. c := letters[i]
  296. switch {
  297. case 'A' <= c && c <= 'Z':
  298. n += int(c - 'A')
  299. case 'a' <= c && c <= 'z':
  300. n += int(c - 'a')
  301. }
  302. sum += n * mul
  303. }
  304. return sum
  305. }
  306. // letterOnlyMapF is used in conjunction with strings.Map to return
  307. // only the characters A-Z and a-z in a string
  308. func letterOnlyMapF(rune rune) rune {
  309. switch {
  310. case 'A' <= rune && rune <= 'Z':
  311. return rune
  312. case 'a' <= rune && rune <= 'z':
  313. return rune - 32
  314. }
  315. return -1
  316. }
  317. // intOnlyMapF is used in conjunction with strings.Map to return only
  318. // the numeric portions of a string.
  319. func intOnlyMapF(rune rune) rune {
  320. if rune >= 48 && rune < 58 {
  321. return rune
  322. }
  323. return -1
  324. }
  325. // getCoordsFromCellIDString returns the zero based cartesian
  326. // coordinates from a cell name in Excel format, e.g. the cellIDString
  327. // "A1" returns 0, 0 and the "B3" return 1, 2.
  328. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  329. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  330. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  331. if error != nil {
  332. return x, y, error
  333. }
  334. y -= 1 // Zero based
  335. x = lettersToNumeric(letterPart)
  336. return x, y, error
  337. }
  338. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  339. // and minimum coordinates from the dimension reference embedded in a
  340. // XLSX worksheet. For example, the dimension reference "A1:B2"
  341. // returns "0,0", "1,1".
  342. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  343. var parts []string
  344. parts = strings.Split(ref, ":")
  345. minx, miny, err = getCoordsFromCellIDString(parts[0])
  346. if err != nil {
  347. return -1, -1, -1, -1, err
  348. }
  349. if len(parts) == 1 {
  350. maxx, maxy = minx, miny
  351. return
  352. }
  353. maxx, maxy, err = getCoordsFromCellIDString(parts[1])
  354. if err != nil {
  355. return -1, -1, -1, -1, err
  356. }
  357. return
  358. }
  359. // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet
  360. // that doesn't have a DimensionRef set. The only case currently
  361. // known where this is true is with XLSX exported from Google Docs.
  362. func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) {
  363. // Note, this method could be very slow for large spreadsheets.
  364. var x, y int
  365. minx = math.MaxInt64
  366. miny = math.MaxInt64
  367. maxy = 0
  368. maxx = 0
  369. for _, row := range worksheet.SheetData.Row {
  370. for _, cell := range row.C {
  371. x, y, err = getCoordsFromCellIDString(cell.R)
  372. if err != nil {
  373. return -1, -1, -1, -1, err
  374. }
  375. if x < minx {
  376. minx = x
  377. }
  378. if x > maxx {
  379. maxx = x
  380. }
  381. if y < miny {
  382. miny = y
  383. }
  384. if y > maxy {
  385. maxy = y
  386. }
  387. }
  388. }
  389. return
  390. }
  391. // makeRowFromSpan will, when given a span expressed as a string,
  392. // return an empty Row large enough to encompass that span and
  393. // populate it with empty cells. All rows start from cell 1 -
  394. // regardless of the lower bound of the span.
  395. func makeRowFromSpan(spans string) *Row {
  396. var error error
  397. var upper int
  398. var row *Row
  399. var cell *Cell
  400. row = new(Row)
  401. _, upper, error = getRangeFromString(spans)
  402. if error != nil {
  403. panic(error)
  404. }
  405. error = nil
  406. row.Cells = make([]*Cell, upper)
  407. for i := 0; i < upper; i++ {
  408. cell = new(Cell)
  409. cell.Value = ""
  410. row.Cells[i] = cell
  411. }
  412. return row
  413. }
  414. // makeRowFromRaw returns the Row representation of the xlsxRow.
  415. func makeRowFromRaw(rawrow xlsxRow) *Row {
  416. var upper int
  417. var row *Row
  418. var cell *Cell
  419. row = new(Row)
  420. upper = -1
  421. for _, rawcell := range rawrow.C {
  422. x, _, error := getCoordsFromCellIDString(rawcell.R)
  423. if error != nil {
  424. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  425. }
  426. if x > upper {
  427. upper = x
  428. }
  429. }
  430. upper++
  431. row.Cells = make([]*Cell, upper)
  432. for i := 0; i < upper; i++ {
  433. cell = new(Cell)
  434. cell.Value = ""
  435. row.Cells[i] = cell
  436. }
  437. return row
  438. }
  439. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  440. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  441. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  442. var value string = ""
  443. var data string = rawcell.V
  444. if len(data) > 0 {
  445. vval := strings.Trim(data, " \t\n\r")
  446. switch rawcell.T {
  447. case "s": // Shared String
  448. ref, error := strconv.Atoi(vval)
  449. if error != nil {
  450. panic(error)
  451. }
  452. value = reftable[ref]
  453. default:
  454. value = vval
  455. }
  456. }
  457. return value
  458. }
  459. // readRowsFromSheet is an internal helper function that extracts the
  460. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  461. // the value references from the reference table and stores them in
  462. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
  463. var rows []*Row
  464. var row *Row
  465. var minCol, maxCol, minRow, maxRow, colCount, rowCount int
  466. var reftable []string
  467. var err error
  468. var insertRowIndex, insertColIndex int
  469. if len(Worksheet.SheetData.Row) == 0 {
  470. return nil, 0, 0
  471. }
  472. reftable = file.referenceTable
  473. if len(Worksheet.Dimension.Ref) > 0 {
  474. minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
  475. } else {
  476. minCol, minRow, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet)
  477. }
  478. if err != nil {
  479. panic(err.Error())
  480. }
  481. rowCount = (maxRow - minRow) + 1
  482. colCount = (maxCol - minCol) + 1
  483. rows = make([]*Row, rowCount)
  484. insertRowIndex = minRow
  485. for rowIndex := 0; rowIndex < len(Worksheet.SheetData.Row); rowIndex++ {
  486. rawrow := Worksheet.SheetData.Row[rowIndex]
  487. // Some spreadsheets will omit blank rows from the
  488. // stored data
  489. for rawrow.R > (insertRowIndex + 1) {
  490. // Put an empty Row into the array
  491. rows[insertRowIndex-minRow] = new(Row)
  492. insertRowIndex++
  493. }
  494. // range is not empty and only one range exist
  495. if len(rawrow.Spans) != 0 && strings.Count(rawrow.Spans, ":") == 1 {
  496. row = makeRowFromSpan(rawrow.Spans)
  497. } else {
  498. row = makeRowFromRaw(rawrow)
  499. }
  500. insertColIndex = minCol
  501. for _, rawcell := range rawrow.C {
  502. x, _, _ := getCoordsFromCellIDString(rawcell.R)
  503. // Some spreadsheets will omit blank cells
  504. // from the data.
  505. for x > insertColIndex {
  506. // Put an empty Cell into the array
  507. row.Cells[insertColIndex-minCol] = new(Cell)
  508. insertColIndex++
  509. }
  510. cellX := insertColIndex - minCol
  511. row.Cells[cellX].Value = getValueFromCellData(rawcell, reftable)
  512. row.Cells[cellX].styleIndex = rawcell.S
  513. row.Cells[cellX].styles = file.styles
  514. row.Cells[cellX].numFmtRefTable = file.numFmtRefTable
  515. row.Cells[cellX].date1904 = file.Date1904
  516. insertColIndex++
  517. }
  518. rows[insertRowIndex-minRow] = row
  519. insertRowIndex++
  520. }
  521. return rows, colCount, rowCount
  522. }
  523. type indexedSheet struct {
  524. Index int
  525. Sheet *Sheet
  526. Error error
  527. }
  528. // readSheetFromFile is the logic of converting a xlsxSheet struct
  529. // into a Sheet struct. This work can be done in parallel and so
  530. // readSheetsFromZipFile will spawn an instance of this function per
  531. // sheet and get the results back on the provided channel.
  532. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) {
  533. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  534. worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
  535. if error != nil {
  536. result.Error = error
  537. sc <- result
  538. return
  539. }
  540. sheet := new(Sheet)
  541. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi)
  542. result.Sheet = sheet
  543. sc <- result
  544. }
  545. // readSheetsFromZipFile is an internal helper function that loops
  546. // over the Worksheets defined in the XSLXWorkbook and loads them into
  547. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  548. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) ([]*Sheet, error) {
  549. var workbook *xlsxWorkbook
  550. var error error
  551. var rc io.ReadCloser
  552. var decoder *xml.Decoder
  553. var sheetCount int
  554. workbook = new(xlsxWorkbook)
  555. rc, error = f.Open()
  556. if error != nil {
  557. return nil, error
  558. }
  559. decoder = xml.NewDecoder(rc)
  560. error = decoder.Decode(workbook)
  561. if error != nil {
  562. return nil, error
  563. }
  564. file.Date1904 = workbook.WorkbookPr.Date1904
  565. sheetCount = len(workbook.Sheets.Sheet)
  566. sheets := make([]*Sheet, sheetCount)
  567. sheetChan := make(chan *indexedSheet, sheetCount)
  568. for i, rawsheet := range workbook.Sheets.Sheet {
  569. go readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap)
  570. }
  571. for j := 0; j < sheetCount; j++ {
  572. sheet := <-sheetChan
  573. if sheet.Error != nil {
  574. return nil, sheet.Error
  575. }
  576. sheet.Sheet.Name = workbook.Sheets.Sheet[sheet.Index].Name
  577. sheets[sheet.Index] = sheet.Sheet
  578. }
  579. return sheets, nil
  580. }
  581. // readSharedStringsFromZipFile() is an internal helper function to
  582. // extract a reference table from the sharedStrings.xml file within
  583. // the XLSX zip file.
  584. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  585. var sst *xlsxSST
  586. var error error
  587. var rc io.ReadCloser
  588. var decoder *xml.Decoder
  589. var reftable []string
  590. // In a file with no strings it's possible that
  591. // sharedStrings.xml doesn't exist. In this case the value
  592. // passed as f will be nil.
  593. if f == nil {
  594. reftable = make([]string, 0)
  595. return reftable, nil
  596. }
  597. rc, error = f.Open()
  598. if error != nil {
  599. return nil, error
  600. }
  601. sst = new(xlsxSST)
  602. decoder = xml.NewDecoder(rc)
  603. error = decoder.Decode(sst)
  604. if error != nil {
  605. return nil, error
  606. }
  607. reftable = MakeSharedStringRefTable(sst)
  608. return reftable, nil
  609. }
  610. // readStylesFromZipFile() is an internal helper function to
  611. // extract a style table from the style.xml file within
  612. // the XLSX zip file.
  613. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  614. var style *xlsxStyles
  615. var error error
  616. var rc io.ReadCloser
  617. var decoder *xml.Decoder
  618. rc, error = f.Open()
  619. if error != nil {
  620. return nil, error
  621. }
  622. style = new(xlsxStyles)
  623. decoder = xml.NewDecoder(rc)
  624. error = decoder.Decode(style)
  625. if error != nil {
  626. return nil, error
  627. }
  628. return style, nil
  629. }
  630. func buildNumFmtRefTable(style *xlsxStyles) map[int]xlsxNumFmt {
  631. refTable := make(map[int]xlsxNumFmt)
  632. for _, numFmt := range style.NumFmts {
  633. refTable[numFmt.NumFmtId] = numFmt
  634. }
  635. return refTable
  636. }
  637. // readWorkbookRelationsFromZipFile is an internal helper function to
  638. // extract a map of relationship ID strings to the name of the
  639. // worksheet.xml file they refer to. The resulting map can be used to
  640. // reliably derefence the worksheets in the XLSX file.
  641. func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (map[string]string, error) {
  642. var sheetXMLMap map[string]string
  643. var wbRelationships *xlsxWorkbookRels
  644. var rc io.ReadCloser
  645. var decoder *xml.Decoder
  646. var err error
  647. rc, err = workbookRels.Open()
  648. if err != nil {
  649. return nil, err
  650. }
  651. decoder = xml.NewDecoder(rc)
  652. wbRelationships = new(xlsxWorkbookRels)
  653. err = decoder.Decode(wbRelationships)
  654. if err != nil {
  655. return nil, err
  656. }
  657. sheetXMLMap = make(map[string]string)
  658. for _, rel := range wbRelationships.Relationships {
  659. if strings.HasSuffix(rel.Target, ".xml") && rel.Type == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" {
  660. _, filename := path.Split(rel.Target)
  661. sheetXMLMap[rel.Id] = strings.Replace(filename, ".xml", "", 1)
  662. }
  663. }
  664. return sheetXMLMap, nil
  665. }
  666. // OpenFile() take the name of an XLSX file and returns a populated
  667. // xlsx.File struct for it.
  668. func OpenFile(filename string) (*File, error) {
  669. var f *zip.ReadCloser
  670. f, err := zip.OpenReader(filename)
  671. if err != nil {
  672. return nil, err
  673. }
  674. return ReadZip(f)
  675. }
  676. // ReadZip() takes a pointer to a zip.ReadCloser and returns a
  677. // xlsx.File struct populated with its contents. In most cases
  678. // ReadZip is not used directly, but is called internally by OpenFile.
  679. func ReadZip(f *zip.ReadCloser) (*File, error) {
  680. defer f.Close()
  681. return ReadZipReader(&f.Reader)
  682. }
  683. // ReadZipReader() can be used to read xlsx in memory without touch filesystem.
  684. func ReadZipReader(r *zip.Reader) (*File, error) {
  685. var err error
  686. var file *File
  687. var reftable []string
  688. var sharedStrings *zip.File
  689. var sheetMap map[string]*Sheet
  690. var sheetXMLMap map[string]string
  691. var sheets []*Sheet
  692. var style *xlsxStyles
  693. var styles *zip.File
  694. var v *zip.File
  695. var workbook *zip.File
  696. var workbookRels *zip.File
  697. var worksheets map[string]*zip.File
  698. file = new(File)
  699. worksheets = make(map[string]*zip.File, len(r.File))
  700. for _, v = range r.File {
  701. switch v.Name {
  702. case "xl/sharedStrings.xml":
  703. sharedStrings = v
  704. case "xl/workbook.xml":
  705. workbook = v
  706. case "xl/_rels/workbook.xml.rels":
  707. workbookRels = v
  708. case "xl/styles.xml":
  709. styles = v
  710. default:
  711. if len(v.Name) > 14 {
  712. if v.Name[0:13] == "xl/worksheets" {
  713. worksheets[v.Name[14:len(v.Name)-4]] = v
  714. }
  715. }
  716. }
  717. }
  718. sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels)
  719. if err != nil {
  720. return nil, err
  721. }
  722. file.worksheets = worksheets
  723. reftable, err = readSharedStringsFromZipFile(sharedStrings)
  724. if err != nil {
  725. return nil, err
  726. }
  727. file.referenceTable = reftable
  728. if styles != nil {
  729. style, err = readStylesFromZipFile(styles)
  730. if err != nil {
  731. return nil, err
  732. }
  733. file.styles = style
  734. }
  735. sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap)
  736. if err != nil {
  737. return nil, err
  738. }
  739. if sheets == nil {
  740. readerErr := new(XLSXReaderError)
  741. readerErr.Err = "No sheets found in XLSX File"
  742. return nil, readerErr
  743. }
  744. file.Sheets = sheets
  745. sheetMap = make(map[string]*Sheet, len(sheets))
  746. for i := 0; i < len(sheets); i++ {
  747. sheetMap[sheets[i].Name] = sheets[i]
  748. }
  749. file.Sheet = sheetMap
  750. return file, nil
  751. }
  752. func NewFile() *File {
  753. return &File{}
  754. }