lib.go 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "math"
  9. "path"
  10. "strconv"
  11. "strings"
  12. )
  13. // XLSXReaderError is the standard error type for otherwise undefined
  14. // errors in the XSLX reading process.
  15. type XLSXReaderError struct {
  16. Err string
  17. }
  18. // String() returns a string value from an XLSXReaderError struct in
  19. // order that it might comply with the os.Error interface.
  20. func (e *XLSXReaderError) Error() string {
  21. return e.Err
  22. }
  23. // Cell is a high level structure intended to provide user access to
  24. // the contents of Cell within an xlsx.Row.
  25. type Cell struct {
  26. Value string
  27. styleIndex int
  28. styles *xlsxStyles
  29. numFmtRefTable map[int]xlsxNumFmt
  30. date1904 bool
  31. }
  32. // CellInterface defines the public API of the Cell.
  33. type CellInterface interface {
  34. String() string
  35. }
  36. // String returns the value of a Cell as a string.
  37. func (c *Cell) String() string {
  38. return c.Value
  39. }
  40. // GetStyle returns the Style associated with a Cell
  41. func (c *Cell) GetStyle() *Style {
  42. style := &Style{}
  43. if c.styleIndex >= 0 && c.styleIndex < len(c.styles.CellXfs) {
  44. xf := c.styles.CellXfs[c.styleIndex]
  45. if xf.BorderId >= 0 && xf.BorderId < len(c.styles.Borders) {
  46. var border Border
  47. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  48. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  49. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  50. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  51. style.Border = border
  52. }
  53. if xf.FillId >= 0 && xf.FillId < len(c.styles.Fills) {
  54. var fill Fill
  55. fill.PatternType = c.styles.Fills[xf.FillId].PatternFill.PatternType
  56. fill.BgColor = c.styles.Fills[xf.FillId].PatternFill.BgColor.RGB
  57. fill.FgColor = c.styles.Fills[xf.FillId].PatternFill.FgColor.RGB
  58. style.Fill = fill
  59. }
  60. if xf.FontId >= 0 && xf.FontId < len(c.styles.Fonts) {
  61. font := c.styles.Fonts[xf.FontId]
  62. style.Font = Font{}
  63. style.Font.Size, _ = strconv.Atoi(font.Sz.Val)
  64. style.Font.Name = font.Name.Val
  65. style.Font.Family, _ = strconv.Atoi(font.Family.Val)
  66. style.Font.Charset, _ = strconv.Atoi(font.Charset.Val)
  67. }
  68. }
  69. return style
  70. }
  71. // The number format string is returnable from a cell.
  72. func (c *Cell) GetNumberFormat() string {
  73. var numberFormat string = ""
  74. if c.styleIndex > 0 && c.styleIndex <= len(c.styles.CellXfs) {
  75. xf := c.styles.CellXfs[c.styleIndex-1]
  76. numFmt := c.numFmtRefTable[xf.NumFmtId]
  77. numberFormat = numFmt.FormatCode
  78. }
  79. return strings.ToLower(numberFormat)
  80. }
  81. func (c *Cell) formatToTime(format string) string {
  82. f, err := strconv.ParseFloat(c.Value, 64)
  83. if err != nil {
  84. return err.Error()
  85. }
  86. return TimeFromExcelTime(f, c.date1904).Format(format)
  87. }
  88. func (c *Cell) formatToFloat(format string) string {
  89. f, err := strconv.ParseFloat(c.Value, 64)
  90. if err != nil {
  91. return err.Error()
  92. }
  93. return fmt.Sprintf(format, f)
  94. }
  95. func (c *Cell) formatToInt(format string) string {
  96. f, err := strconv.ParseFloat(c.Value, 64)
  97. if err != nil {
  98. return err.Error()
  99. }
  100. return fmt.Sprintf(format, int(f))
  101. }
  102. // Return the formatted version of the value.
  103. func (c *Cell) FormattedValue() string {
  104. var numberFormat string = c.GetNumberFormat()
  105. switch numberFormat {
  106. case "general":
  107. return c.Value
  108. case "0", "#,##0":
  109. return c.formatToInt("%d")
  110. case "0.00", "#,##0.00", "@":
  111. return c.formatToFloat("%.2f")
  112. case "#,##0 ;(#,##0)", "#,##0 ;[red](#,##0)":
  113. f, err := strconv.ParseFloat(c.Value, 64)
  114. if err != nil {
  115. return err.Error()
  116. }
  117. if f < 0 {
  118. i := int(math.Abs(f))
  119. return fmt.Sprintf("(%d)", i)
  120. }
  121. i := int(f)
  122. return fmt.Sprintf("%d", i)
  123. case "#,##0.00;(#,##0.00)", "#,##0.00;[red](#,##0.00)":
  124. f, err := strconv.ParseFloat(c.Value, 64)
  125. if err != nil {
  126. return err.Error()
  127. }
  128. if f < 0 {
  129. return fmt.Sprintf("(%.2f)", f)
  130. }
  131. return fmt.Sprintf("%.2f", f)
  132. case "0%":
  133. f, err := strconv.ParseFloat(c.Value, 64)
  134. if err != nil {
  135. return err.Error()
  136. }
  137. f = f * 100
  138. return fmt.Sprintf("%d%%", int(f))
  139. case "0.00%":
  140. f, err := strconv.ParseFloat(c.Value, 64)
  141. if err != nil {
  142. return err.Error()
  143. }
  144. f = f * 100
  145. return fmt.Sprintf("%.2f%%", f)
  146. case "0.00e+00", "##0.0e+0":
  147. return c.formatToFloat("%e")
  148. case "mm-dd-yy":
  149. return c.formatToTime("01-02-06")
  150. case "d-mmm-yy":
  151. return c.formatToTime("2-Jan-06")
  152. case "d-mmm":
  153. return c.formatToTime("2-Jan")
  154. case "mmm-yy":
  155. return c.formatToTime("Jan-06")
  156. case "h:mm am/pm":
  157. return c.formatToTime("3:04 pm")
  158. case "h:mm:ss am/pm":
  159. return c.formatToTime("3:04:05 pm")
  160. case "h:mm":
  161. return c.formatToTime("15:04")
  162. case "h:mm:ss":
  163. return c.formatToTime("15:04:05")
  164. case "m/d/yy h:mm":
  165. return c.formatToTime("1/2/06 15:04")
  166. case "mm:ss":
  167. return c.formatToTime("04:05")
  168. case "[h]:mm:ss":
  169. f, err := strconv.ParseFloat(c.Value, 64)
  170. if err != nil {
  171. return err.Error()
  172. }
  173. t := TimeFromExcelTime(f, c.date1904)
  174. if t.Hour() > 0 {
  175. return t.Format("15:04:05")
  176. }
  177. return t.Format("04:05")
  178. case "mmss.0":
  179. f, err := strconv.ParseFloat(c.Value, 64)
  180. if err != nil {
  181. return err.Error()
  182. }
  183. t := TimeFromExcelTime(f, c.date1904)
  184. return fmt.Sprintf("%0d%0d.%d", t.Minute(), t.Second(), t.Nanosecond()/1000)
  185. case "yyyy\\-mm\\-dd":
  186. return c.formatToTime("2006\\-01\\-02")
  187. case "dd/mm/yy":
  188. return c.formatToTime("02/01/06")
  189. case "hh:mm:ss":
  190. return c.formatToTime("15:04:05")
  191. case "dd/mm/yy\\ hh:mm":
  192. return c.formatToTime("02/01/06\\ 15:04")
  193. case "dd/mm/yyyy hh:mm:ss":
  194. return c.formatToTime("02/01/2006 15:04:05")
  195. case "yy-mm-dd":
  196. return c.formatToTime("06-01-02")
  197. case "d-mmm-yyyy":
  198. return c.formatToTime("2-Jan-2006")
  199. case "m/d/yy":
  200. return c.formatToTime("1/2/06")
  201. case "m/d/yyyy":
  202. return c.formatToTime("1/2/2006")
  203. case "dd-mmm-yyyy":
  204. return c.formatToTime("02-Jan-2006")
  205. case "dd/mm/yyyy":
  206. return c.formatToTime("02/01/2006")
  207. case "mm/dd/yy hh:mm am/pm":
  208. return c.formatToTime("01/02/06 03:04 pm")
  209. case "mm/dd/yyyy hh:mm:ss":
  210. return c.formatToTime("01/02/2006 15:04:05")
  211. case "yyyy-mm-dd hh:mm:ss":
  212. return c.formatToTime("2006-01-02 15:04:05")
  213. }
  214. return c.Value
  215. }
  216. // Row is a high level structure indended to provide user access to a
  217. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  218. type Row struct {
  219. Cells []*Cell
  220. }
  221. // Sheet is a high level structure intended to provide user access to
  222. // the contents of a particular sheet within an XLSX file.
  223. type Sheet struct {
  224. Name string
  225. Rows []*Row
  226. MaxRow int
  227. MaxCol int
  228. }
  229. // Style is a high level structure intended to provide user access to
  230. // the contents of Style within an XLSX file.
  231. type Style struct {
  232. Border Border
  233. Fill Fill
  234. Font Font
  235. }
  236. // Border is a high level structure intended to provide user access to
  237. // the contents of Border Style within an Sheet.
  238. type Border struct {
  239. Left string
  240. Right string
  241. Top string
  242. Bottom string
  243. }
  244. // Fill is a high level structure intended to provide user access to
  245. // the contents of background and foreground color index within an Sheet.
  246. type Fill struct {
  247. PatternType string
  248. BgColor string
  249. FgColor string
  250. }
  251. type Font struct {
  252. Size int
  253. Name string
  254. Family int
  255. Charset int
  256. }
  257. // File is a high level structure providing a slice of Sheet structs
  258. // to the user.
  259. type File struct {
  260. worksheets map[string]*zip.File
  261. numFmtRefTable map[int]xlsxNumFmt
  262. referenceTable []string
  263. styles *xlsxStyles
  264. Sheets []*Sheet // sheet access by index
  265. Sheet map[string]*Sheet // sheet access by name
  266. Date1904 bool
  267. }
  268. // getRangeFromString is an internal helper function that converts
  269. // XLSX internal range syntax to a pair of integers. For example,
  270. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  271. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  272. var parts []string
  273. parts = strings.SplitN(rangeString, ":", 2)
  274. if parts[0] == "" {
  275. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  276. }
  277. if parts[1] == "" {
  278. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  279. }
  280. lower, error = strconv.Atoi(parts[0])
  281. if error != nil {
  282. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  283. }
  284. upper, error = strconv.Atoi(parts[1])
  285. if error != nil {
  286. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  287. }
  288. return lower, upper, error
  289. }
  290. // lettersToNumeric is used to convert a character based column
  291. // reference to a zero based numeric column identifier.
  292. func lettersToNumeric(letters string) int {
  293. sum, mul, n := 0, 1, 0
  294. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  295. c := letters[i]
  296. switch {
  297. case 'A' <= c && c <= 'Z':
  298. n += int(c - 'A')
  299. case 'a' <= c && c <= 'z':
  300. n += int(c - 'a')
  301. }
  302. sum += n * mul
  303. }
  304. return sum
  305. }
  306. // letterOnlyMapF is used in conjunction with strings.Map to return
  307. // only the characters A-Z and a-z in a string
  308. func letterOnlyMapF(rune rune) rune {
  309. switch {
  310. case 'A' <= rune && rune <= 'Z':
  311. return rune
  312. case 'a' <= rune && rune <= 'z':
  313. return rune - 32
  314. }
  315. return -1
  316. }
  317. // intOnlyMapF is used in conjunction with strings.Map to return only
  318. // the numeric portions of a string.
  319. func intOnlyMapF(rune rune) rune {
  320. if rune >= 48 && rune < 58 {
  321. return rune
  322. }
  323. return -1
  324. }
  325. // getCoordsFromCellIDString returns the zero based cartesian
  326. // coordinates from a cell name in Excel format, e.g. the cellIDString
  327. // "A1" returns 0, 0 and the "B3" return 1, 2.
  328. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  329. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  330. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  331. if error != nil {
  332. return x, y, error
  333. }
  334. y -= 1 // Zero based
  335. x = lettersToNumeric(letterPart)
  336. return x, y, error
  337. }
  338. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  339. // and minimum coordinates from the dimension reference embedded in a
  340. // XLSX worksheet. For example, the dimension reference "A1:B2"
  341. // returns "0,0", "1,1".
  342. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  343. var parts []string
  344. parts = strings.Split(ref, ":")
  345. minx, miny, err = getCoordsFromCellIDString(parts[0])
  346. if err != nil {
  347. return -1, -1, -1, -1, err
  348. }
  349. if len(parts) == 1 {
  350. maxx, maxy = minx, miny
  351. return
  352. }
  353. maxx, maxy, err = getCoordsFromCellIDString(parts[1])
  354. if err != nil {
  355. return -1, -1, -1, -1, err
  356. }
  357. return
  358. }
  359. // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet
  360. // that doesn't have a DimensionRef set. The only case currently
  361. // known where this is true is with XLSX exported from Google Docs.
  362. func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) {
  363. // Note, this method could be very slow for large spreadsheets.
  364. var x, y int
  365. minx = math.MaxInt64
  366. miny = math.MaxInt64
  367. maxy = 0
  368. maxx = 0
  369. for _, row := range worksheet.SheetData.Row {
  370. for _, cell := range row.C {
  371. x, y, err = getCoordsFromCellIDString(cell.R)
  372. if err != nil {
  373. return -1, -1, -1, -1, err
  374. }
  375. if x < minx {
  376. minx = x
  377. }
  378. if x > maxx {
  379. maxx = x
  380. }
  381. if y < miny {
  382. miny = y
  383. }
  384. if y > maxy {
  385. maxy = y
  386. }
  387. }
  388. }
  389. if minx == math.MaxInt64 { minx = 0 }
  390. if miny == math.MaxInt64 { miny = 0 }
  391. return
  392. }
  393. // makeRowFromSpan will, when given a span expressed as a string,
  394. // return an empty Row large enough to encompass that span and
  395. // populate it with empty cells. All rows start from cell 1 -
  396. // regardless of the lower bound of the span.
  397. func makeRowFromSpan(spans string) *Row {
  398. var error error
  399. var upper int
  400. var row *Row
  401. var cell *Cell
  402. row = new(Row)
  403. _, upper, error = getRangeFromString(spans)
  404. if error != nil {
  405. panic(error)
  406. }
  407. error = nil
  408. row.Cells = make([]*Cell, upper)
  409. for i := 0; i < upper; i++ {
  410. cell = new(Cell)
  411. cell.Value = ""
  412. row.Cells[i] = cell
  413. }
  414. return row
  415. }
  416. // makeRowFromRaw returns the Row representation of the xlsxRow.
  417. func makeRowFromRaw(rawrow xlsxRow) *Row {
  418. var upper int
  419. var row *Row
  420. var cell *Cell
  421. row = new(Row)
  422. upper = -1
  423. for _, rawcell := range rawrow.C {
  424. x, _, error := getCoordsFromCellIDString(rawcell.R)
  425. if error != nil {
  426. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  427. }
  428. if x > upper {
  429. upper = x
  430. }
  431. }
  432. upper++
  433. row.Cells = make([]*Cell, upper)
  434. for i := 0; i < upper; i++ {
  435. cell = new(Cell)
  436. cell.Value = ""
  437. row.Cells[i] = cell
  438. }
  439. return row
  440. }
  441. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  442. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  443. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  444. var value string = ""
  445. var data string = rawcell.V
  446. if len(data) > 0 {
  447. vval := strings.Trim(data, " \t\n\r")
  448. switch rawcell.T {
  449. case "s": // Shared String
  450. ref, error := strconv.Atoi(vval)
  451. if error != nil {
  452. panic(error)
  453. }
  454. value = reftable[ref]
  455. default:
  456. value = vval
  457. }
  458. }
  459. return value
  460. }
  461. // readRowsFromSheet is an internal helper function that extracts the
  462. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  463. // the value references from the reference table and stores them in
  464. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
  465. var rows []*Row
  466. var row *Row
  467. var minCol, maxCol, minRow, maxRow, colCount, rowCount int
  468. var reftable []string
  469. var err error
  470. var insertRowIndex, insertColIndex int
  471. if len(Worksheet.SheetData.Row) == 0 {
  472. return nil, 0, 0
  473. }
  474. reftable = file.referenceTable
  475. if len(Worksheet.Dimension.Ref) > 0 {
  476. minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
  477. } else {
  478. minCol, minRow, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet)
  479. }
  480. if err != nil {
  481. panic(err.Error())
  482. }
  483. rowCount = (maxRow - minRow) + 1
  484. colCount = (maxCol - minCol) + 1
  485. rows = make([]*Row, rowCount)
  486. insertRowIndex = minRow
  487. for rowIndex := 0; rowIndex < len(Worksheet.SheetData.Row); rowIndex++ {
  488. rawrow := Worksheet.SheetData.Row[rowIndex]
  489. // Some spreadsheets will omit blank rows from the
  490. // stored data
  491. for rawrow.R > (insertRowIndex + 1) {
  492. // Put an empty Row into the array
  493. rows[insertRowIndex-minRow] = new(Row)
  494. insertRowIndex++
  495. }
  496. // range is not empty and only one range exist
  497. if len(rawrow.Spans) != 0 && strings.Count(rawrow.Spans, ":") == 1 {
  498. row = makeRowFromSpan(rawrow.Spans)
  499. } else {
  500. row = makeRowFromRaw(rawrow)
  501. }
  502. insertColIndex = minCol
  503. for _, rawcell := range rawrow.C {
  504. x, _, _ := getCoordsFromCellIDString(rawcell.R)
  505. // Some spreadsheets will omit blank cells
  506. // from the data.
  507. for x > insertColIndex {
  508. // Put an empty Cell into the array
  509. row.Cells[insertColIndex-minCol] = new(Cell)
  510. insertColIndex++
  511. }
  512. cellX := insertColIndex - minCol
  513. row.Cells[cellX].Value = getValueFromCellData(rawcell, reftable)
  514. row.Cells[cellX].styleIndex = rawcell.S
  515. row.Cells[cellX].styles = file.styles
  516. row.Cells[cellX].numFmtRefTable = file.numFmtRefTable
  517. row.Cells[cellX].date1904 = file.Date1904
  518. insertColIndex++
  519. }
  520. if len(rows) > insertRowIndex-minRow {
  521. rows[insertRowIndex-minRow] = row
  522. }
  523. insertRowIndex++
  524. }
  525. return rows, colCount, rowCount
  526. }
  527. type indexedSheet struct {
  528. Index int
  529. Sheet *Sheet
  530. Error error
  531. }
  532. // readSheetFromFile is the logic of converting a xlsxSheet struct
  533. // into a Sheet struct. This work can be done in parallel and so
  534. // readSheetsFromZipFile will spawn an instance of this function per
  535. // sheet and get the results back on the provided channel.
  536. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) {
  537. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  538. worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
  539. if error != nil {
  540. result.Error = error
  541. sc <- result
  542. return
  543. }
  544. sheet := new(Sheet)
  545. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi)
  546. result.Sheet = sheet
  547. sc <- result
  548. }
  549. // readSheetsFromZipFile is an internal helper function that loops
  550. // over the Worksheets defined in the XSLXWorkbook and loads them into
  551. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  552. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) ([]*Sheet, error) {
  553. var workbook *xlsxWorkbook
  554. var error error
  555. var rc io.ReadCloser
  556. var decoder *xml.Decoder
  557. var sheetCount int
  558. workbook = new(xlsxWorkbook)
  559. rc, error = f.Open()
  560. if error != nil {
  561. return nil, error
  562. }
  563. decoder = xml.NewDecoder(rc)
  564. error = decoder.Decode(workbook)
  565. if error != nil {
  566. return nil, error
  567. }
  568. file.Date1904 = workbook.WorkbookPr.Date1904
  569. sheetCount = len(workbook.Sheets.Sheet)
  570. sheets := make([]*Sheet, sheetCount)
  571. sheetChan := make(chan *indexedSheet, sheetCount)
  572. for i, rawsheet := range workbook.Sheets.Sheet {
  573. go readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap)
  574. }
  575. for j := 0; j < sheetCount; j++ {
  576. sheet := <-sheetChan
  577. if sheet.Error != nil {
  578. return nil, sheet.Error
  579. }
  580. sheet.Sheet.Name = workbook.Sheets.Sheet[sheet.Index].Name
  581. sheets[sheet.Index] = sheet.Sheet
  582. }
  583. return sheets, nil
  584. }
  585. // readSharedStringsFromZipFile() is an internal helper function to
  586. // extract a reference table from the sharedStrings.xml file within
  587. // the XLSX zip file.
  588. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  589. var sst *xlsxSST
  590. var error error
  591. var rc io.ReadCloser
  592. var decoder *xml.Decoder
  593. var reftable []string
  594. // In a file with no strings it's possible that
  595. // sharedStrings.xml doesn't exist. In this case the value
  596. // passed as f will be nil.
  597. if f == nil {
  598. reftable = make([]string, 0)
  599. return reftable, nil
  600. }
  601. rc, error = f.Open()
  602. if error != nil {
  603. return nil, error
  604. }
  605. sst = new(xlsxSST)
  606. decoder = xml.NewDecoder(rc)
  607. error = decoder.Decode(sst)
  608. if error != nil {
  609. return nil, error
  610. }
  611. reftable = MakeSharedStringRefTable(sst)
  612. return reftable, nil
  613. }
  614. // readStylesFromZipFile() is an internal helper function to
  615. // extract a style table from the style.xml file within
  616. // the XLSX zip file.
  617. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  618. var style *xlsxStyles
  619. var error error
  620. var rc io.ReadCloser
  621. var decoder *xml.Decoder
  622. rc, error = f.Open()
  623. if error != nil {
  624. return nil, error
  625. }
  626. style = new(xlsxStyles)
  627. decoder = xml.NewDecoder(rc)
  628. error = decoder.Decode(style)
  629. if error != nil {
  630. return nil, error
  631. }
  632. return style, nil
  633. }
  634. func buildNumFmtRefTable(style *xlsxStyles) map[int]xlsxNumFmt {
  635. refTable := make(map[int]xlsxNumFmt)
  636. for _, numFmt := range style.NumFmts {
  637. refTable[numFmt.NumFmtId] = numFmt
  638. }
  639. return refTable
  640. }
  641. // readWorkbookRelationsFromZipFile is an internal helper function to
  642. // extract a map of relationship ID strings to the name of the
  643. // worksheet.xml file they refer to. The resulting map can be used to
  644. // reliably derefence the worksheets in the XLSX file.
  645. func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (map[string]string, error) {
  646. var sheetXMLMap map[string]string
  647. var wbRelationships *xlsxWorkbookRels
  648. var rc io.ReadCloser
  649. var decoder *xml.Decoder
  650. var err error
  651. rc, err = workbookRels.Open()
  652. if err != nil {
  653. return nil, err
  654. }
  655. decoder = xml.NewDecoder(rc)
  656. wbRelationships = new(xlsxWorkbookRels)
  657. err = decoder.Decode(wbRelationships)
  658. if err != nil {
  659. return nil, err
  660. }
  661. sheetXMLMap = make(map[string]string)
  662. for _, rel := range wbRelationships.Relationships {
  663. if strings.HasSuffix(rel.Target, ".xml") && rel.Type == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" {
  664. _, filename := path.Split(rel.Target)
  665. sheetXMLMap[rel.Id] = strings.Replace(filename, ".xml", "", 1)
  666. }
  667. }
  668. return sheetXMLMap, nil
  669. }
  670. // OpenFile() take the name of an XLSX file and returns a populated
  671. // xlsx.File struct for it.
  672. func OpenFile(filename string) (*File, error) {
  673. var f *zip.ReadCloser
  674. f, err := zip.OpenReader(filename)
  675. if err != nil {
  676. return nil, err
  677. }
  678. return ReadZip(f)
  679. }
  680. // ReadZip() takes a pointer to a zip.ReadCloser and returns a
  681. // xlsx.File struct populated with its contents. In most cases
  682. // ReadZip is not used directly, but is called internally by OpenFile.
  683. func ReadZip(f *zip.ReadCloser) (*File, error) {
  684. defer f.Close()
  685. return ReadZipReader(&f.Reader)
  686. }
  687. // ReadZipReader() can be used to read xlsx in memory without touch filesystem.
  688. func ReadZipReader(r *zip.Reader) (*File, error) {
  689. var err error
  690. var file *File
  691. var reftable []string
  692. var sharedStrings *zip.File
  693. var sheetMap map[string]*Sheet
  694. var sheetXMLMap map[string]string
  695. var sheets []*Sheet
  696. var style *xlsxStyles
  697. var styles *zip.File
  698. var v *zip.File
  699. var workbook *zip.File
  700. var workbookRels *zip.File
  701. var worksheets map[string]*zip.File
  702. file = new(File)
  703. worksheets = make(map[string]*zip.File, len(r.File))
  704. for _, v = range r.File {
  705. switch v.Name {
  706. case "xl/sharedStrings.xml":
  707. sharedStrings = v
  708. case "xl/workbook.xml":
  709. workbook = v
  710. case "xl/_rels/workbook.xml.rels":
  711. workbookRels = v
  712. case "xl/styles.xml":
  713. styles = v
  714. default:
  715. if len(v.Name) > 14 {
  716. if v.Name[0:13] == "xl/worksheets" {
  717. worksheets[v.Name[14:len(v.Name)-4]] = v
  718. }
  719. }
  720. }
  721. }
  722. sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels)
  723. if err != nil {
  724. return nil, err
  725. }
  726. file.worksheets = worksheets
  727. reftable, err = readSharedStringsFromZipFile(sharedStrings)
  728. if err != nil {
  729. return nil, err
  730. }
  731. file.referenceTable = reftable
  732. if styles != nil {
  733. style, err = readStylesFromZipFile(styles)
  734. if err != nil {
  735. return nil, err
  736. }
  737. file.styles = style
  738. }
  739. sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap)
  740. if err != nil {
  741. return nil, err
  742. }
  743. if sheets == nil {
  744. readerErr := new(XLSXReaderError)
  745. readerErr.Err = "No sheets found in XLSX File"
  746. return nil, readerErr
  747. }
  748. file.Sheets = sheets
  749. sheetMap = make(map[string]*Sheet, len(sheets))
  750. for i := 0; i < len(sheets); i++ {
  751. sheetMap[sheets[i].Name] = sheets[i]
  752. }
  753. file.Sheet = sheetMap
  754. return file, nil
  755. }
  756. func NewFile() *File {
  757. return &File{}
  758. }