lib.go 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "path"
  9. "strconv"
  10. "strings"
  11. )
  12. // XLSXReaderError is the standard error type for otherwise undefined
  13. // errors in the XSLX reading process.
  14. type XLSXReaderError struct {
  15. Err string
  16. }
  17. // Error returns a string value from an XLSXReaderError struct in order
  18. // that it might comply with the builtin.error interface.
  19. func (e *XLSXReaderError) Error() string {
  20. return e.Err
  21. }
  22. // getRangeFromString is an internal helper function that converts
  23. // XLSX internal range syntax to a pair of integers. For example,
  24. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  25. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  26. var parts []string
  27. parts = strings.SplitN(rangeString, ":", 2)
  28. if parts[0] == "" {
  29. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  30. }
  31. if parts[1] == "" {
  32. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  33. }
  34. lower, error = strconv.Atoi(parts[0])
  35. if error != nil {
  36. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  37. }
  38. upper, error = strconv.Atoi(parts[1])
  39. if error != nil {
  40. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  41. }
  42. return lower, upper, error
  43. }
  44. // lettersToNumeric is used to convert a character based column
  45. // reference to a zero based numeric column identifier.
  46. func lettersToNumeric(letters string) int {
  47. sum, mul, n := 0, 1, 0
  48. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  49. c := letters[i]
  50. switch {
  51. case 'A' <= c && c <= 'Z':
  52. n += int(c - 'A')
  53. case 'a' <= c && c <= 'z':
  54. n += int(c - 'a')
  55. }
  56. sum += n * mul
  57. }
  58. return sum
  59. }
  60. // Get the largestDenominator that is a multiple of a basedDenominator
  61. // and fits at least once into a given numerator.
  62. func getLargestDenominator(numerator, multiple, baseDenominator, power int) (int, int) {
  63. if numerator/multiple == 0 {
  64. return 1, power
  65. }
  66. next, nextPower := getLargestDenominator(
  67. numerator, multiple*baseDenominator, baseDenominator, power+1)
  68. if next > multiple {
  69. return next, nextPower
  70. }
  71. return multiple, power
  72. }
  73. // Convers a list of numbers representing a column into a alphabetic
  74. // representation, as used in the spreadsheet.
  75. func formatColumnName(colId []int) string {
  76. lastPart := len(colId) - 1
  77. result := ""
  78. for n, part := range colId {
  79. if n == lastPart {
  80. // The least significant number is in the
  81. // range 0-25, all other numbers are 1-26,
  82. // hence we use a differente offset for the
  83. // last part.
  84. result += string(part + 65)
  85. } else {
  86. // Don't output leading 0s, as there is no
  87. // representation of 0 in this format.
  88. if part > 0 {
  89. result += string(part + 64)
  90. }
  91. }
  92. }
  93. return result
  94. }
  95. func smooshBase26Slice(b26 []int) []int {
  96. // Smoosh values together, eliminating 0s from all but the
  97. // least significant part.
  98. lastButOnePart := len(b26) - 2
  99. for i := lastButOnePart; i > 0; i-- {
  100. part := b26[i]
  101. if part == 0 {
  102. greaterPart := b26[i-1]
  103. if greaterPart > 0 {
  104. b26[i-1] = greaterPart - 1
  105. b26[i] = 26
  106. }
  107. }
  108. }
  109. return b26
  110. }
  111. func intToBase26(x int) (parts []int) {
  112. // Excel column codes are pure evil - in essence they're just
  113. // base26, but they don't represent the number 0.
  114. b26Denominator, _ := getLargestDenominator(x, 1, 26, 0)
  115. // This loop terminates because integer division of 1 / 26
  116. // returns 0.
  117. for d := b26Denominator; d > 0; d = d / 26 {
  118. value := x / d
  119. remainder := x % d
  120. parts = append(parts, value)
  121. x = remainder
  122. }
  123. return parts
  124. }
  125. // numericToLetters is used to convert a zero based, numeric column
  126. // indentifier into a character code.
  127. func numericToLetters(colRef int) string {
  128. parts := intToBase26(colRef)
  129. return formatColumnName(smooshBase26Slice(parts))
  130. }
  131. // letterOnlyMapF is used in conjunction with strings.Map to return
  132. // only the characters A-Z and a-z in a string
  133. func letterOnlyMapF(rune rune) rune {
  134. switch {
  135. case 'A' <= rune && rune <= 'Z':
  136. return rune
  137. case 'a' <= rune && rune <= 'z':
  138. return rune - 32
  139. }
  140. return -1
  141. }
  142. // intOnlyMapF is used in conjunction with strings.Map to return only
  143. // the numeric portions of a string.
  144. func intOnlyMapF(rune rune) rune {
  145. if rune >= 48 && rune < 58 {
  146. return rune
  147. }
  148. return -1
  149. }
  150. // getCoordsFromCellIDString returns the zero based cartesian
  151. // coordinates from a cell name in Excel format, e.g. the cellIDString
  152. // "A1" returns 0, 0 and the "B3" return 1, 2.
  153. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  154. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  155. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  156. if error != nil {
  157. return x, y, error
  158. }
  159. y -= 1 // Zero based
  160. x = lettersToNumeric(letterPart)
  161. return x, y, error
  162. }
  163. // getCellIDStringFromCoords returns the Excel format cell name that
  164. // represents a pair of zero based cartesian coordinates.
  165. func getCellIDStringFromCoords(x, y int) string {
  166. letterPart := numericToLetters(x)
  167. numericPart := y + 1
  168. return fmt.Sprintf("%s%d", letterPart, numericPart)
  169. }
  170. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  171. // and minimum coordinates from the dimension reference embedded in a
  172. // XLSX worksheet. For example, the dimension reference "A1:B2"
  173. // returns "0,0", "1,1".
  174. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  175. var parts []string
  176. parts = strings.Split(ref, ":")
  177. minx, miny, err = getCoordsFromCellIDString(parts[0])
  178. if err != nil {
  179. return -1, -1, -1, -1, err
  180. }
  181. if len(parts) == 1 {
  182. maxx, maxy = minx, miny
  183. return
  184. }
  185. maxx, maxy, err = getCoordsFromCellIDString(parts[1])
  186. if err != nil {
  187. return -1, -1, -1, -1, err
  188. }
  189. return
  190. }
  191. // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet
  192. // that doesn't have a DimensionRef set. The only case currently
  193. // known where this is true is with XLSX exported from Google Docs.
  194. func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) {
  195. // Note, this method could be very slow for large spreadsheets.
  196. var x, y int
  197. var maxVal int
  198. maxVal = int(^uint(0) >> 1)
  199. minx = maxVal
  200. miny = maxVal
  201. maxy = 0
  202. maxx = 0
  203. for _, row := range worksheet.SheetData.Row {
  204. for _, cell := range row.C {
  205. x, y, err = getCoordsFromCellIDString(cell.R)
  206. if err != nil {
  207. return -1, -1, -1, -1, err
  208. }
  209. if x < minx {
  210. minx = x
  211. }
  212. if x > maxx {
  213. maxx = x
  214. }
  215. if y < miny {
  216. miny = y
  217. }
  218. if y > maxy {
  219. maxy = y
  220. }
  221. }
  222. }
  223. if minx == maxVal {
  224. minx = 0
  225. }
  226. if miny == maxVal {
  227. miny = 0
  228. }
  229. return
  230. }
  231. // makeRowFromSpan will, when given a span expressed as a string,
  232. // return an empty Row large enough to encompass that span and
  233. // populate it with empty cells. All rows start from cell 1 -
  234. // regardless of the lower bound of the span.
  235. func makeRowFromSpan(spans string, sheet *Sheet) *Row {
  236. var error error
  237. var upper int
  238. var row *Row
  239. var cell *Cell
  240. row = new(Row)
  241. row.Sheet = sheet
  242. _, upper, error = getRangeFromString(spans)
  243. if error != nil {
  244. panic(error)
  245. }
  246. error = nil
  247. row.Cells = make([]*Cell, upper)
  248. for i := 0; i < upper; i++ {
  249. cell = new(Cell)
  250. cell.Value = ""
  251. row.Cells[i] = cell
  252. }
  253. return row
  254. }
  255. // makeRowFromRaw returns the Row representation of the xlsxRow.
  256. func makeRowFromRaw(rawrow xlsxRow, sheet *Sheet) *Row {
  257. var upper int
  258. var row *Row
  259. var cell *Cell
  260. row = new(Row)
  261. row.Sheet = sheet
  262. upper = -1
  263. for _, rawcell := range rawrow.C {
  264. if rawcell.R != "" {
  265. x, _, error := getCoordsFromCellIDString(rawcell.R)
  266. if error != nil {
  267. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  268. }
  269. if x > upper {
  270. upper = x
  271. }
  272. continue
  273. }
  274. upper++
  275. }
  276. upper++
  277. row.Cells = make([]*Cell, upper)
  278. for i := 0; i < upper; i++ {
  279. cell = new(Cell)
  280. cell.Value = ""
  281. row.Cells[i] = cell
  282. }
  283. return row
  284. }
  285. func makeEmptyRow(sheet *Sheet) *Row {
  286. row := new(Row)
  287. row.Cells = make([]*Cell, 0)
  288. row.Sheet = sheet
  289. return row
  290. }
  291. type sharedFormula struct {
  292. x, y int
  293. formula string
  294. }
  295. func formulaForCell(rawcell xlsxC, sharedFormulas map[int]sharedFormula) string {
  296. var res string
  297. f := rawcell.F
  298. if f == nil {
  299. return ""
  300. }
  301. if f.T == "shared" {
  302. x, y, err := getCoordsFromCellIDString(rawcell.R)
  303. if err != nil {
  304. res = f.Content
  305. } else {
  306. if f.Ref != "" {
  307. res = f.Content
  308. sharedFormulas[f.Si] = sharedFormula{x, y, res}
  309. } else {
  310. sharedFormula := sharedFormulas[f.Si]
  311. dx := x - sharedFormula.x
  312. dy := y - sharedFormula.y
  313. orig := []byte(sharedFormula.formula)
  314. var start, end int
  315. var stringLiteral bool
  316. for end = 0; end < len(orig); end++ {
  317. c := orig[end]
  318. if c == '"' {
  319. stringLiteral = !stringLiteral
  320. }
  321. if stringLiteral {
  322. continue // Skip characters in quotes
  323. }
  324. if c >= 'A' && c <= 'Z' || c == '$' {
  325. res += string(orig[start:end])
  326. start = end
  327. end++
  328. foundNum := false
  329. for ; end < len(orig); end++ {
  330. idc := orig[end]
  331. if idc >= '0' && idc <= '9' || idc == '$' {
  332. foundNum = true
  333. } else if idc >= 'A' && idc <= 'Z' {
  334. if foundNum {
  335. break
  336. }
  337. } else {
  338. break
  339. }
  340. }
  341. if foundNum {
  342. cellID := string(orig[start:end])
  343. res += shiftCell(cellID, dx, dy)
  344. start = end
  345. }
  346. }
  347. }
  348. if start < len(orig) {
  349. res += string(orig[start:end])
  350. }
  351. }
  352. }
  353. } else {
  354. res = f.Content
  355. }
  356. return strings.Trim(res, " \t\n\r")
  357. }
  358. // shiftCell returns the cell shifted according to dx and dy taking into consideration of absolute
  359. // references with dollar sign ($)
  360. func shiftCell(cellID string, dx, dy int) string {
  361. fx, fy, _ := getCoordsFromCellIDString(cellID)
  362. // Is fixed column?
  363. fixedCol := strings.Index(cellID, "$") == 0
  364. // Is fixed row?
  365. fixedRow := strings.LastIndex(cellID, "$") > 0
  366. if !fixedCol {
  367. // Shift column
  368. fx += dx
  369. }
  370. if !fixedRow {
  371. // Shift row
  372. fy += dy
  373. }
  374. // New shifted cell
  375. shiftedCellID := getCellIDStringFromCoords(fx, fy)
  376. if !fixedCol && !fixedRow {
  377. return shiftedCellID
  378. }
  379. // There are absolute references, need to put the $ back into the formula.
  380. letterPart := strings.Map(letterOnlyMapF, shiftedCellID)
  381. numberPart := strings.Map(intOnlyMapF, shiftedCellID)
  382. result := ""
  383. if fixedCol {
  384. result += "$"
  385. }
  386. result += letterPart
  387. if fixedRow {
  388. result += "$"
  389. }
  390. result += numberPart
  391. return result
  392. }
  393. // fillCellData attempts to extract a valid value, usable in
  394. // CSV form from the raw cell value. Note - this is not actually
  395. // general enough - we should support retaining tabs and newlines.
  396. func fillCellData(rawcell xlsxC, reftable *RefTable, sharedFormulas map[int]sharedFormula, cell *Cell) {
  397. var data string = rawcell.V
  398. if len(data) > 0 {
  399. vval := strings.Trim(data, " \t\n\r")
  400. switch rawcell.T {
  401. case "s": // Shared String
  402. ref, error := strconv.Atoi(vval)
  403. if error != nil {
  404. panic(error)
  405. }
  406. cell.Value = reftable.ResolveSharedString(ref)
  407. cell.cellType = CellTypeString
  408. case "b": // Boolean
  409. cell.Value = vval
  410. cell.cellType = CellTypeBool
  411. case "e": // Error
  412. cell.Value = vval
  413. cell.formula = formulaForCell(rawcell, sharedFormulas)
  414. cell.cellType = CellTypeError
  415. default:
  416. if rawcell.F == nil {
  417. // Numeric
  418. cell.Value = vval
  419. cell.cellType = CellTypeNumeric
  420. } else {
  421. // Formula
  422. cell.Value = vval
  423. cell.formula = formulaForCell(rawcell, sharedFormulas)
  424. cell.cellType = CellTypeFormula
  425. }
  426. }
  427. }
  428. }
  429. // readRowsFromSheet is an internal helper function that extracts the
  430. // rows from a XSLXWorksheet, populates them with Cells and resolves
  431. // the value references from the reference table and stores them in
  432. // the rows and columns.
  433. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet) ([]*Row, []*Col, int, int) {
  434. var rows []*Row
  435. var cols []*Col
  436. var row *Row
  437. var minCol, maxCol, minRow, maxRow, colCount, rowCount int
  438. var reftable *RefTable
  439. var err error
  440. var insertRowIndex, insertColIndex int
  441. sharedFormulas := map[int]sharedFormula{}
  442. if len(Worksheet.SheetData.Row) == 0 {
  443. return nil, nil, 0, 0
  444. }
  445. reftable = file.referenceTable
  446. if len(Worksheet.Dimension.Ref) > 0 {
  447. minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
  448. } else {
  449. minCol, minRow, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet)
  450. }
  451. if err != nil {
  452. panic(err.Error())
  453. }
  454. rowCount = maxRow + 1
  455. colCount = maxCol + 1
  456. rows = make([]*Row, rowCount)
  457. cols = make([]*Col, colCount)
  458. insertRowIndex = minRow
  459. for i := range cols {
  460. cols[i] = &Col{
  461. Hidden: false,
  462. }
  463. }
  464. if Worksheet.Cols != nil {
  465. // Columns can apply to a range, for convenience we expand the
  466. // ranges out into individual column definitions.
  467. for _, rawcol := range Worksheet.Cols.Col {
  468. // Note, below, that sometimes column definitions can
  469. // exist outside the defined dimensions of the
  470. // spreadsheet - we deliberately exclude these
  471. // columns.
  472. for i := rawcol.Min; i <= rawcol.Max && i <= colCount; i++ {
  473. col := &Col{
  474. Min: rawcol.Min,
  475. Max: rawcol.Max,
  476. Hidden: rawcol.Hidden,
  477. Width: rawcol.Width}
  478. cols[i-1] = col
  479. if file.styles != nil {
  480. col.style = file.styles.getStyle(rawcol.Style)
  481. col.numFmt = file.styles.getNumberFormat(rawcol.Style)
  482. }
  483. }
  484. }
  485. }
  486. // insert leading empty rows that is in front of minRow
  487. for rowIndex := 0; rowIndex < minRow; rowIndex++ {
  488. rows[rowIndex] = makeEmptyRow(sheet)
  489. }
  490. numRows := len(rows)
  491. for rowIndex := 0; rowIndex < len(Worksheet.SheetData.Row); rowIndex++ {
  492. rawrow := Worksheet.SheetData.Row[rowIndex]
  493. // Some spreadsheets will omit blank rows from the
  494. // stored data
  495. for rawrow.R > (insertRowIndex + 1) {
  496. // Put an empty Row into the array
  497. if insertRowIndex < numRows {
  498. rows[insertRowIndex] = makeEmptyRow(sheet)
  499. }
  500. insertRowIndex++
  501. }
  502. // range is not empty and only one range exist
  503. if len(rawrow.Spans) != 0 && strings.Count(rawrow.Spans, ":") == 1 {
  504. row = makeRowFromSpan(rawrow.Spans, sheet)
  505. } else {
  506. row = makeRowFromRaw(rawrow, sheet)
  507. }
  508. row.Hidden = rawrow.Hidden
  509. insertColIndex = minCol
  510. for _, rawcell := range rawrow.C {
  511. h, v, err := Worksheet.MergeCells.getExtent(rawcell.R)
  512. if err != nil {
  513. panic(err.Error())
  514. }
  515. x, _, _ := getCoordsFromCellIDString(rawcell.R)
  516. // Some spreadsheets will omit blank cells
  517. // from the data.
  518. for x > insertColIndex {
  519. // Put an empty Cell into the array
  520. row.Cells[insertColIndex] = new(Cell)
  521. insertColIndex++
  522. }
  523. cellX := insertColIndex
  524. cell := row.Cells[cellX]
  525. cell.HMerge = h
  526. cell.VMerge = v
  527. fillCellData(rawcell, reftable, sharedFormulas, cell)
  528. if file.styles != nil {
  529. cell.style = file.styles.getStyle(rawcell.S)
  530. cell.NumFmt = file.styles.getNumberFormat(rawcell.S)
  531. }
  532. cell.date1904 = file.Date1904
  533. // Cell is considered hidden if the row or the column of this cell is hidden
  534. cell.Hidden = rawrow.Hidden || (len(cols) > cellX && cols[cellX].Hidden)
  535. insertColIndex++
  536. }
  537. if len(rows) > insertRowIndex {
  538. rows[insertRowIndex] = row
  539. }
  540. insertRowIndex++
  541. }
  542. return rows, cols, colCount, rowCount
  543. }
  544. type indexedSheet struct {
  545. Index int
  546. Sheet *Sheet
  547. Error error
  548. }
  549. func readSheetViews(xSheetViews xlsxSheetViews) []SheetView {
  550. if xSheetViews.SheetView == nil || len(xSheetViews.SheetView) == 0 {
  551. return nil
  552. }
  553. sheetViews := []SheetView{}
  554. for _, xSheetView := range xSheetViews.SheetView {
  555. sheetView := SheetView{}
  556. if xSheetView.Pane != nil {
  557. xlsxPane := xSheetView.Pane
  558. pane := &Pane{}
  559. pane.XSplit = xlsxPane.XSplit
  560. pane.YSplit = xlsxPane.YSplit
  561. pane.TopLeftCell = xlsxPane.TopLeftCell
  562. pane.ActivePane = xlsxPane.ActivePane
  563. pane.State = xlsxPane.State
  564. sheetView.Pane = pane
  565. }
  566. sheetViews = append(sheetViews, sheetView)
  567. }
  568. return sheetViews
  569. }
  570. // readSheetFromFile is the logic of converting a xlsxSheet struct
  571. // into a Sheet struct. This work can be done in parallel and so
  572. // readSheetsFromZipFile will spawn an instance of this function per
  573. // sheet and get the results back on the provided channel.
  574. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) {
  575. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  576. defer func() {
  577. if e := recover(); e != nil {
  578. switch e.(type) {
  579. case error:
  580. result.Error = e.(error)
  581. default:
  582. result.Error = errors.New("unexpected error")
  583. }
  584. // The only thing here, is if one close the channel. but its not the case
  585. sc <- result
  586. }
  587. }()
  588. worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
  589. if error != nil {
  590. result.Error = error
  591. sc <- result
  592. return
  593. }
  594. sheet := new(Sheet)
  595. sheet.File = fi
  596. sheet.Rows, sheet.Cols, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi, sheet)
  597. sheet.Hidden = rsheet.State == sheetStateHidden || rsheet.State == sheetStateVeryHidden
  598. sheet.SheetViews = readSheetViews(worksheet.SheetViews)
  599. sheet.SheetFormat.DefaultColWidth = worksheet.SheetFormatPr.DefaultColWidth
  600. sheet.SheetFormat.DefaultRowHeight = worksheet.SheetFormatPr.DefaultRowHeight
  601. result.Sheet = sheet
  602. sc <- result
  603. }
  604. // readSheetsFromZipFile is an internal helper function that loops
  605. // over the Worksheets defined in the XSLXWorkbook and loads them into
  606. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  607. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) (map[string]*Sheet, []*Sheet, error) {
  608. var workbook *xlsxWorkbook
  609. var err error
  610. var rc io.ReadCloser
  611. var decoder *xml.Decoder
  612. var sheetCount int
  613. workbook = new(xlsxWorkbook)
  614. rc, err = f.Open()
  615. if err != nil {
  616. return nil, nil, err
  617. }
  618. decoder = xml.NewDecoder(rc)
  619. err = decoder.Decode(workbook)
  620. if err != nil {
  621. return nil, nil, err
  622. }
  623. file.Date1904 = workbook.WorkbookPr.Date1904
  624. for entryNum, _ := range workbook.DefinedNames.DefinedName {
  625. file.DefinedNames = append(file.DefinedNames, &workbook.DefinedNames.DefinedName[entryNum])
  626. }
  627. // Only try and read sheets that have corresponding files.
  628. // Notably this excludes chartsheets don't right now
  629. var workbookSheets []xlsxSheet
  630. for _, sheet := range workbook.Sheets.Sheet {
  631. if f := worksheetFileForSheet(sheet, file.worksheets, sheetXMLMap); f != nil {
  632. workbookSheets = append(workbookSheets, sheet)
  633. }
  634. }
  635. sheetCount = len(workbookSheets)
  636. sheetsByName := make(map[string]*Sheet, sheetCount)
  637. sheets := make([]*Sheet, sheetCount)
  638. sheetChan := make(chan *indexedSheet, sheetCount)
  639. defer close(sheetChan)
  640. go func() {
  641. err = nil
  642. for i, rawsheet := range workbookSheets {
  643. readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap)
  644. }
  645. }()
  646. for j := 0; j < sheetCount; j++ {
  647. sheet := <-sheetChan
  648. if sheet.Error != nil {
  649. return nil, nil, sheet.Error
  650. }
  651. sheetName := workbookSheets[sheet.Index].Name
  652. sheetsByName[sheetName] = sheet.Sheet
  653. sheet.Sheet.Name = sheetName
  654. sheets[sheet.Index] = sheet.Sheet
  655. }
  656. return sheetsByName, sheets, nil
  657. }
  658. // readSharedStringsFromZipFile() is an internal helper function to
  659. // extract a reference table from the sharedStrings.xml file within
  660. // the XLSX zip file.
  661. func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
  662. var sst *xlsxSST
  663. var error error
  664. var rc io.ReadCloser
  665. var decoder *xml.Decoder
  666. var reftable *RefTable
  667. // In a file with no strings it's possible that
  668. // sharedStrings.xml doesn't exist. In this case the value
  669. // passed as f will be nil.
  670. if f == nil {
  671. return nil, nil
  672. }
  673. rc, error = f.Open()
  674. if error != nil {
  675. return nil, error
  676. }
  677. sst = new(xlsxSST)
  678. decoder = xml.NewDecoder(rc)
  679. error = decoder.Decode(sst)
  680. if error != nil {
  681. return nil, error
  682. }
  683. reftable = MakeSharedStringRefTable(sst)
  684. return reftable, nil
  685. }
  686. // readStylesFromZipFile() is an internal helper function to
  687. // extract a style table from the style.xml file within
  688. // the XLSX zip file.
  689. func readStylesFromZipFile(f *zip.File, theme *theme) (*xlsxStyleSheet, error) {
  690. var style *xlsxStyleSheet
  691. var error error
  692. var rc io.ReadCloser
  693. var decoder *xml.Decoder
  694. rc, error = f.Open()
  695. if error != nil {
  696. return nil, error
  697. }
  698. style = newXlsxStyleSheet(theme)
  699. decoder = xml.NewDecoder(rc)
  700. error = decoder.Decode(style)
  701. if error != nil {
  702. return nil, error
  703. }
  704. buildNumFmtRefTable(style)
  705. return style, nil
  706. }
  707. func buildNumFmtRefTable(style *xlsxStyleSheet) {
  708. for _, numFmt := range style.NumFmts.NumFmt {
  709. // We do this for the side effect of populating the NumFmtRefTable.
  710. style.addNumFmt(numFmt)
  711. }
  712. }
  713. func readThemeFromZipFile(f *zip.File) (*theme, error) {
  714. rc, err := f.Open()
  715. if err != nil {
  716. return nil, err
  717. }
  718. var themeXml xlsxTheme
  719. err = xml.NewDecoder(rc).Decode(&themeXml)
  720. if err != nil {
  721. return nil, err
  722. }
  723. return newTheme(themeXml), nil
  724. }
  725. type WorkBookRels map[string]string
  726. func (w *WorkBookRels) MakeXLSXWorkbookRels() xlsxWorkbookRels {
  727. relCount := len(*w)
  728. xWorkbookRels := xlsxWorkbookRels{}
  729. xWorkbookRels.Relationships = make([]xlsxWorkbookRelation, relCount+3)
  730. for k, v := range *w {
  731. index, err := strconv.Atoi(k[3:])
  732. if err != nil {
  733. panic(err.Error())
  734. }
  735. xWorkbookRels.Relationships[index-1] = xlsxWorkbookRelation{
  736. Id: k,
  737. Target: v,
  738. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"}
  739. }
  740. relCount++
  741. sheetId := fmt.Sprintf("rId%d", relCount)
  742. xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{
  743. Id: sheetId,
  744. Target: "sharedStrings.xml",
  745. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"}
  746. relCount++
  747. sheetId = fmt.Sprintf("rId%d", relCount)
  748. xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{
  749. Id: sheetId,
  750. Target: "theme/theme1.xml",
  751. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"}
  752. relCount++
  753. sheetId = fmt.Sprintf("rId%d", relCount)
  754. xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{
  755. Id: sheetId,
  756. Target: "styles.xml",
  757. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"}
  758. return xWorkbookRels
  759. }
  760. // readWorkbookRelationsFromZipFile is an internal helper function to
  761. // extract a map of relationship ID strings to the name of the
  762. // worksheet.xml file they refer to. The resulting map can be used to
  763. // reliably derefence the worksheets in the XLSX file.
  764. func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (WorkBookRels, error) {
  765. var sheetXMLMap WorkBookRels
  766. var wbRelationships *xlsxWorkbookRels
  767. var rc io.ReadCloser
  768. var decoder *xml.Decoder
  769. var err error
  770. rc, err = workbookRels.Open()
  771. if err != nil {
  772. return nil, err
  773. }
  774. decoder = xml.NewDecoder(rc)
  775. wbRelationships = new(xlsxWorkbookRels)
  776. err = decoder.Decode(wbRelationships)
  777. if err != nil {
  778. return nil, err
  779. }
  780. sheetXMLMap = make(WorkBookRels)
  781. for _, rel := range wbRelationships.Relationships {
  782. if strings.HasSuffix(rel.Target, ".xml") && rel.Type == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" {
  783. _, filename := path.Split(rel.Target)
  784. sheetXMLMap[rel.Id] = strings.Replace(filename, ".xml", "", 1)
  785. }
  786. }
  787. return sheetXMLMap, nil
  788. }
  789. // ReadZip() takes a pointer to a zip.ReadCloser and returns a
  790. // xlsx.File struct populated with its contents. In most cases
  791. // ReadZip is not used directly, but is called internally by OpenFile.
  792. func ReadZip(f *zip.ReadCloser) (*File, error) {
  793. defer f.Close()
  794. return ReadZipReader(&f.Reader)
  795. }
  796. // ReadZipReader() can be used to read an XLSX in memory without
  797. // touching the filesystem.
  798. func ReadZipReader(r *zip.Reader) (*File, error) {
  799. var err error
  800. var file *File
  801. var reftable *RefTable
  802. var sharedStrings *zip.File
  803. var sheetXMLMap map[string]string
  804. var sheetsByName map[string]*Sheet
  805. var sheets []*Sheet
  806. var style *xlsxStyleSheet
  807. var styles *zip.File
  808. var themeFile *zip.File
  809. var v *zip.File
  810. var workbook *zip.File
  811. var workbookRels *zip.File
  812. var worksheets map[string]*zip.File
  813. file = NewFile()
  814. // file.numFmtRefTable = make(map[int]xlsxNumFmt, 1)
  815. worksheets = make(map[string]*zip.File, len(r.File))
  816. for _, v = range r.File {
  817. switch v.Name {
  818. case "xl/sharedStrings.xml":
  819. sharedStrings = v
  820. case "xl/workbook.xml":
  821. workbook = v
  822. case "xl/_rels/workbook.xml.rels":
  823. workbookRels = v
  824. case "xl/styles.xml":
  825. styles = v
  826. case "xl/theme/theme1.xml":
  827. themeFile = v
  828. default:
  829. if len(v.Name) > 14 {
  830. if v.Name[0:13] == "xl/worksheets" {
  831. worksheets[v.Name[14:len(v.Name)-4]] = v
  832. }
  833. }
  834. }
  835. }
  836. if workbookRels == nil {
  837. return nil, fmt.Errorf("xl/_rels/workbook.xml.rels not found in input xlsx.")
  838. }
  839. sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels)
  840. if err != nil {
  841. return nil, err
  842. }
  843. if len(worksheets) == 0 {
  844. return nil, fmt.Errorf("Input xlsx contains no worksheets.")
  845. }
  846. file.worksheets = worksheets
  847. reftable, err = readSharedStringsFromZipFile(sharedStrings)
  848. if err != nil {
  849. return nil, err
  850. }
  851. file.referenceTable = reftable
  852. if themeFile != nil {
  853. theme, err := readThemeFromZipFile(themeFile)
  854. if err != nil {
  855. return nil, err
  856. }
  857. file.theme = theme
  858. }
  859. if styles != nil {
  860. style, err = readStylesFromZipFile(styles, file.theme)
  861. if err != nil {
  862. return nil, err
  863. }
  864. file.styles = style
  865. }
  866. sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap)
  867. if err != nil {
  868. return nil, err
  869. }
  870. if sheets == nil {
  871. readerErr := new(XLSXReaderError)
  872. readerErr.Err = "No sheets found in XLSX File"
  873. return nil, readerErr
  874. }
  875. file.Sheet = sheetsByName
  876. file.Sheets = sheets
  877. return file, nil
  878. }