lib.go 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "bytes"
  5. "encoding/xml"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "path"
  10. "strconv"
  11. "strings"
  12. )
const (
	// sheetEnding is the closing markup of a worksheet XML document: the
	// end of the sheetData element followed by the end of the worksheet
	// element.
	// NOTE(review): not referenced in this part of the file; presumably
	// used where worksheet XML is truncated or re-terminated — confirm.
	sheetEnding = `</sheetData></worksheet>`
)
  16. // XLSXReaderError is the standard error type for otherwise undefined
  17. // errors in the XSLX reading process.
  18. type XLSXReaderError struct {
  19. Err string
  20. }
  21. // Error returns a string value from an XLSXReaderError struct in order
  22. // that it might comply with the builtin.error interface.
  23. func (e *XLSXReaderError) Error() string {
  24. return e.Err
  25. }
  26. // getRangeFromString is an internal helper function that converts
  27. // XLSX internal range syntax to a pair of integers. For example,
  28. // the range string "1:3" yield the upper and lower integers 1 and 3.
  29. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  30. var parts []string
  31. parts = strings.SplitN(rangeString, ":", 2)
  32. if parts[0] == "" {
  33. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  34. }
  35. if parts[1] == "" {
  36. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  37. }
  38. lower, error = strconv.Atoi(parts[0])
  39. if error != nil {
  40. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  41. }
  42. upper, error = strconv.Atoi(parts[1])
  43. if error != nil {
  44. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  45. }
  46. return lower, upper, error
  47. }
  48. // ColLettersToIndex is used to convert a character based column
  49. // reference to a zero based numeric column identifier.
  50. func ColLettersToIndex(letters string) int {
  51. sum, mul, n := 0, 1, 0
  52. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  53. c := letters[i]
  54. switch {
  55. case 'A' <= c && c <= 'Z':
  56. n += int(c - 'A')
  57. case 'a' <= c && c <= 'z':
  58. n += int(c - 'a')
  59. }
  60. sum += n * mul
  61. }
  62. return sum
  63. }
  64. // Get the largestDenominator that is a multiple of a basedDenominator
  65. // and fits at least once into a given numerator.
  66. func getLargestDenominator(numerator, multiple, baseDenominator, power int) (int, int) {
  67. if numerator/multiple == 0 {
  68. return 1, power
  69. }
  70. next, nextPower := getLargestDenominator(
  71. numerator, multiple*baseDenominator, baseDenominator, power+1)
  72. if next > multiple {
  73. return next, nextPower
  74. }
  75. return multiple, power
  76. }
  77. // Convers a list of numbers representing a column into a alphabetic
  78. // representation, as used in the spreadsheet.
  79. func formatColumnName(colId []int) string {
  80. lastPart := len(colId) - 1
  81. result := ""
  82. for n, part := range colId {
  83. if n == lastPart {
  84. // The least significant number is in the
  85. // range 0-25, all other numbers are 1-26,
  86. // hence we use a differente offset for the
  87. // last part.
  88. result += string(part + 65)
  89. } else {
  90. // Don't output leading 0s, as there is no
  91. // representation of 0 in this format.
  92. if part > 0 {
  93. result += string(part + 64)
  94. }
  95. }
  96. }
  97. return result
  98. }
  99. func smooshBase26Slice(b26 []int) []int {
  100. // Smoosh values together, eliminating 0s from all but the
  101. // least significant part.
  102. lastButOnePart := len(b26) - 2
  103. for i := lastButOnePart; i > 0; i-- {
  104. part := b26[i]
  105. if part == 0 {
  106. greaterPart := b26[i-1]
  107. if greaterPart > 0 {
  108. b26[i-1] = greaterPart - 1
  109. b26[i] = 26
  110. }
  111. }
  112. }
  113. return b26
  114. }
  115. func intToBase26(x int) (parts []int) {
  116. // Excel column codes are pure evil - in essence they're just
  117. // base26, but they don't represent the number 0.
  118. b26Denominator, _ := getLargestDenominator(x, 1, 26, 0)
  119. // This loop terminates because integer division of 1 / 26
  120. // returns 0.
  121. for d := b26Denominator; d > 0; d = d / 26 {
  122. value := x / d
  123. remainder := x % d
  124. parts = append(parts, value)
  125. x = remainder
  126. }
  127. return parts
  128. }
  129. // ColIndexToLetters is used to convert a zero based, numeric column
  130. // indentifier into a character code.
  131. func ColIndexToLetters(colRef int) string {
  132. parts := intToBase26(colRef)
  133. return formatColumnName(smooshBase26Slice(parts))
  134. }
  135. // letterOnlyMapF is used in conjunction with strings.Map to return
  136. // only the characters A-Z and a-z in a string
  137. func letterOnlyMapF(rune rune) rune {
  138. switch {
  139. case 'A' <= rune && rune <= 'Z':
  140. return rune
  141. case 'a' <= rune && rune <= 'z':
  142. return rune - 32
  143. }
  144. return -1
  145. }
  146. // intOnlyMapF is used in conjunction with strings.Map to return only
  147. // the numeric portions of a string.
  148. func intOnlyMapF(rune rune) rune {
  149. if rune >= 48 && rune < 58 {
  150. return rune
  151. }
  152. return -1
  153. }
  154. // GetCoordsFromCellIDString returns the zero based cartesian
  155. // coordinates from a cell name in Excel format, e.g. the cellIDString
  156. // "A1" returns 0, 0 and the "B3" return 1, 2.
  157. func GetCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  158. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  159. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  160. if error != nil {
  161. return x, y, error
  162. }
  163. y -= 1 // Zero based
  164. x = ColLettersToIndex(letterPart)
  165. return x, y, error
  166. }
  167. // GetCellIDStringFromCoords returns the Excel format cell name that
  168. // represents a pair of zero based cartesian coordinates.
  169. func GetCellIDStringFromCoords(x, y int) string {
  170. letterPart := ColIndexToLetters(x)
  171. numericPart := y + 1
  172. return fmt.Sprintf("%s%d", letterPart, numericPart)
  173. }
  174. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  175. // and minimum coordinates from the dimension reference embedded in a
  176. // XLSX worksheet. For example, the dimension reference "A1:B2"
  177. // returns "0,0", "1,1".
  178. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  179. var parts []string
  180. parts = strings.Split(ref, ":")
  181. minx, miny, err = GetCoordsFromCellIDString(parts[0])
  182. if err != nil {
  183. return -1, -1, -1, -1, err
  184. }
  185. maxx, maxy, err = GetCoordsFromCellIDString(parts[1])
  186. if err != nil {
  187. return -1, -1, -1, -1, err
  188. }
  189. return
  190. }
  191. // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet
  192. // that doesn't have a DimensionRef set. The only case currently
  193. // known where this is true is with XLSX exported from Google Docs.
  194. // This is also true for XLSX files created through the streaming APIs.
  195. func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) {
  196. // Note, this method could be very slow for large spreadsheets.
  197. var x, y int
  198. var maxVal int
  199. maxVal = int(^uint(0) >> 1)
  200. minx = maxVal
  201. miny = maxVal
  202. maxy = 0
  203. maxx = 0
  204. for _, row := range worksheet.SheetData.Row {
  205. for _, cell := range row.C {
  206. x, y, err = GetCoordsFromCellIDString(cell.R)
  207. if err != nil {
  208. return -1, -1, -1, -1, err
  209. }
  210. if x < minx {
  211. minx = x
  212. }
  213. if x > maxx {
  214. maxx = x
  215. }
  216. if y < miny {
  217. miny = y
  218. }
  219. if y > maxy {
  220. maxy = y
  221. }
  222. }
  223. }
  224. if minx == maxVal {
  225. minx = 0
  226. }
  227. if miny == maxVal {
  228. miny = 0
  229. }
  230. return
  231. }
  232. // makeRowFromSpan will, when given a span expressed as a string,
  233. // return an empty Row large enough to encompass that span and
  234. // populate it with empty cells. All rows start from cell 1 -
  235. // regardless of the lower bound of the span.
  236. func makeRowFromSpan(spans string, sheet *Sheet) *Row {
  237. var error error
  238. var upper int
  239. var row *Row
  240. var cell *Cell
  241. row = new(Row)
  242. row.Sheet = sheet
  243. _, upper, error = getRangeFromString(spans)
  244. if error != nil {
  245. panic(error)
  246. }
  247. error = nil
  248. row.Cells = make([]*Cell, upper)
  249. for i := 0; i < upper; i++ {
  250. cell = new(Cell)
  251. cell.Value = ""
  252. row.Cells[i] = cell
  253. }
  254. return row
  255. }
  256. // makeRowFromRaw returns the Row representation of the xlsxRow.
  257. func makeRowFromRaw(rawrow xlsxRow, sheet *Sheet) *Row {
  258. var upper int
  259. var row *Row
  260. var cell *Cell
  261. row = new(Row)
  262. row.Sheet = sheet
  263. upper = -1
  264. for _, rawcell := range rawrow.C {
  265. if rawcell.R != "" {
  266. x, _, error := GetCoordsFromCellIDString(rawcell.R)
  267. if error != nil {
  268. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  269. }
  270. if x > upper {
  271. upper = x
  272. }
  273. continue
  274. }
  275. upper++
  276. }
  277. upper++
  278. row.OutlineLevel = rawrow.OutlineLevel
  279. row.Cells = make([]*Cell, upper)
  280. for i := 0; i < upper; i++ {
  281. cell = new(Cell)
  282. cell.Value = ""
  283. row.Cells[i] = cell
  284. }
  285. return row
  286. }
  287. func makeEmptyRow(sheet *Sheet) *Row {
  288. row := new(Row)
  289. row.Cells = make([]*Cell, 0)
  290. row.Sheet = sheet
  291. return row
  292. }
  293. type sharedFormula struct {
  294. x, y int
  295. formula string
  296. }
// formulaForCell returns the formula text for a raw cell, with
// surrounding spaces, tabs and line breaks trimmed. It returns "" when
// the cell has no formula element.
//
// Ordinary formulas are returned as stored. Shared formulas (type
// "shared") are handled in two ways:
//   - A cell whose formula element carries a Ref is the master: its
//     text is returned and also recorded in sharedFormulas under the
//     formula's Si index, together with the cell's coordinates.
//   - Any other cell looks up the master by Si and gets a copy of the
//     master text in which every cell reference is shifted by the
//     distance between this cell and the master (see shiftCell).
//
// If the cell's own reference cannot be parsed, the stored content is
// returned unchanged. sharedFormulas is modified as a side effect.
func formulaForCell(rawcell xlsxC, sharedFormulas map[int]sharedFormula) string {
	var res string
	f := rawcell.F
	if f == nil {
		return ""
	}
	if f.T == "shared" {
		x, y, err := GetCoordsFromCellIDString(rawcell.R)
		if err != nil {
			res = f.Content
		} else {
			if f.Ref != "" {
				// Master cell of the shared formula: remember it.
				res = f.Content
				sharedFormulas[f.Si] = sharedFormula{x, y, res}
			} else {
				// Dependent cell: rebuild the formula from the master.
				// An unknown Si yields the zero value, i.e. an empty
				// formula.
				sharedFormula := sharedFormulas[f.Si]
				dx := x - sharedFormula.x
				dy := y - sharedFormula.y
				orig := []byte(sharedFormula.formula)
				// orig[start:end] is the text scanned but not yet
				// copied to res.
				var start, end int
				var stringLiteral bool
				for end = 0; end < len(orig); end++ {
					c := orig[end]
					if c == '"' {
						stringLiteral = !stringLiteral
					}
					if stringLiteral {
						continue // Skip characters in quotes
					}
					if c >= 'A' && c <= 'Z' || c == '$' {
						// Possible start of a cell reference: flush the
						// pending text, then scan the candidate token.
						res += string(orig[start:end])
						start = end
						end++
						foundNum := false
						for ; end < len(orig); end++ {
							idc := orig[end]
							if idc >= '0' && idc <= '9' || idc == '$' {
								foundNum = true
							} else if idc >= 'A' && idc <= 'Z' {
								// A letter after the numeric part ends
								// the token.
								if foundNum {
									break
								}
							} else {
								break
							}
						}
						// Only tokens with a numeric part are treated as
						// cell references; other tokens (e.g. function
						// names) stay pending and are copied verbatim
						// later.
						// NOTE(review): the byte that ended the token is
						// stepped over by the outer loop's end++ and is
						// therefore not inspected itself (for instance
						// for being a quote); it is still copied to the
						// output.
						if foundNum {
							cellID := string(orig[start:end])
							res += shiftCell(cellID, dx, dy)
							start = end
						}
					}
				}
				// Copy whatever is left after the last reference.
				if start < len(orig) {
					res += string(orig[start:])
				}
			}
		}
	} else {
		res = f.Content
	}
	return strings.Trim(res, " \t\n\r")
}
  360. // shiftCell returns the cell shifted according to dx and dy taking into consideration of absolute
  361. // references with dollar sign ($)
  362. func shiftCell(cellID string, dx, dy int) string {
  363. fx, fy, _ := GetCoordsFromCellIDString(cellID)
  364. // Is fixed column?
  365. fixedCol := strings.Index(cellID, "$") == 0
  366. // Is fixed row?
  367. fixedRow := strings.LastIndex(cellID, "$") > 0
  368. if !fixedCol {
  369. // Shift column
  370. fx += dx
  371. }
  372. if !fixedRow {
  373. // Shift row
  374. fy += dy
  375. }
  376. // New shifted cell
  377. shiftedCellID := GetCellIDStringFromCoords(fx, fy)
  378. if !fixedCol && !fixedRow {
  379. return shiftedCellID
  380. }
  381. // There are absolute references, need to put the $ back into the formula.
  382. letterPart := strings.Map(letterOnlyMapF, shiftedCellID)
  383. numberPart := strings.Map(intOnlyMapF, shiftedCellID)
  384. result := ""
  385. if fixedCol {
  386. result += "$"
  387. }
  388. result += letterPart
  389. if fixedRow {
  390. result += "$"
  391. }
  392. result += numberPart
  393. return result
  394. }
  395. // fillCellData attempts to extract a valid value, usable in
  396. // CSV form from the raw cell value. Note - this is not actually
  397. // general enough - we should support retaining tabs and newlines.
  398. func fillCellData(rawCell xlsxC, refTable *RefTable, sharedFormulas map[int]sharedFormula, cell *Cell) {
  399. val := strings.Trim(rawCell.V, " \t\n\r")
  400. cell.formula = formulaForCell(rawCell, sharedFormulas)
  401. switch rawCell.T {
  402. case "s": // Shared String
  403. cell.cellType = CellTypeString
  404. if val != "" {
  405. ref, err := strconv.Atoi(val)
  406. if err != nil {
  407. panic(err)
  408. }
  409. cell.Value = refTable.ResolveSharedString(ref)
  410. }
  411. case "inlineStr":
  412. cell.cellType = CellTypeInline
  413. fillCellDataFromInlineString(rawCell, cell)
  414. case "b": // Boolean
  415. cell.Value = val
  416. cell.cellType = CellTypeBool
  417. case "e": // Error
  418. cell.Value = val
  419. cell.cellType = CellTypeError
  420. case "str":
  421. // String Formula (special type for cells with formulas that return a string value)
  422. // Unlike the other string cell types, the string is stored directly in the value.
  423. cell.Value = val
  424. cell.cellType = CellTypeStringFormula
  425. case "d": // Date: Cell contains a date in the ISO 8601 format.
  426. cell.Value = val
  427. cell.cellType = CellTypeDate
  428. case "": // Numeric is the default
  429. fallthrough
  430. case "n": // Numeric
  431. cell.Value = val
  432. cell.cellType = CellTypeNumeric
  433. default:
  434. panic(errors.New("invalid cell type"))
  435. }
  436. }
  437. // fillCellDataFromInlineString attempts to get inline string data and put it into a Cell.
  438. func fillCellDataFromInlineString(rawcell xlsxC, cell *Cell) {
  439. cell.Value = ""
  440. if rawcell.Is != nil {
  441. if rawcell.Is.T != "" {
  442. cell.Value = strings.Trim(rawcell.Is.T, " \t\n\r")
  443. } else {
  444. for _, r := range rawcell.Is.R {
  445. cell.Value += r.T
  446. }
  447. }
  448. }
  449. }
// readRowsFromSheet is an internal helper function that extracts the
// rows from a xlsxWorksheet, populates them with Cells and resolves
// the value references from the reference table and stores them in
// the rows and columns.
//
// It returns the rows, the column definitions, the column count and
// the row count (in that order). A worksheet without any row data
// yields nil, nil, 0, 0.
//
// The sheet dimensions are taken from the worksheet's dimension
// reference when that reference has exactly two parts and no row limit
// is in force; otherwise they are computed by scanning every cell.
//
// The function panics when the dimensions cannot be determined, when a
// merge cell extent cannot be read, or when one of the helpers it
// calls panics.
func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet, rowLimit int) ([]*Row, []*Col, int, int) {
	var rows []*Row
	var cols []*Col
	var row *Row
	var minCol, maxCol, maxRow, colCount, rowCount int
	var reftable *RefTable
	var err error
	// Zero based positions at which the next row / cell will be stored.
	var insertRowIndex, insertColIndex int
	// Master copies of shared formulas seen so far, keyed by their index.
	sharedFormulas := map[int]sharedFormula{}

	if len(Worksheet.SheetData.Row) == 0 {
		return nil, nil, 0, 0
	}
	reftable = file.referenceTable
	if len(Worksheet.Dimension.Ref) > 0 && len(strings.Split(Worksheet.Dimension.Ref, ":")) == 2 && rowLimit == NoRowLimit {
		minCol, _, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
	} else {
		minCol, _, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet)
	}
	if err != nil {
		panic(err.Error())
	}

	// The maxima are zero based indices, the counts are one larger.
	rowCount = maxRow + 1
	colCount = maxCol + 1
	rows = make([]*Row, rowCount)
	cols = make([]*Col, colCount)
	// Every column starts out with a default, visible definition.
	for i := range cols {
		cols[i] = &Col{
			Hidden: false,
		}
	}

	if Worksheet.Cols != nil {
		// Columns can apply to a range, for convenience we expand the
		// ranges out into individual column definitions.
		for _, rawcol := range Worksheet.Cols.Col {
			// Note, below, that sometimes column definitions can
			// exist outside the defined dimensions of the
			// spreadsheet - we deliberately exclude these
			// columns.
			// i runs over one based column numbers, hence cols[i-1].
			for i := rawcol.Min; i <= rawcol.Max && i <= colCount; i++ {
				col := &Col{
					Min:          rawcol.Min,
					Max:          rawcol.Max,
					Hidden:       rawcol.Hidden,
					Width:        rawcol.Width,
					OutlineLevel: rawcol.OutlineLevel}
				cols[i-1] = col
				if file.styles != nil {
					col.style = file.styles.getStyle(rawcol.Style)
					col.numFmt, col.parsedNumFmt = file.styles.getNumberFormat(rawcol.Style)
				}
			}
		}
	}

	numRows := len(rows)
	for rowIndex := 0; rowIndex < len(Worksheet.SheetData.Row); rowIndex++ {
		rawrow := Worksheet.SheetData.Row[rowIndex]
		// Some spreadsheets will omit blank rows from the
		// stored data
		// rawrow.R is compared against insertRowIndex+1, i.e. it is
		// treated as a one based row number.
		for rawrow.R > (insertRowIndex + 1) {
			// Put an empty Row into the array
			if insertRowIndex < numRows {
				rows[insertRowIndex] = makeEmptyRow(sheet)
			}
			insertRowIndex++
		}
		// range is not empty and only one range exist
		if len(rawrow.Spans) != 0 && strings.Count(rawrow.Spans, ":") == 1 {
			row = makeRowFromSpan(rawrow.Spans, sheet)
		} else {
			row = makeRowFromRaw(rawrow, sheet)
		}

		row.Hidden = rawrow.Hidden
		// A height that does not parse as a float is silently ignored.
		// This err shadows the function level err.
		height, err := strconv.ParseFloat(rawrow.Ht, 64)
		if err == nil {
			row.Height = height
		}
		row.isCustom = rawrow.CustomHeight
		row.OutlineLevel = rawrow.OutlineLevel

		// Cells are placed starting at the sheet's minimum column.
		insertColIndex = minCol
		for _, rawcell := range rawrow.C {
			h, v, err := Worksheet.MergeCells.getExtent(rawcell.R)
			if err != nil {
				panic(err.Error())
			}
			// A parse error is ignored here; x is then whatever
			// GetCoordsFromCellIDString returned.
			x, _, _ := GetCoordsFromCellIDString(rawcell.R)

			// K1000000: Prevent panic when the range specified in the spreadsheet
			// view exceeds the actual number of columns in the dataset.

			// Some spreadsheets will omit blank cells
			// from the data.
			for x > insertColIndex {
				// Put an empty Cell into the array
				if insertColIndex < len(row.Cells) {
					row.Cells[insertColIndex] = new(Cell)
				}
				insertColIndex++
			}
			cellX := insertColIndex

			// Cells that fall outside the allocated row are dropped.
			if cellX < len(row.Cells) {
				cell := row.Cells[cellX]
				cell.HMerge = h
				cell.VMerge = v
				fillCellData(rawcell, reftable, sharedFormulas, cell)
				if file.styles != nil {
					cell.style = file.styles.getStyle(rawcell.S)
					cell.NumFmt, cell.parsedNumFmt = file.styles.getNumberFormat(rawcell.S)
				}
				cell.date1904 = file.Date1904
				// Cell is considered hidden if the row or the column of this cell is hidden
				cell.Hidden = rawrow.Hidden || (len(cols) > cellX && cols[cellX].Hidden)
				insertColIndex++
			}
		}
		// Rows beyond the computed row count are dropped.
		if len(rows) > insertRowIndex {
			rows[insertRowIndex] = row
		}
		insertRowIndex++
	}

	// insert trailing empty rows for the rest of the file
	for ; insertRowIndex < rowCount; insertRowIndex++ {
		rows[insertRowIndex] = makeEmptyRow(sheet)
	}
	return rows, cols, colCount, rowCount
}
// indexedSheet is the message sent over the result channel by
// readSheetFromFile: the outcome of reading one sheet together with
// the position of that sheet in the list of sheets being read.
type indexedSheet struct {
	// Index is the position of the sheet in the list of workbook sheets.
	Index int
	// Sheet is the sheet that was read; it is nil when reading failed.
	Sheet *Sheet
	// Error is the reason reading failed, or nil on success.
	Error error
}
  582. func readSheetViews(xSheetViews xlsxSheetViews) []SheetView {
  583. if xSheetViews.SheetView == nil || len(xSheetViews.SheetView) == 0 {
  584. return nil
  585. }
  586. sheetViews := []SheetView{}
  587. for _, xSheetView := range xSheetViews.SheetView {
  588. sheetView := SheetView{}
  589. if xSheetView.Pane != nil {
  590. xlsxPane := xSheetView.Pane
  591. pane := &Pane{}
  592. pane.XSplit = xlsxPane.XSplit
  593. pane.YSplit = xlsxPane.YSplit
  594. pane.TopLeftCell = xlsxPane.TopLeftCell
  595. pane.ActivePane = xlsxPane.ActivePane
  596. pane.State = xlsxPane.State
  597. sheetView.Pane = pane
  598. }
  599. sheetViews = append(sheetViews, sheetView)
  600. }
  601. return sheetViews
  602. }
  603. // readSheetFromFile is the logic of converting a xlsxSheet struct
  604. // into a Sheet struct. This work can be done in parallel and so
  605. // readSheetsFromZipFile will spawn an instance of this function per
  606. // sheet and get the results back on the provided channel.
  607. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string, rowLimit int) (errRes error) {
  608. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  609. defer func() {
  610. if e := recover(); e != nil {
  611. switch e.(type) {
  612. case error:
  613. result.Error = e.(error)
  614. errRes = e.(error)
  615. default:
  616. result.Error = errors.New("unexpected error")
  617. }
  618. // The only thing here, is if one close the channel. but its not the case
  619. sc <- result
  620. }
  621. }()
  622. worksheet, err := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap, rowLimit)
  623. if err != nil {
  624. result.Error = err
  625. sc <- result
  626. return err
  627. }
  628. sheet := new(Sheet)
  629. sheet.File = fi
  630. sheet.Rows, sheet.Cols, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi, sheet, rowLimit)
  631. sheet.Hidden = rsheet.State == sheetStateHidden || rsheet.State == sheetStateVeryHidden
  632. sheet.SheetViews = readSheetViews(worksheet.SheetViews)
  633. sheet.SheetFormat.DefaultColWidth = worksheet.SheetFormatPr.DefaultColWidth
  634. sheet.SheetFormat.DefaultRowHeight = worksheet.SheetFormatPr.DefaultRowHeight
  635. sheet.SheetFormat.OutlineLevelCol = worksheet.SheetFormatPr.OutlineLevelCol
  636. sheet.SheetFormat.OutlineLevelRow = worksheet.SheetFormatPr.OutlineLevelRow
  637. if nil != worksheet.DataValidations {
  638. for _, dd := range worksheet.DataValidations.DataValidattion {
  639. sqrefArr := strings.Split(dd.Sqref, " ")
  640. for _, sqref := range sqrefArr {
  641. parts := strings.Split(sqref, ":")
  642. minCol, minRow, err := GetCoordsFromCellIDString(parts[0])
  643. if nil != err {
  644. return fmt.Errorf("data validation %s", err.Error())
  645. }
  646. if 2 == len(parts) {
  647. maxCol, maxRow, err := GetCoordsFromCellIDString(parts[1])
  648. if nil != err {
  649. return fmt.Errorf("data validation %s", err.Error())
  650. }
  651. if minCol == maxCol && minRow == maxRow {
  652. newDD := new(xlsxCellDataValidation)
  653. *newDD = *dd
  654. newDD.Sqref = ""
  655. sheet.Cell(minRow, minCol).SetDataValidation(newDD)
  656. } else {
  657. // one col mutli dd , error todo
  658. for i := minCol; i <= maxCol; i++ {
  659. newDD := new(xlsxCellDataValidation)
  660. *newDD = *dd
  661. newDD.Sqref = ""
  662. sheet.Col(i).SetDataValidation(dd, minRow, maxRow)
  663. }
  664. }
  665. } else {
  666. newDD := new(xlsxCellDataValidation)
  667. *newDD = *dd
  668. newDD.Sqref = ""
  669. sheet.Cell(minRow, minCol).SetDataValidation(dd)
  670. }
  671. }
  672. }
  673. }
  674. result.Sheet = sheet
  675. sc <- result
  676. return nil
  677. }
  678. // readSheetsFromZipFile is an internal helper function that loops
  679. // over the Worksheets defined in the XSLXWorkbook and loads them into
  680. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  681. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string, rowLimit int) (map[string]*Sheet, []*Sheet, error) {
  682. var workbook *xlsxWorkbook
  683. var err error
  684. var rc io.ReadCloser
  685. var decoder *xml.Decoder
  686. var sheetCount int
  687. workbook = new(xlsxWorkbook)
  688. rc, err = f.Open()
  689. if err != nil {
  690. return nil, nil, err
  691. }
  692. decoder = xml.NewDecoder(rc)
  693. err = decoder.Decode(workbook)
  694. if err != nil {
  695. return nil, nil, err
  696. }
  697. file.Date1904 = workbook.WorkbookPr.Date1904
  698. for entryNum := range workbook.DefinedNames.DefinedName {
  699. file.DefinedNames = append(file.DefinedNames, &workbook.DefinedNames.DefinedName[entryNum])
  700. }
  701. // Only try and read sheets that have corresponding files.
  702. // Notably this excludes chartsheets don't right now
  703. var workbookSheets []xlsxSheet
  704. for _, sheet := range workbook.Sheets.Sheet {
  705. if f := worksheetFileForSheet(sheet, file.worksheets, sheetXMLMap); f != nil {
  706. workbookSheets = append(workbookSheets, sheet)
  707. }
  708. }
  709. sheetCount = len(workbookSheets)
  710. sheetsByName := make(map[string]*Sheet, sheetCount)
  711. sheets := make([]*Sheet, sheetCount)
  712. sheetChan := make(chan *indexedSheet, sheetCount)
  713. go func() {
  714. defer close(sheetChan)
  715. err = nil
  716. for i, rawsheet := range workbookSheets {
  717. if err := readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap, rowLimit); err != nil {
  718. return
  719. }
  720. }
  721. }()
  722. for j := 0; j < sheetCount; j++ {
  723. sheet := <-sheetChan
  724. if sheet.Error != nil {
  725. return nil, nil, sheet.Error
  726. }
  727. sheetName := workbookSheets[sheet.Index].Name
  728. sheetsByName[sheetName] = sheet.Sheet
  729. sheet.Sheet.Name = sheetName
  730. sheets[sheet.Index] = sheet.Sheet
  731. }
  732. return sheetsByName, sheets, nil
  733. }
  734. // readSharedStringsFromZipFile() is an internal helper function to
  735. // extract a reference table from the sharedStrings.xml file within
  736. // the XLSX zip file.
  737. func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
  738. var sst *xlsxSST
  739. var error error
  740. var rc io.ReadCloser
  741. var decoder *xml.Decoder
  742. var reftable *RefTable
  743. // In a file with no strings it's possible that
  744. // sharedStrings.xml doesn't exist. In this case the value
  745. // passed as f will be nil.
  746. if f == nil {
  747. return nil, nil
  748. }
  749. rc, error = f.Open()
  750. if error != nil {
  751. return nil, error
  752. }
  753. sst = new(xlsxSST)
  754. decoder = xml.NewDecoder(rc)
  755. error = decoder.Decode(sst)
  756. if error != nil {
  757. return nil, error
  758. }
  759. reftable = MakeSharedStringRefTable(sst)
  760. return reftable, nil
  761. }
  762. // readStylesFromZipFile() is an internal helper function to
  763. // extract a style table from the style.xml file within
  764. // the XLSX zip file.
  765. func readStylesFromZipFile(f *zip.File, theme *theme) (*xlsxStyleSheet, error) {
  766. var style *xlsxStyleSheet
  767. var error error
  768. var rc io.ReadCloser
  769. var decoder *xml.Decoder
  770. rc, error = f.Open()
  771. if error != nil {
  772. return nil, error
  773. }
  774. style = newXlsxStyleSheet(theme)
  775. decoder = xml.NewDecoder(rc)
  776. error = decoder.Decode(style)
  777. if error != nil {
  778. return nil, error
  779. }
  780. buildNumFmtRefTable(style)
  781. return style, nil
  782. }
  783. func buildNumFmtRefTable(style *xlsxStyleSheet) {
  784. for _, numFmt := range style.NumFmts.NumFmt {
  785. // We do this for the side effect of populating the NumFmtRefTable.
  786. style.addNumFmt(numFmt)
  787. }
  788. }
  789. func readThemeFromZipFile(f *zip.File) (*theme, error) {
  790. rc, err := f.Open()
  791. if err != nil {
  792. return nil, err
  793. }
  794. var themeXml xlsxTheme
  795. err = xml.NewDecoder(rc).Decode(&themeXml)
  796. if err != nil {
  797. return nil, err
  798. }
  799. return newTheme(themeXml), nil
  800. }
  801. type WorkBookRels map[string]string
  802. func (w *WorkBookRels) MakeXLSXWorkbookRels() xlsxWorkbookRels {
  803. relCount := len(*w)
  804. xWorkbookRels := xlsxWorkbookRels{}
  805. xWorkbookRels.Relationships = make([]xlsxWorkbookRelation, relCount+3)
  806. for k, v := range *w {
  807. index, err := strconv.Atoi(k[3:])
  808. if err != nil {
  809. panic(err.Error())
  810. }
  811. xWorkbookRels.Relationships[index-1] = xlsxWorkbookRelation{
  812. Id: k,
  813. Target: v,
  814. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"}
  815. }
  816. relCount++
  817. sheetId := fmt.Sprintf("rId%d", relCount)
  818. xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{
  819. Id: sheetId,
  820. Target: "sharedStrings.xml",
  821. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"}
  822. relCount++
  823. sheetId = fmt.Sprintf("rId%d", relCount)
  824. xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{
  825. Id: sheetId,
  826. Target: "theme/theme1.xml",
  827. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"}
  828. relCount++
  829. sheetId = fmt.Sprintf("rId%d", relCount)
  830. xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{
  831. Id: sheetId,
  832. Target: "styles.xml",
  833. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"}
  834. return xWorkbookRels
  835. }
  836. // readWorkbookRelationsFromZipFile is an internal helper function to
  837. // extract a map of relationship ID strings to the name of the
  838. // worksheet.xml file they refer to. The resulting map can be used to
  839. // reliably derefence the worksheets in the XLSX file.
  840. func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (WorkBookRels, error) {
  841. var sheetXMLMap WorkBookRels
  842. var wbRelationships *xlsxWorkbookRels
  843. var rc io.ReadCloser
  844. var decoder *xml.Decoder
  845. var err error
  846. rc, err = workbookRels.Open()
  847. if err != nil {
  848. return nil, err
  849. }
  850. decoder = xml.NewDecoder(rc)
  851. wbRelationships = new(xlsxWorkbookRels)
  852. err = decoder.Decode(wbRelationships)
  853. if err != nil {
  854. return nil, err
  855. }
  856. sheetXMLMap = make(WorkBookRels)
  857. for _, rel := range wbRelationships.Relationships {
  858. if strings.HasSuffix(rel.Target, ".xml") && rel.Type == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" {
  859. _, filename := path.Split(rel.Target)
  860. sheetXMLMap[rel.Id] = strings.Replace(filename, ".xml", "", 1)
  861. }
  862. }
  863. return sheetXMLMap, nil
  864. }
  865. // ReadZip() takes a pointer to a zip.ReadCloser and returns a
  866. // xlsx.File struct populated with its contents. In most cases
  867. // ReadZip is not used directly, but is called internally by OpenFile.
  868. func ReadZip(f *zip.ReadCloser) (*File, error) {
  869. return ReadZipWithRowLimit(f, NoRowLimit)
  870. }
  871. // ReadZipWithRowLimit() takes a pointer to a zip.ReadCloser and returns a
  872. // xlsx.File struct populated with its contents. In most cases
  873. // ReadZip is not used directly, but is called internally by OpenFile.
  874. func ReadZipWithRowLimit(f *zip.ReadCloser, rowLimit int) (*File, error) {
  875. defer f.Close()
  876. return ReadZipReaderWithRowLimit(&f.Reader, rowLimit)
  877. }
  878. // ReadZipReader() can be used to read an XLSX in memory without
  879. // touching the filesystem.
  880. func ReadZipReader(r *zip.Reader) (*File, error) {
  881. return ReadZipReaderWithRowLimit(r, NoRowLimit)
  882. }
  883. // ReadZipReaderWithRowLimit() can be used to read an XLSX in memory without
  884. // touching the filesystem.
  885. // rowLimit is the number of rows that should be read from the file. If rowLimit is -1, no limit is applied.
  886. // You can specify this with the constant NoRowLimit.
  887. func ReadZipReaderWithRowLimit(r *zip.Reader, rowLimit int) (*File, error) {
  888. var err error
  889. var file *File
  890. var reftable *RefTable
  891. var sharedStrings *zip.File
  892. var sheetXMLMap map[string]string
  893. var sheetsByName map[string]*Sheet
  894. var sheets []*Sheet
  895. var style *xlsxStyleSheet
  896. var styles *zip.File
  897. var themeFile *zip.File
  898. var v *zip.File
  899. var workbook *zip.File
  900. var workbookRels *zip.File
  901. var worksheets map[string]*zip.File
  902. file = NewFile()
  903. // file.numFmtRefTable = make(map[int]xlsxNumFmt, 1)
  904. worksheets = make(map[string]*zip.File, len(r.File))
  905. for _, v = range r.File {
  906. switch v.Name {
  907. case "xl/sharedStrings.xml":
  908. sharedStrings = v
  909. case "xl/workbook.xml":
  910. workbook = v
  911. case "xl/_rels/workbook.xml.rels":
  912. workbookRels = v
  913. case "xl/styles.xml":
  914. styles = v
  915. case "xl/theme/theme1.xml":
  916. themeFile = v
  917. default:
  918. if len(v.Name) > 17 {
  919. if v.Name[0:13] == "xl/worksheets" {
  920. worksheets[v.Name[14:len(v.Name)-4]] = v
  921. }
  922. }
  923. }
  924. }
  925. if workbookRels == nil {
  926. return nil, fmt.Errorf("xl/_rels/workbook.xml.rels not found in input xlsx.")
  927. }
  928. sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels)
  929. if err != nil {
  930. return nil, err
  931. }
  932. if len(worksheets) == 0 {
  933. return nil, fmt.Errorf("Input xlsx contains no worksheets.")
  934. }
  935. file.worksheets = worksheets
  936. reftable, err = readSharedStringsFromZipFile(sharedStrings)
  937. if err != nil {
  938. return nil, err
  939. }
  940. file.referenceTable = reftable
  941. if themeFile != nil {
  942. theme, err := readThemeFromZipFile(themeFile)
  943. if err != nil {
  944. return nil, err
  945. }
  946. file.theme = theme
  947. }
  948. if styles != nil {
  949. style, err = readStylesFromZipFile(styles, file.theme)
  950. if err != nil {
  951. return nil, err
  952. }
  953. file.styles = style
  954. }
  955. sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap, rowLimit)
  956. if err != nil {
  957. return nil, err
  958. }
  959. if sheets == nil {
  960. readerErr := new(XLSXReaderError)
  961. readerErr.Err = "No sheets found in XLSX File"
  962. return nil, readerErr
  963. }
  964. file.Sheet = sheetsByName
  965. file.Sheets = sheets
  966. return file, nil
  967. }
  968. // truncateSheetXML will take in a reader to an XML sheet file and will return a reader that will read an equivalent
  969. // XML sheet file with only the number of rows specified. This greatly speeds up XML unmarshalling when only
  970. // a few rows need to be read from a large sheet.
  971. // When sheets are truncated, all formatting present after the sheetData tag will be lost, but all of this formatting
  972. // is related to printing and visibility, and is out of scope for most purposes of this library.
  973. func truncateSheetXML(r io.Reader, rowLimit int) (io.Reader, error) {
  974. var rowCount int
  975. var token xml.Token
  976. var readErr error
  977. output := new(bytes.Buffer)
  978. r = io.TeeReader(r, output)
  979. decoder := xml.NewDecoder(r)
  980. for {
  981. token, readErr = decoder.Token()
  982. if readErr == io.EOF {
  983. break
  984. } else if readErr != nil {
  985. return nil, readErr
  986. }
  987. end, ok := token.(xml.EndElement)
  988. if ok && end.Name.Local == "row" {
  989. rowCount++
  990. if rowCount >= rowLimit {
  991. break
  992. }
  993. }
  994. }
  995. offset := decoder.InputOffset()
  996. output.Truncate(int(offset))
  997. if readErr != io.EOF {
  998. _, err := output.Write([]byte(sheetEnding))
  999. if err != nil {
  1000. return nil, err
  1001. }
  1002. }
  1003. return output, nil
  1004. }