lib.go 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "bytes"
  5. "encoding/xml"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "path"
  10. "strconv"
  11. "strings"
  12. )
const (
	// sheetEnding is the closing markup that terminates a worksheet's
	// sheetData element and the worksheet element itself.
	sheetEnding = `</sheetData></worksheet>`
	// fixedCellRefChar marks an absolute column or row inside a cell
	// reference, e.g. "$A$1".
	fixedCellRefChar = "$"
	// cellRangeChar separates the two ends of a range, e.g. "A1:B2" or "1:3".
	cellRangeChar = ":"
	// externalSheetBangChar is the "!" character.
	// NOTE(review): presumably separates a sheet name from a cell
	// reference; it is not used in the visible part of this file.
	externalSheetBangChar = "!"
)
  19. // XLSXReaderError is the standard error type for otherwise undefined
  20. // errors in the XSLX reading process.
  21. type XLSXReaderError struct {
  22. Err string
  23. }
  24. // Error returns a string value from an XLSXReaderError struct in order
  25. // that it might comply with the builtin.error interface.
  26. func (e *XLSXReaderError) Error() string {
  27. return e.Err
  28. }
  29. // getRangeFromString is an internal helper function that converts
  30. // XLSX internal range syntax to a pair of integers. For example,
  31. // the range string "1:3" yield the upper and lower integers 1 and 3.
  32. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  33. var parts []string
  34. parts = strings.SplitN(rangeString, cellRangeChar, 2)
  35. if parts[0] == "" {
  36. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  37. }
  38. if parts[1] == "" {
  39. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  40. }
  41. lower, error = strconv.Atoi(parts[0])
  42. if error != nil {
  43. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  44. }
  45. upper, error = strconv.Atoi(parts[1])
  46. if error != nil {
  47. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  48. }
  49. return lower, upper, error
  50. }
  51. // ColLettersToIndex is used to convert a character based column
  52. // reference to a zero based numeric column identifier.
  53. func ColLettersToIndex(letters string) int {
  54. sum, mul, n := 0, 1, 0
  55. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  56. c := letters[i]
  57. switch {
  58. case 'A' <= c && c <= 'Z':
  59. n += int(c - 'A')
  60. case 'a' <= c && c <= 'z':
  61. n += int(c - 'a')
  62. }
  63. sum += n * mul
  64. }
  65. return sum
  66. }
  67. // Get the largestDenominator that is a multiple of a basedDenominator
  68. // and fits at least once into a given numerator.
  69. func getLargestDenominator(numerator, multiple, baseDenominator, power int) (int, int) {
  70. if numerator/multiple == 0 {
  71. return 1, power
  72. }
  73. next, nextPower := getLargestDenominator(
  74. numerator, multiple*baseDenominator, baseDenominator, power+1)
  75. if next > multiple {
  76. return next, nextPower
  77. }
  78. return multiple, power
  79. }
  80. // Convers a list of numbers representing a column into a alphabetic
  81. // representation, as used in the spreadsheet.
  82. func formatColumnName(colId []int) string {
  83. lastPart := len(colId) - 1
  84. result := ""
  85. for n, part := range colId {
  86. if n == lastPart {
  87. // The least significant number is in the
  88. // range 0-25, all other numbers are 1-26,
  89. // hence we use a differente offset for the
  90. // last part.
  91. result += string(part + 65)
  92. } else {
  93. // Don't output leading 0s, as there is no
  94. // representation of 0 in this format.
  95. if part > 0 {
  96. result += string(part + 64)
  97. }
  98. }
  99. }
  100. return result
  101. }
  102. func smooshBase26Slice(b26 []int) []int {
  103. // Smoosh values together, eliminating 0s from all but the
  104. // least significant part.
  105. lastButOnePart := len(b26) - 2
  106. for i := lastButOnePart; i > 0; i-- {
  107. part := b26[i]
  108. if part == 0 {
  109. greaterPart := b26[i-1]
  110. if greaterPart > 0 {
  111. b26[i-1] = greaterPart - 1
  112. b26[i] = 26
  113. }
  114. }
  115. }
  116. return b26
  117. }
  118. func intToBase26(x int) (parts []int) {
  119. // Excel column codes are pure evil - in essence they're just
  120. // base26, but they don't represent the number 0.
  121. b26Denominator, _ := getLargestDenominator(x, 1, 26, 0)
  122. // This loop terminates because integer division of 1 / 26
  123. // returns 0.
  124. for d := b26Denominator; d > 0; d = d / 26 {
  125. value := x / d
  126. remainder := x % d
  127. parts = append(parts, value)
  128. x = remainder
  129. }
  130. return parts
  131. }
  132. // ColIndexToLetters is used to convert a zero based, numeric column
  133. // indentifier into a character code.
  134. func ColIndexToLetters(colRef int) string {
  135. parts := intToBase26(colRef)
  136. return formatColumnName(smooshBase26Slice(parts))
  137. }
  138. // RowIndexToString is used to convert a zero based, numeric row
  139. // indentifier into its string representation.
  140. func RowIndexToString(rowRef int) string {
  141. return strconv.Itoa(rowRef + 1)
  142. }
  143. // letterOnlyMapF is used in conjunction with strings.Map to return
  144. // only the characters A-Z and a-z in a string
  145. func letterOnlyMapF(rune rune) rune {
  146. switch {
  147. case 'A' <= rune && rune <= 'Z':
  148. return rune
  149. case 'a' <= rune && rune <= 'z':
  150. return rune - 32
  151. }
  152. return -1
  153. }
  154. // intOnlyMapF is used in conjunction with strings.Map to return only
  155. // the numeric portions of a string.
  156. func intOnlyMapF(rune rune) rune {
  157. if rune >= 48 && rune < 58 {
  158. return rune
  159. }
  160. return -1
  161. }
  162. // GetCoordsFromCellIDString returns the zero based cartesian
  163. // coordinates from a cell name in Excel format, e.g. the cellIDString
  164. // "A1" returns 0, 0 and the "B3" return 1, 2.
  165. func GetCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  166. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  167. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  168. if error != nil {
  169. return x, y, error
  170. }
  171. y -= 1 // Zero based
  172. x = ColLettersToIndex(letterPart)
  173. return x, y, error
  174. }
  175. // GetCellIDStringFromCoords returns the Excel format cell name that
  176. // represents a pair of zero based cartesian coordinates.
  177. func GetCellIDStringFromCoords(x, y int) string {
  178. return GetCellIDStringFromCoordsWithFixed(x, y, false, false)
  179. }
  180. // GetCellIDStringFromCoordsWithFixed returns the Excel format cell name that
  181. // represents a pair of zero based cartesian coordinates.
  182. // It can specify either value as fixed.
  183. func GetCellIDStringFromCoordsWithFixed(x, y int, xFixed, yFixed bool) string {
  184. xStr := ColIndexToLetters(x)
  185. if xFixed {
  186. xStr = fixedCellRefChar + xStr
  187. }
  188. yStr := RowIndexToString(y)
  189. if yFixed {
  190. yStr = fixedCellRefChar + yStr
  191. }
  192. return xStr + yStr
  193. }
  194. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  195. // and minimum coordinates from the dimension reference embedded in a
  196. // XLSX worksheet. For example, the dimension reference "A1:B2"
  197. // returns "0,0", "1,1".
  198. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  199. var parts []string
  200. parts = strings.Split(ref, cellRangeChar)
  201. minx, miny, err = GetCoordsFromCellIDString(parts[0])
  202. if err != nil {
  203. return -1, -1, -1, -1, err
  204. }
  205. maxx, maxy, err = GetCoordsFromCellIDString(parts[1])
  206. if err != nil {
  207. return -1, -1, -1, -1, err
  208. }
  209. return
  210. }
  211. // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet
  212. // that doesn't have a DimensionRef set. The only case currently
  213. // known where this is true is with XLSX exported from Google Docs.
  214. // This is also true for XLSX files created through the streaming APIs.
  215. func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) {
  216. // Note, this method could be very slow for large spreadsheets.
  217. var x, y int
  218. var maxVal int
  219. maxVal = int(^uint(0) >> 1)
  220. minx = maxVal
  221. miny = maxVal
  222. maxy = 0
  223. maxx = 0
  224. for _, row := range worksheet.SheetData.Row {
  225. for _, cell := range row.C {
  226. x, y, err = GetCoordsFromCellIDString(cell.R)
  227. if err != nil {
  228. return -1, -1, -1, -1, err
  229. }
  230. if x < minx {
  231. minx = x
  232. }
  233. if x > maxx {
  234. maxx = x
  235. }
  236. if y < miny {
  237. miny = y
  238. }
  239. if y > maxy {
  240. maxy = y
  241. }
  242. }
  243. }
  244. if minx == maxVal {
  245. minx = 0
  246. }
  247. if miny == maxVal {
  248. miny = 0
  249. }
  250. return
  251. }
  252. // makeRowFromSpan will, when given a span expressed as a string,
  253. // return an empty Row large enough to encompass that span and
  254. // populate it with empty cells. All rows start from cell 1 -
  255. // regardless of the lower bound of the span.
  256. func makeRowFromSpan(spans string, sheet *Sheet) *Row {
  257. var error error
  258. var upper int
  259. var row *Row
  260. var cell *Cell
  261. row = new(Row)
  262. row.Sheet = sheet
  263. _, upper, error = getRangeFromString(spans)
  264. if error != nil {
  265. panic(error)
  266. }
  267. error = nil
  268. row.Cells = make([]*Cell, upper)
  269. for i := 0; i < upper; i++ {
  270. cell = new(Cell)
  271. cell.Value = ""
  272. row.Cells[i] = cell
  273. }
  274. return row
  275. }
  276. // makeRowFromRaw returns the Row representation of the xlsxRow.
  277. func makeRowFromRaw(rawrow xlsxRow, sheet *Sheet) *Row {
  278. var upper int
  279. var row *Row
  280. var cell *Cell
  281. row = new(Row)
  282. row.Sheet = sheet
  283. upper = -1
  284. for _, rawcell := range rawrow.C {
  285. if rawcell.R != "" {
  286. x, _, error := GetCoordsFromCellIDString(rawcell.R)
  287. if error != nil {
  288. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  289. }
  290. if x > upper {
  291. upper = x
  292. }
  293. continue
  294. }
  295. upper++
  296. }
  297. upper++
  298. row.OutlineLevel = rawrow.OutlineLevel
  299. row.Cells = make([]*Cell, upper)
  300. for i := 0; i < upper; i++ {
  301. cell = new(Cell)
  302. cell.Value = ""
  303. row.Cells[i] = cell
  304. }
  305. return row
  306. }
  307. func makeEmptyRow(sheet *Sheet) *Row {
  308. row := new(Row)
  309. row.Cells = make([]*Cell, 0)
  310. row.Sheet = sheet
  311. return row
  312. }
  313. type sharedFormula struct {
  314. x, y int
  315. formula string
  316. }
  317. func formulaForCell(rawcell xlsxC, sharedFormulas map[int]sharedFormula) string {
  318. var res string
  319. f := rawcell.F
  320. if f == nil {
  321. return ""
  322. }
  323. if f.T == "shared" {
  324. x, y, err := GetCoordsFromCellIDString(rawcell.R)
  325. if err != nil {
  326. res = f.Content
  327. } else {
  328. if f.Ref != "" {
  329. res = f.Content
  330. sharedFormulas[f.Si] = sharedFormula{x, y, res}
  331. } else {
  332. sharedFormula := sharedFormulas[f.Si]
  333. dx := x - sharedFormula.x
  334. dy := y - sharedFormula.y
  335. orig := []byte(sharedFormula.formula)
  336. var start, end int
  337. var stringLiteral bool
  338. for end = 0; end < len(orig); end++ {
  339. c := orig[end]
  340. if c == '"' {
  341. stringLiteral = !stringLiteral
  342. }
  343. if stringLiteral {
  344. continue // Skip characters in quotes
  345. }
  346. if c >= 'A' && c <= 'Z' || c == '$' {
  347. res += string(orig[start:end])
  348. start = end
  349. end++
  350. foundNum := false
  351. for ; end < len(orig); end++ {
  352. idc := orig[end]
  353. if idc >= '0' && idc <= '9' || idc == '$' {
  354. foundNum = true
  355. } else if idc >= 'A' && idc <= 'Z' {
  356. if foundNum {
  357. break
  358. }
  359. } else {
  360. break
  361. }
  362. }
  363. if foundNum {
  364. cellID := string(orig[start:end])
  365. res += shiftCell(cellID, dx, dy)
  366. start = end
  367. }
  368. }
  369. }
  370. if start < len(orig) {
  371. res += string(orig[start:])
  372. }
  373. }
  374. }
  375. } else {
  376. res = f.Content
  377. }
  378. return strings.Trim(res, " \t\n\r")
  379. }
  380. // shiftCell returns the cell shifted according to dx and dy taking into consideration of absolute
  381. // references with dollar sign ($)
  382. func shiftCell(cellID string, dx, dy int) string {
  383. fx, fy, _ := GetCoordsFromCellIDString(cellID)
  384. // Is fixed column?
  385. fixedCol := strings.Index(cellID, fixedCellRefChar) == 0
  386. // Is fixed row?
  387. fixedRow := strings.LastIndex(cellID, fixedCellRefChar) > 0
  388. if !fixedCol {
  389. // Shift column
  390. fx += dx
  391. }
  392. if !fixedRow {
  393. // Shift row
  394. fy += dy
  395. }
  396. // New shifted cell
  397. shiftedCellID := GetCellIDStringFromCoords(fx, fy)
  398. if !fixedCol && !fixedRow {
  399. return shiftedCellID
  400. }
  401. // There are absolute references, need to put the $ back into the formula.
  402. letterPart := strings.Map(letterOnlyMapF, shiftedCellID)
  403. numberPart := strings.Map(intOnlyMapF, shiftedCellID)
  404. result := ""
  405. if fixedCol {
  406. result += "$"
  407. }
  408. result += letterPart
  409. if fixedRow {
  410. result += "$"
  411. }
  412. result += numberPart
  413. return result
  414. }
  415. // fillCellData attempts to extract a valid value, usable in
  416. // CSV form from the raw cell value. Note - this is not actually
  417. // general enough - we should support retaining tabs and newlines.
  418. func fillCellData(rawCell xlsxC, refTable *RefTable, sharedFormulas map[int]sharedFormula, cell *Cell) {
  419. val := strings.Trim(rawCell.V, " \t\n\r")
  420. cell.formula = formulaForCell(rawCell, sharedFormulas)
  421. switch rawCell.T {
  422. case "s": // Shared String
  423. cell.cellType = CellTypeString
  424. if val != "" {
  425. ref, err := strconv.Atoi(val)
  426. if err != nil {
  427. panic(err)
  428. }
  429. cell.Value = refTable.ResolveSharedString(ref)
  430. }
  431. case "inlineStr":
  432. cell.cellType = CellTypeInline
  433. fillCellDataFromInlineString(rawCell, cell)
  434. case "b": // Boolean
  435. cell.Value = val
  436. cell.cellType = CellTypeBool
  437. case "e": // Error
  438. cell.Value = val
  439. cell.cellType = CellTypeError
  440. case "str":
  441. // String Formula (special type for cells with formulas that return a string value)
  442. // Unlike the other string cell types, the string is stored directly in the value.
  443. cell.Value = val
  444. cell.cellType = CellTypeStringFormula
  445. case "d": // Date: Cell contains a date in the ISO 8601 format.
  446. cell.Value = val
  447. cell.cellType = CellTypeDate
  448. case "": // Numeric is the default
  449. fallthrough
  450. case "n": // Numeric
  451. cell.Value = val
  452. cell.cellType = CellTypeNumeric
  453. default:
  454. panic(errors.New("invalid cell type"))
  455. }
  456. }
  457. // fillCellDataFromInlineString attempts to get inline string data and put it into a Cell.
  458. func fillCellDataFromInlineString(rawcell xlsxC, cell *Cell) {
  459. cell.Value = ""
  460. if rawcell.Is != nil {
  461. if rawcell.Is.T != "" {
  462. cell.Value = strings.Trim(rawcell.Is.T, " \t\n\r")
  463. } else {
  464. for _, r := range rawcell.Is.R {
  465. cell.Value += r.T
  466. }
  467. }
  468. }
  469. }
// readRowsFromSheet is an internal helper function that extracts the
// rows from an xlsxWorksheet, populates them with Cells, resolves
// the value references from the file's reference table and stores
// them in the rows and columns.
//
// It returns the rows, the column store, the column count and the row
// count. A worksheet without any row data yields nil, nil, 0, 0.
// The function panics if the sheet dimensions or a merge-cell extent
// cannot be determined.
func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet, rowLimit int) ([]*Row, *ColStore, int, int) {
	var rows []*Row
	var cols *ColStore
	var row *Row
	var minCol, maxCol, maxRow, colCount, rowCount int
	var reftable *RefTable
	var err error
	var insertRowIndex, insertColIndex int
	// Shared formulas seen so far in this sheet, keyed by shared index.
	sharedFormulas := map[int]sharedFormula{}
	if len(Worksheet.SheetData.Row) == 0 {
		return nil, nil, 0, 0
	}
	reftable = file.referenceTable
	// Trust the dimension reference only when it is a two-part range and
	// no row limit is in force; otherwise measure the cells themselves.
	if len(Worksheet.Dimension.Ref) > 0 && len(strings.Split(Worksheet.Dimension.Ref, cellRangeChar)) == 2 && rowLimit == NoRowLimit {
		minCol, _, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
	} else {
		minCol, _, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet)
	}
	if err != nil {
		panic(err.Error())
	}
	// Coordinates are zero based, so counts are one more than the maxima.
	rowCount = maxRow + 1
	colCount = maxCol + 1
	rows = make([]*Row, rowCount)
	cols = &ColStore{}
	if Worksheet.Cols != nil {
		// Columns can apply to a range, for convenience we expand the
		// ranges out into individual column definitions.
		for _, rawcol := range Worksheet.Cols.Col {
			col := &Col{
				Min:          rawcol.Min,
				Max:          rawcol.Max,
				Hidden:       rawcol.Hidden,
				Width:        rawcol.Width,
				OutlineLevel: rawcol.OutlineLevel,
				BestFit:      rawcol.BestFit,
				CustomWidth:  rawcol.CustomWidth,
				Phonetic:     rawcol.Phonetic,
				Collapsed:    rawcol.Collapsed,
			}
			// Style information is optional on the file.
			if file.styles != nil {
				col.style = file.styles.getStyle(rawcol.Style)
				col.numFmt, col.parsedNumFmt = file.styles.getNumberFormat(rawcol.Style)
			}
			cols.Add(col)
		}
	}
	numRows := len(rows)
	for rowIndex := 0; rowIndex < len(Worksheet.SheetData.Row); rowIndex++ {
		rawrow := Worksheet.SheetData.Row[rowIndex]
		// Some spreadsheets will omit blank rows from the
		// stored data
		for rawrow.R > (insertRowIndex + 1) {
			// Put an empty Row into the array, unless that would fall
			// outside the rows allocated from the sheet dimensions.
			if insertRowIndex < numRows {
				rows[insertRowIndex] = makeEmptyRow(sheet)
			}
			insertRowIndex++
		}
		// Size the row from its span only when the span is non-empty
		// and describes exactly one range; otherwise size it from the
		// raw cells.
		if len(rawrow.Spans) != 0 && strings.Count(rawrow.Spans, cellRangeChar) == 1 {
			row = makeRowFromSpan(rawrow.Spans, sheet)
		} else {
			row = makeRowFromRaw(rawrow, sheet)
		}
		row.Hidden = rawrow.Hidden
		// A height that does not parse as a float is ignored and the
		// row keeps its zero height.
		height, err := strconv.ParseFloat(rawrow.Ht, 64)
		if err == nil {
			row.Height = height
		}
		row.isCustom = rawrow.CustomHeight
		row.OutlineLevel = rawrow.OutlineLevel
		insertColIndex = minCol
		for _, rawcell := range rawrow.C {
			h, v, err := Worksheet.MergeCells.getExtent(rawcell.R)
			if err != nil {
				panic(err.Error())
			}
			// The parse error is ignored here; x is used as returned.
			x, _, _ := GetCoordsFromCellIDString(rawcell.R)
			// Some spreadsheets will omit blank cells from the data.
			// The bounds checks below prevent a panic when a cell's
			// column lies beyond the cells allocated for this row.
			for x > insertColIndex {
				// Put an empty Cell into the array
				if insertColIndex < len(row.Cells) {
					row.Cells[insertColIndex] = new(Cell)
				}
				insertColIndex++
			}
			cellX := insertColIndex
			if cellX < len(row.Cells) {
				cell := row.Cells[cellX]
				cell.HMerge = h
				cell.VMerge = v
				fillCellData(rawcell, reftable, sharedFormulas, cell)
				if file.styles != nil {
					cell.style = file.styles.getStyle(rawcell.S)
					cell.NumFmt, cell.parsedNumFmt = file.styles.getNumberFormat(rawcell.S)
				}
				cell.date1904 = file.Date1904
				// Cell is considered hidden if the row or the column of this cell is hidden.
				// NOTE(review): the lookup uses cellX + 1, so FindColByIndex
				// presumably takes a one based index — confirm.
				col := cols.FindColByIndex(cellX + 1)
				cell.Hidden = rawrow.Hidden || (col != nil && col.Hidden)
				insertColIndex++
			}
		}
		// Rows beyond the allocated slice are dropped.
		if len(rows) > insertRowIndex {
			rows[insertRowIndex] = row
		}
		insertRowIndex++
	}
	// insert trailing empty rows for the rest of the file
	for ; insertRowIndex < rowCount; insertRowIndex++ {
		rows[insertRowIndex] = makeEmptyRow(sheet)
	}
	return rows, cols, colCount, rowCount
}
// indexedSheet is the message sent over the channel by
// readSheetFromFile: the outcome of reading one worksheet together
// with the position of that worksheet in the list being read.
type indexedSheet struct {
	// Index is the position of the sheet in the list of sheets being read.
	Index int
	// Sheet is the sheet that was read; it is nil when reading failed.
	Sheet *Sheet
	// Error is the failure encountered while reading, or nil on success.
	Error error
}
  598. func readSheetViews(xSheetViews xlsxSheetViews) []SheetView {
  599. if xSheetViews.SheetView == nil || len(xSheetViews.SheetView) == 0 {
  600. return nil
  601. }
  602. sheetViews := []SheetView{}
  603. for _, xSheetView := range xSheetViews.SheetView {
  604. sheetView := SheetView{}
  605. if xSheetView.Pane != nil {
  606. xlsxPane := xSheetView.Pane
  607. pane := &Pane{}
  608. pane.XSplit = xlsxPane.XSplit
  609. pane.YSplit = xlsxPane.YSplit
  610. pane.TopLeftCell = xlsxPane.TopLeftCell
  611. pane.ActivePane = xlsxPane.ActivePane
  612. pane.State = xlsxPane.State
  613. sheetView.Pane = pane
  614. }
  615. sheetViews = append(sheetViews, sheetView)
  616. }
  617. return sheetViews
  618. }
  619. // readSheetFromFile is the logic of converting a xlsxSheet struct
  620. // into a Sheet struct. This work can be done in parallel and so
  621. // readSheetsFromZipFile will spawn an instance of this function per
  622. // sheet and get the results back on the provided channel.
  623. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string, rowLimit int) (errRes error) {
  624. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  625. defer func() {
  626. if e := recover(); e != nil {
  627. switch e.(type) {
  628. case error:
  629. result.Error = e.(error)
  630. errRes = e.(error)
  631. default:
  632. result.Error = errors.New("unexpected error")
  633. }
  634. // The only thing here, is if one close the channel. but its not the case
  635. sc <- result
  636. }
  637. }()
  638. worksheet, err := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap, rowLimit)
  639. if err != nil {
  640. result.Error = err
  641. sc <- result
  642. return err
  643. }
  644. sheet := new(Sheet)
  645. sheet.File = fi
  646. sheet.Rows, sheet.Cols, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi, sheet, rowLimit)
  647. sheet.Hidden = rsheet.State == sheetStateHidden || rsheet.State == sheetStateVeryHidden
  648. sheet.SheetViews = readSheetViews(worksheet.SheetViews)
  649. sheet.SheetFormat.DefaultColWidth = worksheet.SheetFormatPr.DefaultColWidth
  650. sheet.SheetFormat.DefaultRowHeight = worksheet.SheetFormatPr.DefaultRowHeight
  651. sheet.SheetFormat.OutlineLevelCol = worksheet.SheetFormatPr.OutlineLevelCol
  652. sheet.SheetFormat.OutlineLevelRow = worksheet.SheetFormatPr.OutlineLevelRow
  653. if nil != worksheet.DataValidations {
  654. for _, dd := range worksheet.DataValidations.DataValidation {
  655. sqrefArr := strings.Split(dd.Sqref, " ")
  656. for _, sqref := range sqrefArr {
  657. parts := strings.Split(sqref, cellRangeChar)
  658. minCol, minRow, err := GetCoordsFromCellIDString(parts[0])
  659. if nil != err {
  660. return fmt.Errorf("data validation %s", err.Error())
  661. }
  662. if 2 == len(parts) {
  663. maxCol, maxRow, err := GetCoordsFromCellIDString(parts[1])
  664. if nil != err {
  665. return fmt.Errorf("data validation %s", err.Error())
  666. }
  667. if minCol == maxCol && minRow == maxRow {
  668. newDD := new(xlsxCellDataValidation)
  669. *newDD = *dd
  670. newDD.Sqref = ""
  671. sheet.Cell(minRow, minCol).SetDataValidation(newDD)
  672. } else {
  673. // one col mutli dd , error todo
  674. for i := minCol; i <= maxCol; i++ {
  675. newDD := new(xlsxCellDataValidation)
  676. *newDD = *dd
  677. newDD.Sqref = ""
  678. sheet.Col(i).SetDataValidation(dd, minRow, maxRow)
  679. }
  680. }
  681. } else {
  682. newDD := new(xlsxCellDataValidation)
  683. *newDD = *dd
  684. newDD.Sqref = ""
  685. sheet.Cell(minRow, minCol).SetDataValidation(dd)
  686. }
  687. }
  688. }
  689. }
  690. result.Sheet = sheet
  691. sc <- result
  692. return nil
  693. }
  694. // readSheetsFromZipFile is an internal helper function that loops
  695. // over the Worksheets defined in the XSLXWorkbook and loads them into
  696. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  697. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string, rowLimit int) (map[string]*Sheet, []*Sheet, error) {
  698. var workbook *xlsxWorkbook
  699. var err error
  700. var rc io.ReadCloser
  701. var decoder *xml.Decoder
  702. var sheetCount int
  703. workbook = new(xlsxWorkbook)
  704. rc, err = f.Open()
  705. if err != nil {
  706. return nil, nil, err
  707. }
  708. decoder = xml.NewDecoder(rc)
  709. err = decoder.Decode(workbook)
  710. if err != nil {
  711. return nil, nil, err
  712. }
  713. file.Date1904 = workbook.WorkbookPr.Date1904
  714. for entryNum := range workbook.DefinedNames.DefinedName {
  715. file.DefinedNames = append(file.DefinedNames, &workbook.DefinedNames.DefinedName[entryNum])
  716. }
  717. // Only try and read sheets that have corresponding files.
  718. // Notably this excludes chartsheets don't right now
  719. var workbookSheets []xlsxSheet
  720. for _, sheet := range workbook.Sheets.Sheet {
  721. if f := worksheetFileForSheet(sheet, file.worksheets, sheetXMLMap); f != nil {
  722. workbookSheets = append(workbookSheets, sheet)
  723. }
  724. }
  725. sheetCount = len(workbookSheets)
  726. sheetsByName := make(map[string]*Sheet, sheetCount)
  727. sheets := make([]*Sheet, sheetCount)
  728. sheetChan := make(chan *indexedSheet, sheetCount)
  729. go func() {
  730. defer close(sheetChan)
  731. err = nil
  732. for i, rawsheet := range workbookSheets {
  733. if err := readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap, rowLimit); err != nil {
  734. return
  735. }
  736. }
  737. }()
  738. for j := 0; j < sheetCount; j++ {
  739. sheet := <-sheetChan
  740. if sheet.Error != nil {
  741. return nil, nil, sheet.Error
  742. }
  743. sheetName := workbookSheets[sheet.Index].Name
  744. sheetsByName[sheetName] = sheet.Sheet
  745. sheet.Sheet.Name = sheetName
  746. sheets[sheet.Index] = sheet.Sheet
  747. }
  748. return sheetsByName, sheets, nil
  749. }
  750. // readSharedStringsFromZipFile() is an internal helper function to
  751. // extract a reference table from the sharedStrings.xml file within
  752. // the XLSX zip file.
  753. func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
  754. var sst *xlsxSST
  755. var error error
  756. var rc io.ReadCloser
  757. var decoder *xml.Decoder
  758. var reftable *RefTable
  759. // In a file with no strings it's possible that
  760. // sharedStrings.xml doesn't exist. In this case the value
  761. // passed as f will be nil.
  762. if f == nil {
  763. return nil, nil
  764. }
  765. rc, error = f.Open()
  766. if error != nil {
  767. return nil, error
  768. }
  769. sst = new(xlsxSST)
  770. decoder = xml.NewDecoder(rc)
  771. error = decoder.Decode(sst)
  772. if error != nil {
  773. return nil, error
  774. }
  775. reftable = MakeSharedStringRefTable(sst)
  776. return reftable, nil
  777. }
  778. // readStylesFromZipFile() is an internal helper function to
  779. // extract a style table from the style.xml file within
  780. // the XLSX zip file.
  781. func readStylesFromZipFile(f *zip.File, theme *theme) (*xlsxStyleSheet, error) {
  782. var style *xlsxStyleSheet
  783. var error error
  784. var rc io.ReadCloser
  785. var decoder *xml.Decoder
  786. rc, error = f.Open()
  787. if error != nil {
  788. return nil, error
  789. }
  790. style = newXlsxStyleSheet(theme)
  791. decoder = xml.NewDecoder(rc)
  792. error = decoder.Decode(style)
  793. if error != nil {
  794. return nil, error
  795. }
  796. buildNumFmtRefTable(style)
  797. return style, nil
  798. }
  799. func buildNumFmtRefTable(style *xlsxStyleSheet) {
  800. for _, numFmt := range style.NumFmts.NumFmt {
  801. // We do this for the side effect of populating the NumFmtRefTable.
  802. style.addNumFmt(numFmt)
  803. }
  804. }
  805. func readThemeFromZipFile(f *zip.File) (*theme, error) {
  806. rc, err := f.Open()
  807. if err != nil {
  808. return nil, err
  809. }
  810. var themeXml xlsxTheme
  811. err = xml.NewDecoder(rc).Decode(&themeXml)
  812. if err != nil {
  813. return nil, err
  814. }
  815. return newTheme(themeXml), nil
  816. }
  817. type WorkBookRels map[string]string
  818. func (w *WorkBookRels) MakeXLSXWorkbookRels() xlsxWorkbookRels {
  819. relCount := len(*w)
  820. xWorkbookRels := xlsxWorkbookRels{}
  821. xWorkbookRels.Relationships = make([]xlsxWorkbookRelation, relCount+3)
  822. for k, v := range *w {
  823. index, err := strconv.Atoi(k[3:])
  824. if err != nil {
  825. panic(err.Error())
  826. }
  827. xWorkbookRels.Relationships[index-1] = xlsxWorkbookRelation{
  828. Id: k,
  829. Target: v,
  830. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"}
  831. }
  832. relCount++
  833. sheetId := fmt.Sprintf("rId%d", relCount)
  834. xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{
  835. Id: sheetId,
  836. Target: "sharedStrings.xml",
  837. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"}
  838. relCount++
  839. sheetId = fmt.Sprintf("rId%d", relCount)
  840. xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{
  841. Id: sheetId,
  842. Target: "theme/theme1.xml",
  843. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"}
  844. relCount++
  845. sheetId = fmt.Sprintf("rId%d", relCount)
  846. xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{
  847. Id: sheetId,
  848. Target: "styles.xml",
  849. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"}
  850. return xWorkbookRels
  851. }
  852. // readWorkbookRelationsFromZipFile is an internal helper function to
  853. // extract a map of relationship ID strings to the name of the
  854. // worksheet.xml file they refer to. The resulting map can be used to
  855. // reliably derefence the worksheets in the XLSX file.
  856. func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (WorkBookRels, error) {
  857. var sheetXMLMap WorkBookRels
  858. var wbRelationships *xlsxWorkbookRels
  859. var rc io.ReadCloser
  860. var decoder *xml.Decoder
  861. var err error
  862. rc, err = workbookRels.Open()
  863. if err != nil {
  864. return nil, err
  865. }
  866. decoder = xml.NewDecoder(rc)
  867. wbRelationships = new(xlsxWorkbookRels)
  868. err = decoder.Decode(wbRelationships)
  869. if err != nil {
  870. return nil, err
  871. }
  872. sheetXMLMap = make(WorkBookRels)
  873. for _, rel := range wbRelationships.Relationships {
  874. if strings.HasSuffix(rel.Target, ".xml") && rel.Type == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" {
  875. _, filename := path.Split(rel.Target)
  876. sheetXMLMap[rel.Id] = strings.Replace(filename, ".xml", "", 1)
  877. }
  878. }
  879. return sheetXMLMap, nil
  880. }
// ReadZip takes a pointer to a zip.ReadCloser and returns a
// xlsx.File struct populated with its contents. In most cases
// ReadZip is not used directly, but is called internally by OpenFile.
//
// No row limit is applied: the work is delegated to ReadZipWithRowLimit
// with NoRowLimit, which also closes f before returning.
func ReadZip(f *zip.ReadCloser) (*File, error) {
	return ReadZipWithRowLimit(f, NoRowLimit)
}
// ReadZipWithRowLimit takes a pointer to a zip.ReadCloser and returns a
// xlsx.File struct populated with its contents, passing rowLimit on to
// ReadZipReaderWithRowLimit. ReadZip is a convenience wrapper around
// this function that passes NoRowLimit.
//
// f is closed when the function returns, whether or not reading
// succeeded.
func ReadZipWithRowLimit(f *zip.ReadCloser, rowLimit int) (*File, error) {
	// The deferred Close runs once the archive has been read below.
	defer f.Close()
	return ReadZipReaderWithRowLimit(&f.Reader, rowLimit)
}
// ReadZipReader can be used to read an XLSX in memory without
// touching the filesystem. No row limit is applied: it calls
// ReadZipReaderWithRowLimit with NoRowLimit.
func ReadZipReader(r *zip.Reader) (*File, error) {
	return ReadZipReaderWithRowLimit(r, NoRowLimit)
}
  899. // ReadZipReaderWithRowLimit() can be used to read an XLSX in memory without
  900. // touching the filesystem.
  901. // rowLimit is the number of rows that should be read from the file. If rowLimit is -1, no limit is applied.
  902. // You can specify this with the constant NoRowLimit.
  903. func ReadZipReaderWithRowLimit(r *zip.Reader, rowLimit int) (*File, error) {
  904. var err error
  905. var file *File
  906. var reftable *RefTable
  907. var sharedStrings *zip.File
  908. var sheetXMLMap map[string]string
  909. var sheetsByName map[string]*Sheet
  910. var sheets []*Sheet
  911. var style *xlsxStyleSheet
  912. var styles *zip.File
  913. var themeFile *zip.File
  914. var v *zip.File
  915. var workbook *zip.File
  916. var workbookRels *zip.File
  917. var worksheets map[string]*zip.File
  918. file = NewFile()
  919. // file.numFmtRefTable = make(map[int]xlsxNumFmt, 1)
  920. worksheets = make(map[string]*zip.File, len(r.File))
  921. for _, v = range r.File {
  922. switch v.Name {
  923. case "xl/sharedStrings.xml":
  924. sharedStrings = v
  925. case "xl/workbook.xml":
  926. workbook = v
  927. case "xl/_rels/workbook.xml.rels":
  928. workbookRels = v
  929. case "xl/styles.xml":
  930. styles = v
  931. case "xl/theme/theme1.xml":
  932. themeFile = v
  933. default:
  934. if len(v.Name) > 17 {
  935. if v.Name[0:13] == "xl/worksheets" {
  936. worksheets[v.Name[14:len(v.Name)-4]] = v
  937. }
  938. }
  939. }
  940. }
  941. if workbookRels == nil {
  942. return nil, fmt.Errorf("xl/_rels/workbook.xml.rels not found in input xlsx.")
  943. }
  944. sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels)
  945. if err != nil {
  946. return nil, err
  947. }
  948. if len(worksheets) == 0 {
  949. return nil, fmt.Errorf("Input xlsx contains no worksheets.")
  950. }
  951. file.worksheets = worksheets
  952. reftable, err = readSharedStringsFromZipFile(sharedStrings)
  953. if err != nil {
  954. return nil, err
  955. }
  956. file.referenceTable = reftable
  957. if themeFile != nil {
  958. theme, err := readThemeFromZipFile(themeFile)
  959. if err != nil {
  960. return nil, err
  961. }
  962. file.theme = theme
  963. }
  964. if styles != nil {
  965. style, err = readStylesFromZipFile(styles, file.theme)
  966. if err != nil {
  967. return nil, err
  968. }
  969. file.styles = style
  970. }
  971. sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap, rowLimit)
  972. if err != nil {
  973. return nil, err
  974. }
  975. if sheets == nil {
  976. readerErr := new(XLSXReaderError)
  977. readerErr.Err = "No sheets found in XLSX File"
  978. return nil, readerErr
  979. }
  980. file.Sheet = sheetsByName
  981. file.Sheets = sheets
  982. return file, nil
  983. }
  984. // truncateSheetXML will take in a reader to an XML sheet file and will return a reader that will read an equivalent
  985. // XML sheet file with only the number of rows specified. This greatly speeds up XML unmarshalling when only
  986. // a few rows need to be read from a large sheet.
  987. // When sheets are truncated, all formatting present after the sheetData tag will be lost, but all of this formatting
  988. // is related to printing and visibility, and is out of scope for most purposes of this library.
  989. func truncateSheetXML(r io.Reader, rowLimit int) (io.Reader, error) {
  990. var rowCount int
  991. var token xml.Token
  992. var readErr error
  993. output := new(bytes.Buffer)
  994. r = io.TeeReader(r, output)
  995. decoder := xml.NewDecoder(r)
  996. for {
  997. token, readErr = decoder.Token()
  998. if readErr == io.EOF {
  999. break
  1000. } else if readErr != nil {
  1001. return nil, readErr
  1002. }
  1003. end, ok := token.(xml.EndElement)
  1004. if ok && end.Name.Local == "row" {
  1005. rowCount++
  1006. if rowCount >= rowLimit {
  1007. break
  1008. }
  1009. }
  1010. }
  1011. offset := decoder.InputOffset()
  1012. output.Truncate(int(offset))
  1013. if readErr != io.EOF {
  1014. _, err := output.Write([]byte(sheetEnding))
  1015. if err != nil {
  1016. return nil, err
  1017. }
  1018. }
  1019. return output, nil
  1020. }