lib.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
// XLSXReaderError is the standard error type for otherwise undefined
// errors in the XLSX reading process.
type XLSXReaderError struct {
	// Err is the message returned by the Error method.
	Err string
}
// Error returns the message held in the XLSXReaderError, which makes
// *XLSXReaderError satisfy the built-in error interface.
func (e *XLSXReaderError) Error() string {
	return e.Err
}
// Cell is a high level structure intended to provide user access to
// the contents of a Cell within an xlsx.Row.
type Cell struct {
	// Value is the textual content of the cell.
	Value string
	// styleIndex is copied from the raw cell's S field when the sheet is
	// read; GetStyle uses it to pick an entry of styles.CellXfs.
	styleIndex int
	// styles is the style table of the File the cell was read from. It
	// stays nil for padding cells that never received raw cell data.
	styles *xlsxStyles
}
// CellInterface defines the public API of the Cell.
type CellInterface interface {
	// String returns the value of the implementation as a string.
	String() string
}
// String returns the value of a Cell as a string. It is the Value
// field, returned unchanged.
func (c *Cell) String() string {
	return c.Value
}
  36. // GetStyle returns the Style associated with a Cell
  37. func (c *Cell) GetStyle() *Style {
  38. style := &Style{}
  39. if c.styleIndex > 0 && c.styleIndex <= len(c.styles.CellXfs) {
  40. xf := c.styles.CellXfs[c.styleIndex-1]
  41. if xf.ApplyBorder {
  42. var border Border
  43. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  44. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  45. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  46. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  47. style.Border = border
  48. }
  49. if xf.ApplyFill {
  50. var fill Fill
  51. fill.PatternType = c.styles.Fills[xf.FillId].PatternFill.PatternType
  52. fill.BgColor = c.styles.Fills[xf.FillId].PatternFill.BgColor.RGB
  53. fill.FgColor = c.styles.Fills[xf.FillId].PatternFill.FgColor.RGB
  54. style.Fill = fill
  55. }
  56. if xf.ApplyFont {
  57. font := c.styles.Fonts[xf.FontId]
  58. style.Font = Font{}
  59. style.Font.Size, _ = strconv.Atoi(font.Sz.Val)
  60. style.Font.Name = font.Name.Val
  61. style.Font.Family, _ = strconv.Atoi(font.Family.Val)
  62. style.Font.Charset, _ = strconv.Atoi(font.Charset.Val)
  63. }
  64. }
  65. return style
  66. }
// Row is a high level structure intended to provide user access to a
// row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
type Row struct {
	// Cells holds the row's cells in column order.
	Cells []*Cell
}
  72. // Sheet is a high level structure intended to provide user access to
  73. // the contents of a particular sheet within an XLSX file.
  74. type Sheet struct {
  75. Name string
  76. Rows []*Row
  77. MaxRow int
  78. MaxCol int
  79. }
// Style is a high level structure intended to provide user access to
// the contents of Style within an XLSX file.
type Style struct {
	// Border describes the line style of the four cell edges.
	Border Border
	// Fill describes the pattern and colours of the cell background.
	Fill Fill
	// Font describes the typeface used for the cell content.
	Font Font
}
  87. // Border is a high level structure intended to provide user access to
  88. // the contents of Border Style within an Sheet.
  89. type Border struct {
  90. Left string
  91. Right string
  92. Top string
  93. Bottom string
  94. }
  95. // Fill is a high level structure intended to provide user access to
  96. // the contents of background and foreground color index within an Sheet.
  97. type Fill struct {
  98. PatternType string
  99. BgColor string
  100. FgColor string
  101. }
  102. type Font struct {
  103. Size int
  104. Name string
  105. Family int
  106. Charset int
  107. }
// File is a high level structure providing a slice of Sheet structs
// to the user.
type File struct {
	// worksheets maps a worksheet part name (directory prefix and file
	// extension stripped) to its entry in the zip archive.
	worksheets map[string]*zip.File
	// referenceTable is the shared string table; cells of type "s"
	// store an index into it.
	referenceTable []string
	// styles is the decoded style table handed to every populated Cell.
	styles *xlsxStyles
	Sheets []*Sheet          // sheet access by index
	Sheet  map[string]*Sheet // sheet access by name
}
  117. // getRangeFromString is an internal helper function that converts
  118. // XLSX internal range syntax to a pair of integers. For example,
  119. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  120. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  121. var parts []string
  122. parts = strings.SplitN(rangeString, ":", 2)
  123. if parts[0] == "" {
  124. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  125. }
  126. if parts[1] == "" {
  127. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  128. }
  129. lower, error = strconv.Atoi(parts[0])
  130. if error != nil {
  131. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  132. }
  133. upper, error = strconv.Atoi(parts[1])
  134. if error != nil {
  135. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  136. }
  137. return lower, upper, error
  138. }
  139. // lettersToNumeric is used to convert a character based column
  140. // reference to a zero based numeric column identifier.
  141. func lettersToNumeric(letters string) int {
  142. sum, mul, n := 0, 1, 0
  143. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  144. c := letters[i]
  145. switch {
  146. case 'A' <= c && c <= 'Z':
  147. n += int(c - 'A')
  148. case 'a' <= c && c <= 'z':
  149. n += int(c - 'a')
  150. }
  151. sum += n * mul
  152. }
  153. return sum
  154. }
  155. // letterOnlyMapF is used in conjunction with strings.Map to return
  156. // only the characters A-Z and a-z in a string
  157. func letterOnlyMapF(rune rune) rune {
  158. switch {
  159. case 'A' <= rune && rune <= 'Z':
  160. return rune
  161. case 'a' <= rune && rune <= 'z':
  162. return rune - 32
  163. }
  164. return -1
  165. }
  166. // intOnlyMapF is used in conjunction with strings.Map to return only
  167. // the numeric portions of a string.
  168. func intOnlyMapF(rune rune) rune {
  169. if rune >= 48 && rune < 58 {
  170. return rune
  171. }
  172. return -1
  173. }
  174. // getCoordsFromCellIDString returns the zero based cartesian
  175. // coordinates from a cell name in Excel format, e.g. the cellIDString
  176. // "A1" returns 0, 0 and the "B3" return 1, 2.
  177. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  178. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  179. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  180. if error != nil {
  181. return x, y, error
  182. }
  183. y -= 1 // Zero based
  184. x = lettersToNumeric(letterPart)
  185. return x, y, error
  186. }
  187. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  188. // and minimum coordinates from the dimension reference embedded in a
  189. // XLSX worksheet. For example, the dimension reference "A1:B2"
  190. // returns "0,0", "1,1".
  191. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  192. var parts []string
  193. parts = strings.Split(ref, ":")
  194. minx, miny, err = getCoordsFromCellIDString(parts[0])
  195. if err != nil {
  196. return -1, -1, -1, -1, err
  197. }
  198. if len(parts) == 1 {
  199. maxx, maxy = minx, miny
  200. return
  201. }
  202. maxx, maxy, err = getCoordsFromCellIDString(parts[1])
  203. if err != nil {
  204. return -1, -1, -1, -1, err
  205. }
  206. return
  207. }
  208. // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet
  209. // that doesn't have a DimensionRef set. The only case currently
  210. // known where this is true is with XLSX exported from Google Docs.
  211. func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) {
  212. // Note, this method could be very slow for large spreadsheets.
  213. var x, y int
  214. minx = 0
  215. miny = 0
  216. maxy = 0
  217. maxx = 0
  218. for _, row := range worksheet.SheetData.Row {
  219. for _, cell := range row.C {
  220. x, y, err = getCoordsFromCellIDString(cell.R)
  221. if err != nil {
  222. return -1, -1, -1, -1, err
  223. }
  224. if x < minx {
  225. minx = x
  226. }
  227. if x > maxx {
  228. maxx = x
  229. }
  230. if y < miny {
  231. miny = y
  232. }
  233. if y > maxy {
  234. maxy = y
  235. }
  236. }
  237. }
  238. return
  239. }
  240. // makeRowFromSpan will, when given a span expressed as a string,
  241. // return an empty Row large enough to encompass that span and
  242. // populate it with empty cells. All rows start from cell 1 -
  243. // regardless of the lower bound of the span.
  244. func makeRowFromSpan(spans string) *Row {
  245. var error error
  246. var upper int
  247. var row *Row
  248. var cell *Cell
  249. row = new(Row)
  250. _, upper, error = getRangeFromString(spans)
  251. if error != nil {
  252. panic(error)
  253. }
  254. error = nil
  255. row.Cells = make([]*Cell, upper)
  256. for i := 0; i < upper; i++ {
  257. cell = new(Cell)
  258. cell.Value = ""
  259. row.Cells[i] = cell
  260. }
  261. return row
  262. }
  263. // makeRowFromRaw returns the Row representation of the xlsxRow.
  264. func makeRowFromRaw(rawrow xlsxRow) *Row {
  265. var upper int
  266. var row *Row
  267. var cell *Cell
  268. row = new(Row)
  269. upper = -1
  270. for _, rawcell := range rawrow.C {
  271. x, _, error := getCoordsFromCellIDString(rawcell.R)
  272. if error != nil {
  273. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  274. }
  275. if x > upper {
  276. upper = x
  277. }
  278. }
  279. upper++
  280. row.Cells = make([]*Cell, upper)
  281. for i := 0; i < upper; i++ {
  282. cell = new(Cell)
  283. cell.Value = ""
  284. row.Cells[i] = cell
  285. }
  286. return row
  287. }
  288. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  289. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  290. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  291. var value string = ""
  292. var data string = rawcell.V
  293. if len(data) > 0 {
  294. vval := strings.Trim(data, " \t\n\r")
  295. if rawcell.T == "s" {
  296. ref, error := strconv.Atoi(vval)
  297. if error != nil {
  298. panic(error)
  299. }
  300. value = reftable[ref]
  301. } else {
  302. value = vval
  303. }
  304. }
  305. return value
  306. }
// readRowsFromSheet is an internal helper function that extracts the
// rows from a xlsxWorksheet, populates them with Cells, resolves the
// value references from the file's reference table and stores them in
// the returned slice of Rows.
//
// Besides the rows it returns the column count and the row count, in
// that order. A worksheet without any row data yields nil, 0, 0.
//
// It panics when the sheet dimensions cannot be determined; the row
// construction helpers it calls may panic as well on malformed cell
// data.
func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
	var rows []*Row
	var row *Row
	var minCol, maxCol, minRow, maxRow, colCount, rowCount int
	var reftable []string
	var err error
	var insertRowIndex, insertColIndex int
	if len(Worksheet.SheetData.Row) == 0 {
		return nil, 0, 0
	}
	reftable = file.referenceTable
	// Prefer the dimension reference stored in the worksheet; fall back
	// to scanning every cell when the worksheet does not carry one.
	if len(Worksheet.Dimension.Ref) > 0 {
		minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
	} else {
		minCol, minRow, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet)
	}
	if err != nil {
		panic(err.Error())
	}
	rowCount = (maxRow - minRow) + 1
	colCount = (maxCol - minCol) + 1
	rows = make([]*Row, rowCount)
	// insertRowIndex is the zero based sheet row the next Row belongs
	// to; subtracting minRow turns it into an index into rows.
	insertRowIndex = minRow
	for rowIndex := 0; rowIndex < len(Worksheet.SheetData.Row); rowIndex++ {
		rawrow := Worksheet.SheetData.Row[rowIndex]
		// Some spreadsheets will omit blank rows from the
		// stored data
		// (rawrow.R is compared against insertRowIndex+1, i.e. it is
		// treated as a one based row number.)
		for rawrow.R > (insertRowIndex + 1) {
			// Put an empty Row into the array
			rows[insertRowIndex-minRow] = new(Row)
			insertRowIndex++
		}
		// range is not empty and only one range exist
		if len(rawrow.Spans) != 0 && strings.Count(rawrow.Spans, ":") == 1 {
			row = makeRowFromSpan(rawrow.Spans)
		} else {
			row = makeRowFromRaw(rawrow)
		}
		// insertColIndex plays the same role for columns as
		// insertRowIndex does for rows.
		insertColIndex = minCol
		for _, rawcell := range rawrow.C {
			// The parse error is discarded here; on failure x is zero.
			x, _, _ := getCoordsFromCellIDString(rawcell.R)
			// Some spreadsheets will omit blank cells
			// from the data.
			for x > insertColIndex {
				// Put an empty Cell into the array
				row.Cells[insertColIndex-minCol] = new(Cell)
				insertColIndex++
			}
			cellX := insertColIndex - minCol
			row.Cells[cellX].Value = getValueFromCellData(rawcell, reftable)
			row.Cells[cellX].styleIndex = rawcell.S
			row.Cells[cellX].styles = file.styles
			insertColIndex++
		}
		rows[insertRowIndex-minRow] = row
		insertRowIndex++
	}
	return rows, colCount, rowCount
}
// indexedSheet is the message sent over the result channel by
// readSheetFromFile: the outcome of reading one sheet, tagged with the
// sheet's position in the workbook.
type indexedSheet struct {
	// Index is the position of the sheet in the workbook's sheet list.
	Index int
	// Sheet is the converted sheet; it is nil when Error is set.
	Sheet *Sheet
	// Error is the failure that prevented the sheet from being read.
	Error error
}
  374. // readSheetFromFile is the logic of converting a xlsxSheet struct
  375. // into a Sheet struct. This work can be done in parallel and so
  376. // readSheetsFromZipFile will spawn an instance of this function per
  377. // sheet and get the results back on the provided channel.
  378. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) {
  379. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  380. worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
  381. if error != nil {
  382. result.Error = error
  383. sc <- result
  384. return
  385. }
  386. sheet := new(Sheet)
  387. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi)
  388. result.Sheet = sheet
  389. sc <- result
  390. }
  391. // readSheetsFromZipFile is an internal helper function that loops
  392. // over the Worksheets defined in the XSLXWorkbook and loads them into
  393. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  394. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) ([]*Sheet, error) {
  395. var workbook *xlsxWorkbook
  396. var error error
  397. var rc io.ReadCloser
  398. var decoder *xml.Decoder
  399. var sheetCount int
  400. workbook = new(xlsxWorkbook)
  401. rc, error = f.Open()
  402. if error != nil {
  403. return nil, error
  404. }
  405. decoder = xml.NewDecoder(rc)
  406. error = decoder.Decode(workbook)
  407. if error != nil {
  408. return nil, error
  409. }
  410. sheetCount = len(workbook.Sheets.Sheet)
  411. sheets := make([]*Sheet, sheetCount)
  412. sheetChan := make(chan *indexedSheet, sheetCount)
  413. for i, rawsheet := range workbook.Sheets.Sheet {
  414. go readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap)
  415. }
  416. for j := 0; j < sheetCount; j++ {
  417. sheet := <-sheetChan
  418. if sheet.Error != nil {
  419. return nil, sheet.Error
  420. }
  421. sheet.Sheet.Name = workbook.Sheets.Sheet[sheet.Index].Name
  422. sheets[sheet.Index] = sheet.Sheet
  423. }
  424. return sheets, nil
  425. }
  426. // readSharedStringsFromZipFile() is an internal helper function to
  427. // extract a reference table from the sharedStrings.xml file within
  428. // the XLSX zip file.
  429. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  430. var sst *xlsxSST
  431. var error error
  432. var rc io.ReadCloser
  433. var decoder *xml.Decoder
  434. var reftable []string
  435. rc, error = f.Open()
  436. if error != nil {
  437. return nil, error
  438. }
  439. sst = new(xlsxSST)
  440. decoder = xml.NewDecoder(rc)
  441. error = decoder.Decode(sst)
  442. if error != nil {
  443. return nil, error
  444. }
  445. reftable = MakeSharedStringRefTable(sst)
  446. return reftable, nil
  447. }
  448. // readStylesFromZipFile() is an internal helper function to
  449. // extract a style table from the style.xml file within
  450. // the XLSX zip file.
  451. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  452. var style *xlsxStyles
  453. var error error
  454. var rc io.ReadCloser
  455. var decoder *xml.Decoder
  456. rc, error = f.Open()
  457. if error != nil {
  458. return nil, error
  459. }
  460. style = new(xlsxStyles)
  461. decoder = xml.NewDecoder(rc)
  462. error = decoder.Decode(style)
  463. if error != nil {
  464. return nil, error
  465. }
  466. return style, nil
  467. }
  468. // readWorkbookRelationsFromZipFile is an internal helper function to
  469. // extract a map of relationship ID strings to the name of the
  470. // worksheet.xml file they refer to. The resulting map can be used to
  471. // reliably derefence the worksheets in the XLSX file.
  472. func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (map[string]string, error) {
  473. var sheetXMLMap map[string]string
  474. var wbRelationships *xlsxWorkbookRels
  475. var rc io.ReadCloser
  476. var decoder *xml.Decoder
  477. var err error
  478. rc, err = workbookRels.Open()
  479. if err != nil {
  480. return nil, err
  481. }
  482. decoder = xml.NewDecoder(rc)
  483. wbRelationships = new(xlsxWorkbookRels)
  484. err = decoder.Decode(wbRelationships)
  485. if err != nil {
  486. return nil, err
  487. }
  488. sheetXMLMap = make(map[string]string)
  489. for _, rel := range wbRelationships.Relationships {
  490. if strings.HasSuffix(rel.Target, ".xml") && strings.HasPrefix(rel.Target, "worksheets/") {
  491. sheetXMLMap[rel.Id] = strings.Replace(rel.Target[len("worksheets/"):], ".xml", "", 1)
  492. }
  493. }
  494. return sheetXMLMap, nil
  495. }
  496. // OpenFile() take the name of an XLSX file and returns a populated
  497. // xlsx.File struct for it.
  498. func OpenFile(filename string) (*File, error) {
  499. var f *zip.ReadCloser
  500. f, err := zip.OpenReader(filename)
  501. if err != nil {
  502. return nil, err
  503. }
  504. return ReadZip(f)
  505. }
  506. // ReadZip() takes a pointer to a zip.ReadCloser and returns a
  507. // xlsx.File struct populated with its contents. In most cases
  508. // ReadZip is not used directly, but is called internally by OpenFile.
  509. func ReadZip(f *zip.ReadCloser) (*File, error) {
  510. defer f.Close()
  511. return ReadZipReader(&f.Reader)
  512. }
  513. // ReadZipReader() can be used to read xlsx in memory without touch filesystem.
  514. func ReadZipReader(r *zip.Reader) (*File, error) {
  515. var err error
  516. var file *File
  517. var reftable []string
  518. var sharedStrings *zip.File
  519. var sheetMap map[string]*Sheet
  520. var sheetXMLMap map[string]string
  521. var sheets []*Sheet
  522. var style *xlsxStyles
  523. var styles *zip.File
  524. var v *zip.File
  525. var workbook *zip.File
  526. var workbookRels *zip.File
  527. var worksheets map[string]*zip.File
  528. file = new(File)
  529. worksheets = make(map[string]*zip.File, len(r.File))
  530. for _, v = range r.File {
  531. switch v.Name {
  532. case "xl/sharedStrings.xml":
  533. sharedStrings = v
  534. case "xl/workbook.xml":
  535. workbook = v
  536. case "xl/_rels/workbook.xml.rels":
  537. workbookRels = v
  538. case "xl/styles.xml":
  539. styles = v
  540. default:
  541. if len(v.Name) > 14 {
  542. if v.Name[0:13] == "xl/worksheets" {
  543. worksheets[v.Name[14:len(v.Name)-4]] = v
  544. }
  545. }
  546. }
  547. }
  548. sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels)
  549. if err != nil {
  550. return nil, err
  551. }
  552. file.worksheets = worksheets
  553. reftable, err = readSharedStringsFromZipFile(sharedStrings)
  554. if err != nil {
  555. return nil, err
  556. }
  557. if reftable == nil {
  558. readerErr := new(XLSXReaderError)
  559. readerErr.Err = "No valid sharedStrings.xml found in XLSX file"
  560. return nil, readerErr
  561. }
  562. file.referenceTable = reftable
  563. style, err = readStylesFromZipFile(styles)
  564. if err != nil {
  565. return nil, err
  566. }
  567. file.styles = style
  568. sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap)
  569. if err != nil {
  570. return nil, err
  571. }
  572. if sheets == nil {
  573. readerErr := new(XLSXReaderError)
  574. readerErr.Err = "No sheets found in XLSX File"
  575. return nil, readerErr
  576. }
  577. file.Sheets = sheets
  578. sheetMap = make(map[string]*Sheet, len(sheets))
  579. for i := 0; i < len(sheets); i++ {
  580. sheetMap[sheets[i].Name] = sheets[i]
  581. }
  582. file.Sheet = sheetMap
  583. return file, nil
  584. }
  585. func NewFile() *File {
  586. return &File{}
  587. }