// lib.go

  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
  // XLSXReaderError is the standard error type for otherwise undefined
  // errors in the XLSX reading process.
  type XLSXReaderError struct{ Err string }

  // Error returns the message held by an XLSXReaderError, which makes
  // *XLSXReaderError satisfy the built-in error interface.
  func (e *XLSXReaderError) Error() string { return e.Err }
  21. // Cell is a high level structure intended to provide user access to
  22. // the contents of Cell within an xlsx.Row.
  23. type Cell struct {
  24. Value string
  25. styleIndex int
  26. styles *xlsxStyles
  27. }
  28. // CellInterface defines the public API of the Cell.
  29. type CellInterface interface {
  30. String() string
  31. }
  32. // String returns the value of a Cell as a string.
  33. func (c *Cell) String() string {
  34. return c.Value
  35. }
  36. // GetStyle returns the Style associated with a Cell
  37. func (c *Cell) GetStyle() *Style {
  38. style := &Style{}
  39. if c.styleIndex > 0 && c.styleIndex <= len(c.styles.CellXfs) {
  40. xf := c.styles.CellXfs[c.styleIndex-1]
  41. if xf.ApplyBorder {
  42. var border Border
  43. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  44. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  45. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  46. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  47. style.Border = border
  48. }
  49. if xf.ApplyFill {
  50. var fill Fill
  51. fill.PatternType = c.styles.Fills[xf.FillId].PatternFill.PatternType
  52. fill.BgColor = c.styles.Fills[xf.FillId].PatternFill.BgColor.RGB
  53. fill.FgColor = c.styles.Fills[xf.FillId].PatternFill.FgColor.RGB
  54. style.Fill = fill
  55. }
  56. if xf.ApplyFont {
  57. font := c.styles.Fonts[xf.FontId]
  58. style.Font = Font{}
  59. style.Font.Size, _ = strconv.Atoi(font.Sz.Val)
  60. style.Font.Name = font.Name.Val
  61. style.Font.Family, _ = strconv.Atoi(font.Family.Val)
  62. style.Font.Charset, _ = strconv.Atoi(font.Charset.Val)
  63. }
  64. }
  65. return style
  66. }
  67. // Row is a high level structure indended to provide user access to a
  68. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  69. type Row struct {
  70. Cells []*Cell
  71. }
  72. // Sheet is a high level structure intended to provide user access to
  73. // the contents of a particular sheet within an XLSX file.
  74. type Sheet struct {
  75. Rows []*Row
  76. MaxRow int
  77. MaxCol int
  78. }
  type (
  	// Style is a high level structure intended to provide user access to
  	// the border, fill and font settings of a cell within an XLSX file.
  	Style struct {
  		Border Border
  		Fill   Fill
  		Font   Font
  	}

  	// Border is a high level structure intended to provide user access to
  	// the border style of each of the four sides of a cell.
  	Border struct {
  		Left, Right, Top, Bottom string
  	}

  	// Fill is a high level structure intended to provide user access to
  	// the pattern type and the background and foreground colors of a cell.
  	Fill struct {
  		PatternType, BgColor, FgColor string
  	}

  	// Font is a high level structure intended to provide user access to
  	// the size, name, family and charset of the font used by a cell.
  	Font struct {
  		Size            int
  		Name            string
  		Family, Charset int
  	}
  )
  // File is a high level structure providing a slice of Sheet structs
  // to the user.
  type File struct {
  	// worksheets holds the worksheet parts found in the archive, keyed by
  	// the part name with its directory prefix and extension stripped.
  	worksheets map[string]*zip.File
  	// referenceTable is the shared string table that string cells index
  	// into when their values are resolved.
  	referenceTable []string
  	// styles is the decoded style table; it is attached to every cell
  	// populated while reading rows.
  	styles *xlsxStyles
  	Sheets []*Sheet // sheet access by index
  	Sheet map[string]*Sheet // sheet access by name
  }
  // getRangeFromString is an internal helper function that converts
  // XLSX internal range syntax to a pair of integers. For example,
  // the range string "1:3" yields the lower and upper integers 1 and 3.
  //
  // An error is returned, and both bounds are zero, when the string has no
  // ':' separator, when either side of the separator is empty, or when
  // either side is not an integer. The first problem found is the one
  // reported; it is never overwritten by a later, successful step.
  func getRangeFromString(rangeString string) (lower int, upper int, err error) {
  	parts := strings.SplitN(rangeString, ":", 2)
  	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
  		return 0, 0, errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  	}
  	lower, err = strconv.Atoi(parts[0])
  	if err != nil {
  		return 0, 0, errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  	}
  	upper, err = strconv.Atoi(parts[1])
  	if err != nil {
  		return 0, 0, errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  	}
  	return lower, upper, nil
  }
  // lettersToNumeric is used to convert a character based column
  // reference to a zero based numeric column identifier, e.g. "A" is 0,
  // "Z" is 25 and "AA" is 26.
  func lettersToNumeric(letters string) int {
  	last := len(letters) - 1
  	total, weight := 0, 1
  	// Walk from the least significant (right-most) letter to the left.
  	for pos := last; pos >= 0; pos-- {
  		// Every position except the right-most one counts from 1, which
  		// is what makes "AA" follow "Z" instead of colliding with "A".
  		digit := 1
  		if pos == last {
  			digit = 0
  		}
  		if ch := letters[pos]; 'A' <= ch && ch <= 'Z' {
  			digit += int(ch - 'A')
  		} else if 'a' <= ch && ch <= 'z' {
  			digit += int(ch - 'a')
  		}
  		total += digit * weight
  		weight *= 26
  	}
  	return total
  }
  // letterOnlyMapF is used in conjunction with strings.Map to keep only
  // the characters A-Z and a-z of a string. Lower case letters are mapped
  // to their upper case form; every other character is dropped (-1).
  func letterOnlyMapF(r rune) rune {
  	if 'a' <= r && r <= 'z' {
  		return r - ('a' - 'A')
  	}
  	if 'A' <= r && r <= 'Z' {
  		return r
  	}
  	return -1
  }
  // intOnlyMapF is used in conjunction with strings.Map to keep only
  // the decimal digits of a string; every other character is dropped (-1).
  func intOnlyMapF(r rune) rune {
  	if r < '0' || r > '9' {
  		return -1
  	}
  	return r
  }
  173. // getCoordsFromCellIDString returns the zero based cartesian
  174. // coordinates from a cell name in Excel format, e.g. the cellIDString
  175. // "A1" returns 0, 0 and the "B3" return 1, 2.
  176. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  177. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  178. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  179. if error != nil {
  180. return x, y, error
  181. }
  182. y -= 1 // Zero based
  183. x = lettersToNumeric(letterPart)
  184. return x, y, error
  185. }
  186. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  187. // and minimum coordinates from the dimension reference embedded in a
  188. // XLSX worksheet. For example, the dimension reference "A1:B2"
  189. // returns "0,0", "1,1".
  190. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  191. var parts []string
  192. parts = strings.Split(ref, ":")
  193. minx, miny, err = getCoordsFromCellIDString(parts[0])
  194. if err != nil {
  195. return -1, -1, -1, -1, err
  196. }
  197. if len(parts) == 1 {
  198. maxx, maxy = minx, miny
  199. return
  200. }
  201. maxx, maxy, err = getCoordsFromCellIDString(parts[1])
  202. if err != nil {
  203. return -1, -1, -1, -1, err
  204. }
  205. return
  206. }
  207. // makeRowFromSpan will, when given a span expressed as a string,
  208. // return an empty Row large enough to encompass that span and
  209. // populate it with empty cells. All rows start from cell 1 -
  210. // regardless of the lower bound of the span.
  211. func makeRowFromSpan(spans string) *Row {
  212. var error error
  213. var upper int
  214. var row *Row
  215. var cell *Cell
  216. row = new(Row)
  217. _, upper, error = getRangeFromString(spans)
  218. if error != nil {
  219. panic(error)
  220. }
  221. error = nil
  222. row.Cells = make([]*Cell, upper)
  223. for i := 0; i < upper; i++ {
  224. cell = new(Cell)
  225. cell.Value = ""
  226. row.Cells[i] = cell
  227. }
  228. return row
  229. }
  230. // makeRowFromRaw returns the Row representation of the xlsxRow.
  231. func makeRowFromRaw(rawrow xlsxRow) *Row {
  232. var upper int
  233. var row *Row
  234. var cell *Cell
  235. row = new(Row)
  236. upper = -1
  237. for _, rawcell := range rawrow.C {
  238. x, _, error := getCoordsFromCellIDString(rawcell.R)
  239. if error != nil {
  240. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  241. }
  242. if x > upper {
  243. upper = x
  244. }
  245. }
  246. upper++
  247. row.Cells = make([]*Cell, upper)
  248. for i := 0; i < upper; i++ {
  249. cell = new(Cell)
  250. cell.Value = ""
  251. row.Cells[i] = cell
  252. }
  253. return row
  254. }
  255. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  256. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  257. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  258. var value string = ""
  259. var data string = rawcell.V
  260. if len(data) > 0 {
  261. vval := strings.Trim(data, " \t\n\r")
  262. if rawcell.T == "s" {
  263. ref, error := strconv.Atoi(vval)
  264. if error != nil {
  265. panic(error)
  266. }
  267. value = reftable[ref]
  268. } else {
  269. value = vval
  270. }
  271. }
  272. return value
  273. }
  274. // readRowsFromSheet is an internal helper function that extracts the
  275. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  276. // the value references from the reference table and stores them in
  277. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
  278. var rows []*Row
  279. var row *Row
  280. var minCol, maxCol, minRow, maxRow, colCount, rowCount int
  281. var reftable []string
  282. var err error
  283. if len(Worksheet.SheetData.Row) == 0 {
  284. return nil, 0, 0
  285. }
  286. reftable = file.referenceTable
  287. minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
  288. if err != nil {
  289. panic(err.Error())
  290. }
  291. rowCount = (maxRow - minRow) + 1
  292. colCount = (maxCol - minCol) + 1
  293. rows = make([]*Row, rowCount)
  294. for rowIndex := 0; rowIndex < rowCount; rowIndex++ {
  295. rawrow := Worksheet.SheetData.Row[rowIndex]
  296. // range is not empty
  297. if len(rawrow.Spans) != 0 {
  298. row = makeRowFromSpan(rawrow.Spans)
  299. } else {
  300. row = makeRowFromRaw(rawrow)
  301. }
  302. for _, rawcell := range rawrow.C {
  303. x, _, _ := getCoordsFromCellIDString(rawcell.R)
  304. if x < len(row.Cells) {
  305. row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
  306. row.Cells[x].styleIndex = rawcell.S
  307. row.Cells[x].styles = file.styles
  308. }
  309. }
  310. rows[rowIndex] = row
  311. }
  312. return rows, colCount, rowCount
  313. }
  314. type indexedSheet struct {
  315. Index int
  316. Sheet *Sheet
  317. Error error
  318. }
  319. // readSheetFromFile is the logic of converting a xlsxSheet struct
  320. // into a Sheet struct. This work can be done in parallel and so
  321. // readSheetsFromZipFile will spawn an instance of this function per
  322. // sheet and get the results back on the provided channel.
  323. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) {
  324. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  325. worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
  326. if error != nil {
  327. result.Error = error
  328. sc <- result
  329. return
  330. }
  331. sheet := new(Sheet)
  332. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi)
  333. result.Sheet = sheet
  334. sc <- result
  335. }
  336. // readSheetsFromZipFile is an internal helper function that loops
  337. // over the Worksheets defined in the XSLXWorkbook and loads them into
  338. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  339. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) ([]*Sheet, []string, error) {
  340. var workbook *xlsxWorkbook
  341. var error error
  342. var rc io.ReadCloser
  343. var decoder *xml.Decoder
  344. var sheetCount int
  345. workbook = new(xlsxWorkbook)
  346. rc, error = f.Open()
  347. if error != nil {
  348. return nil, nil, error
  349. }
  350. decoder = xml.NewDecoder(rc)
  351. error = decoder.Decode(workbook)
  352. if error != nil {
  353. return nil, nil, error
  354. }
  355. sheetCount = len(workbook.Sheets.Sheet)
  356. sheets := make([]*Sheet, sheetCount)
  357. names := make([]string, sheetCount)
  358. sheetChan := make(chan *indexedSheet, sheetCount)
  359. for i, rawsheet := range workbook.Sheets.Sheet {
  360. go readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap)
  361. }
  362. for j := 0; j < sheetCount; j++ {
  363. sheet := <-sheetChan
  364. if sheet.Error != nil {
  365. return nil, nil, sheet.Error
  366. }
  367. sheets[sheet.Index] = sheet.Sheet
  368. names[sheet.Index] = workbook.Sheets.Sheet[sheet.Index].Name
  369. }
  370. return sheets, names, nil
  371. }
  372. // readSharedStringsFromZipFile() is an internal helper function to
  373. // extract a reference table from the sharedStrings.xml file within
  374. // the XLSX zip file.
  375. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  376. var sst *xlsxSST
  377. var error error
  378. var rc io.ReadCloser
  379. var decoder *xml.Decoder
  380. var reftable []string
  381. rc, error = f.Open()
  382. if error != nil {
  383. return nil, error
  384. }
  385. sst = new(xlsxSST)
  386. decoder = xml.NewDecoder(rc)
  387. error = decoder.Decode(sst)
  388. if error != nil {
  389. return nil, error
  390. }
  391. reftable = MakeSharedStringRefTable(sst)
  392. return reftable, nil
  393. }
  394. // readStylesFromZipFile() is an internal helper function to
  395. // extract a style table from the style.xml file within
  396. // the XLSX zip file.
  397. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  398. var style *xlsxStyles
  399. var error error
  400. var rc io.ReadCloser
  401. var decoder *xml.Decoder
  402. rc, error = f.Open()
  403. if error != nil {
  404. return nil, error
  405. }
  406. style = new(xlsxStyles)
  407. decoder = xml.NewDecoder(rc)
  408. error = decoder.Decode(style)
  409. if error != nil {
  410. return nil, error
  411. }
  412. return style, nil
  413. }
  414. // readWorkbookRelationsFromZipFile is an internal helper function to
  415. // extract a map of relationship ID strings to the name of the
  416. // worksheet.xml file they refer to. The resulting map can be used to
  417. // reliably derefence the worksheets in the XLSX file.
  418. func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (map[string]string, error) {
  419. var sheetXMLMap map[string]string
  420. var wbRelationships *xlsxWorkbookRels
  421. var rc io.ReadCloser
  422. var decoder *xml.Decoder
  423. var err error
  424. rc, err = workbookRels.Open()
  425. if err != nil {
  426. return nil, err
  427. }
  428. decoder = xml.NewDecoder(rc)
  429. wbRelationships = new(xlsxWorkbookRels)
  430. err = decoder.Decode(wbRelationships)
  431. if err != nil {
  432. return nil, err
  433. }
  434. sheetXMLMap = make(map[string]string)
  435. for _, rel := range wbRelationships.Relationships {
  436. if strings.HasSuffix(rel.Target, ".xml") && strings.HasPrefix(rel.Target, "worksheets/") {
  437. sheetXMLMap[rel.Id] = strings.Replace(rel.Target[len("worksheets/"):], ".xml", "", 1)
  438. }
  439. }
  440. return sheetXMLMap, nil
  441. }
  442. // OpenFile() take the name of an XLSX file and returns a populated
  443. // xlsx.File struct for it.
  444. func OpenFile(filename string) (*File, error) {
  445. var f *zip.ReadCloser
  446. f, err := zip.OpenReader(filename)
  447. if err != nil {
  448. return nil, err
  449. }
  450. return ReadZip(f)
  451. }
  452. // ReadZip() takes a pointer to a zip.ReadCloser and returns a
  453. // xlsx.File struct populated with its contents. In most cases
  454. // ReadZip is not used directly, but is called internally by OpenFile.
  455. func ReadZip(f *zip.ReadCloser) (*File, error) {
  456. var err error
  457. var file *File
  458. var names []string
  459. var reftable []string
  460. var sharedStrings *zip.File
  461. var sheetMap map[string]*Sheet
  462. var sheetXMLMap map[string]string
  463. var sheets []*Sheet
  464. var style *xlsxStyles
  465. var styles *zip.File
  466. var v *zip.File
  467. var workbook *zip.File
  468. var workbookRels *zip.File
  469. var worksheets map[string]*zip.File
  470. file = new(File)
  471. worksheets = make(map[string]*zip.File, len(f.File))
  472. for _, v = range f.File {
  473. switch v.Name {
  474. case "xl/sharedStrings.xml":
  475. sharedStrings = v
  476. case "xl/workbook.xml":
  477. workbook = v
  478. case "xl/_rels/workbook.xml.rels":
  479. workbookRels = v
  480. case "xl/styles.xml":
  481. styles = v
  482. default:
  483. if len(v.Name) > 14 {
  484. if v.Name[0:13] == "xl/worksheets" {
  485. worksheets[v.Name[14:len(v.Name)-4]] = v
  486. }
  487. }
  488. }
  489. }
  490. sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels)
  491. if err != nil {
  492. return nil, err
  493. }
  494. file.worksheets = worksheets
  495. reftable, err = readSharedStringsFromZipFile(sharedStrings)
  496. if err != nil {
  497. return nil, err
  498. }
  499. if reftable == nil {
  500. readerErr := new(XLSXReaderError)
  501. readerErr.Err = "No valid sharedStrings.xml found in XLSX file"
  502. return nil, readerErr
  503. }
  504. file.referenceTable = reftable
  505. style, err = readStylesFromZipFile(styles)
  506. if err != nil {
  507. return nil, err
  508. }
  509. file.styles = style
  510. sheets, names, err = readSheetsFromZipFile(workbook, file, sheetXMLMap)
  511. if err != nil {
  512. return nil, err
  513. }
  514. if sheets == nil {
  515. readerErr := new(XLSXReaderError)
  516. readerErr.Err = "No sheets found in XLSX File"
  517. return nil, readerErr
  518. }
  519. file.Sheets = sheets
  520. sheetMap = make(map[string]*Sheet, len(names))
  521. for i := 0; i < len(names); i++ {
  522. sheetMap[names[i]] = sheets[i]
  523. }
  524. file.Sheet = sheetMap
  525. f.Close()
  526. return file, nil
  527. }