lib.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
  11. // XLSXReaderError is the standard error type for otherwise undefined
  12. // errors in the XSLX reading process.
  13. type XLSXReaderError struct {
  14. Err string
  15. }
  16. // String() returns a string value from an XLSXReaderError struct in
  17. // order that it might comply with the os.Error interface.
  18. func (e *XLSXReaderError) Error() string {
  19. return e.Err
  20. }
  21. // Cell is a high level structure intended to provide user access to
  22. // the contents of Cell within an xlsx.Row.
  23. type Cell struct {
  24. Value string
  25. styleIndex int
  26. styles *xlsxStyles
  27. }
  28. // CellInterface defines the public API of the Cell.
  29. type CellInterface interface {
  30. String() string
  31. }
  32. func (c *Cell) String() string {
  33. return c.Value
  34. }
  35. func (c *Cell) GetStyle() *Style {
  36. style := &Style{}
  37. if c.styleIndex > 0 && c.styleIndex <= len(c.styles.CellXfs) {
  38. xf := c.styles.CellXfs[c.styleIndex-1]
  39. if xf.ApplyBorder {
  40. var border Border
  41. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  42. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  43. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  44. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  45. style.Border = border
  46. }
  47. if xf.ApplyFill {
  48. var fill Fill
  49. fill.PatternType = c.styles.Fills[xf.FillId].PatternFill.PatternType
  50. fill.BgColor = c.styles.Fills[xf.FillId].PatternFill.BgColor.RGB
  51. fill.FgColor = c.styles.Fills[xf.FillId].PatternFill.FgColor.RGB
  52. style.Fill = fill
  53. }
  54. if xf.ApplyFont {
  55. font := c.styles.Fonts[xf.FontId]
  56. style.Font = Font{}
  57. style.Font.Size, _ = strconv.Atoi(font.Sz.Val)
  58. style.Font.Name = font.Name.Val
  59. style.Font.Family, _ = strconv.Atoi(font.Family.Val)
  60. style.Font.Charset, _ = strconv.Atoi(font.Charset.Val)
  61. }
  62. }
  63. return style
  64. }
  65. // Row is a high level structure indended to provide user access to a
  66. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  67. type Row struct {
  68. Cells []*Cell
  69. }
  70. // Sheet is a high level structure intended to provide user access to
  71. // the contents of a particular sheet within an XLSX file.
  72. type Sheet struct {
  73. Rows []*Row
  74. MaxRow int
  75. MaxCol int
  76. }
  77. // Style is a high level structure intended to provide user access to
  78. // the contents of Style within an XLSX file.
  79. type Style struct {
  80. Border Border
  81. Fill Fill
  82. Font Font
  83. }
  84. // Border is a high level structure intended to provide user access to
  85. // the contents of Border Style within an Sheet.
  86. type Border struct {
  87. Left string
  88. Right string
  89. Top string
  90. Bottom string
  91. }
  92. // Fill is a high level structure intended to provide user access to
  93. // the contents of background and foreground color index within an Sheet.
  94. type Fill struct {
  95. PatternType string
  96. BgColor string
  97. FgColor string
  98. }
  99. type Font struct {
  100. Size int
  101. Name string
  102. Family int
  103. Charset int
  104. }
  105. // File is a high level structure providing a slice of Sheet structs
  106. // to the user.
  107. type File struct {
  108. worksheets map[string]*zip.File
  109. referenceTable []string
  110. styles *xlsxStyles
  111. Sheets []*Sheet // sheet access by index
  112. Sheet map[string]*Sheet // sheet access by name
  113. }
  114. // getRangeFromString is an internal helper function that converts
  115. // XLSX internal range syntax to a pair of integers. For example,
  116. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  117. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  118. var parts []string
  119. parts = strings.SplitN(rangeString, ":", 2)
  120. if parts[0] == "" {
  121. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  122. }
  123. if parts[1] == "" {
  124. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  125. }
  126. lower, error = strconv.Atoi(parts[0])
  127. if error != nil {
  128. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  129. }
  130. upper, error = strconv.Atoi(parts[1])
  131. if error != nil {
  132. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  133. }
  134. return lower, upper, error
  135. }
  136. // lettersToNumeric is used to convert a character based column
  137. // reference to a zero based numeric column identifier.
  138. func lettersToNumeric(letters string) int {
  139. sum, mul, n := 0, 1, 0
  140. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  141. c := letters[i]
  142. switch {
  143. case 'A' <= c && c <= 'Z':
  144. n += int(c - 'A')
  145. case 'a' <= c && c <= 'z':
  146. n += int(c - 'a')
  147. }
  148. sum += n * mul
  149. }
  150. return sum
  151. }
  152. // letterOnlyMapF is used in conjunction with strings.Map to return
  153. // only the characters A-Z and a-z in a string
  154. func letterOnlyMapF(rune rune) rune {
  155. switch {
  156. case 'A' <= rune && rune <= 'Z':
  157. return rune
  158. case 'a' <= rune && rune <= 'z':
  159. return rune - 32
  160. }
  161. return -1
  162. }
  163. // intOnlyMapF is used in conjunction with strings.Map to return only
  164. // the numeric portions of a string.
  165. func intOnlyMapF(rune rune) rune {
  166. if rune >= 48 && rune < 58 {
  167. return rune
  168. }
  169. return -1
  170. }
  171. // getCoordsFromCellIDString returns the zero based cartesian
  172. // coordinates from a cell name in Excel format, e.g. the cellIDString
  173. // "A1" returns 0, 0 and the "B3" return 1, 2.
  174. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  175. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  176. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  177. if error != nil {
  178. return x, y, error
  179. }
  180. y -= 1 // Zero based
  181. x = lettersToNumeric(letterPart)
  182. return x, y, error
  183. }
  184. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  185. // and minimum coordinates from the dimension reference embedded in a
  186. // XLSX worksheet. For example, the dimension reference "A1:B2"
  187. // returns "0,0", "1,1".
  188. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  189. var parts []string
  190. parts = strings.Split(ref, ":")
  191. minx, miny, err = getCoordsFromCellIDString(parts[0])
  192. if err != nil {
  193. return -1, -1, -1, -1, err
  194. }
  195. if len(parts) == 1 {
  196. maxx, maxy = minx, miny
  197. return
  198. }
  199. maxx, maxy, err = getCoordsFromCellIDString(parts[1])
  200. if err != nil {
  201. return -1, -1, -1, -1, err
  202. }
  203. return
  204. }
  205. // makeRowFromSpan will, when given a span expressed as a string,
  206. // return an empty Row large enough to encompass that span and
  207. // populate it with empty cells. All rows start from cell 1 -
  208. // regardless of the lower bound of the span.
  209. func makeRowFromSpan(spans string) *Row {
  210. var error error
  211. var upper int
  212. var row *Row
  213. var cell *Cell
  214. row = new(Row)
  215. _, upper, error = getRangeFromString(spans)
  216. if error != nil {
  217. panic(error)
  218. }
  219. error = nil
  220. row.Cells = make([]*Cell, upper)
  221. for i := 0; i < upper; i++ {
  222. cell = new(Cell)
  223. cell.Value = ""
  224. row.Cells[i] = cell
  225. }
  226. return row
  227. }
  228. // get the max column
  229. // return the cells of columns
  230. func makeRowFromRaw(rawrow xlsxRow) *Row {
  231. var upper int
  232. var row *Row
  233. var cell *Cell
  234. row = new(Row)
  235. upper = -1
  236. for _, rawcell := range rawrow.C {
  237. x, _, error := getCoordsFromCellIDString(rawcell.R)
  238. if error != nil {
  239. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  240. }
  241. if x > upper {
  242. upper = x
  243. }
  244. }
  245. upper++
  246. row.Cells = make([]*Cell, upper)
  247. for i := 0; i < upper; i++ {
  248. cell = new(Cell)
  249. cell.Value = ""
  250. row.Cells[i] = cell
  251. }
  252. return row
  253. }
  254. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  255. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  256. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  257. var value string = ""
  258. var data string = rawcell.V
  259. if len(data) > 0 {
  260. vval := strings.Trim(data, " \t\n\r")
  261. if rawcell.T == "s" {
  262. ref, error := strconv.Atoi(vval)
  263. if error != nil {
  264. panic(error)
  265. }
  266. value = reftable[ref]
  267. } else {
  268. value = vval
  269. }
  270. }
  271. return value
  272. }
  273. // readRowsFromSheet is an internal helper function that extracts the
  274. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  275. // the value references from the reference table and stores them in
  276. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
  277. var rows []*Row
  278. var row *Row
  279. var minCol, maxCol, minRow, maxRow, colCount, rowCount int
  280. var reftable []string
  281. var err error
  282. if len(Worksheet.SheetData.Row) == 0 {
  283. return nil, 0, 0
  284. }
  285. reftable = file.referenceTable
  286. minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
  287. if err != nil {
  288. panic(err.Error())
  289. }
  290. rowCount = (maxRow - minRow) + 1
  291. colCount = (maxCol - minCol) + 1
  292. rows = make([]*Row, rowCount)
  293. for rowIndex := 0; rowIndex < rowCount; rowIndex++ {
  294. rawrow := Worksheet.SheetData.Row[rowIndex]
  295. // range is not empty
  296. if len(rawrow.Spans) != 0 {
  297. row = makeRowFromSpan(rawrow.Spans)
  298. } else {
  299. row = makeRowFromRaw(rawrow)
  300. }
  301. for _, rawcell := range rawrow.C {
  302. x, _, _ := getCoordsFromCellIDString(rawcell.R)
  303. if x < len(row.Cells) {
  304. row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
  305. row.Cells[x].styleIndex = rawcell.S
  306. row.Cells[x].styles = file.styles
  307. }
  308. }
  309. rows[rowIndex] = row
  310. }
  311. return rows, colCount, rowCount
  312. }
  313. type indexedSheet struct {
  314. Index int
  315. Sheet *Sheet
  316. Error error
  317. }
  318. // readSheetFromFile is the logic of converting a xlsxSheet struct
  319. // into a Sheet struct. This work can be done in parallel and so
  320. // readSheetsFromZipFile will spawn an instance of this function per
  321. // sheet and get the results back on the provided channel.
  322. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) {
  323. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  324. worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
  325. if error != nil {
  326. result.Error = error
  327. sc <- result
  328. return
  329. }
  330. sheet := new(Sheet)
  331. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi)
  332. result.Sheet = sheet
  333. sc <- result
  334. }
  335. // readSheetsFromZipFile is an internal helper function that loops
  336. // over the Worksheets defined in the XSLXWorkbook and loads them into
  337. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  338. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) ([]*Sheet, []string, error) {
  339. var workbook *xlsxWorkbook
  340. var error error
  341. var rc io.ReadCloser
  342. var decoder *xml.Decoder
  343. var sheetCount int
  344. workbook = new(xlsxWorkbook)
  345. rc, error = f.Open()
  346. if error != nil {
  347. return nil, nil, error
  348. }
  349. decoder = xml.NewDecoder(rc)
  350. error = decoder.Decode(workbook)
  351. if error != nil {
  352. return nil, nil, error
  353. }
  354. sheetCount = len(workbook.Sheets.Sheet)
  355. sheets := make([]*Sheet, sheetCount)
  356. names := make([]string, sheetCount)
  357. sheetChan := make(chan *indexedSheet, sheetCount)
  358. for i, rawsheet := range workbook.Sheets.Sheet {
  359. go readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap)
  360. }
  361. for j := 0; j < sheetCount; j++ {
  362. sheet := <-sheetChan
  363. if sheet.Error != nil {
  364. return nil, nil, sheet.Error
  365. }
  366. sheets[sheet.Index] = sheet.Sheet
  367. names[sheet.Index] = workbook.Sheets.Sheet[sheet.Index].Name
  368. }
  369. return sheets, names, nil
  370. }
  371. // readSharedStringsFromZipFile() is an internal helper function to
  372. // extract a reference table from the sharedStrings.xml file within
  373. // the XLSX zip file.
  374. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  375. var sst *xlsxSST
  376. var error error
  377. var rc io.ReadCloser
  378. var decoder *xml.Decoder
  379. var reftable []string
  380. rc, error = f.Open()
  381. if error != nil {
  382. return nil, error
  383. }
  384. sst = new(xlsxSST)
  385. decoder = xml.NewDecoder(rc)
  386. error = decoder.Decode(sst)
  387. if error != nil {
  388. return nil, error
  389. }
  390. reftable = MakeSharedStringRefTable(sst)
  391. return reftable, nil
  392. }
  393. // readStylesFromZipFile() is an internal helper function to
  394. // extract a style table from the style.xml file within
  395. // the XLSX zip file.
  396. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  397. var style *xlsxStyles
  398. var error error
  399. var rc io.ReadCloser
  400. var decoder *xml.Decoder
  401. rc, error = f.Open()
  402. if error != nil {
  403. return nil, error
  404. }
  405. style = new(xlsxStyles)
  406. decoder = xml.NewDecoder(rc)
  407. error = decoder.Decode(style)
  408. if error != nil {
  409. return nil, error
  410. }
  411. return style, nil
  412. }
  413. func readWorkbookRelationsFromZipFile(workbook_rels *zip.File) (sheetXMLMap map[string]string) {
  414. sheetXMLMap = make(map[string]string)
  415. var wb_relationships *xlsxWorkbookRels
  416. var error error
  417. var rc io.ReadCloser
  418. var decoder *xml.Decoder
  419. rc, error = workbook_rels.Open()
  420. if error != nil {
  421. return
  422. }
  423. decoder = xml.NewDecoder(rc)
  424. wb_relationships = new(xlsxWorkbookRels)
  425. error = decoder.Decode(wb_relationships)
  426. if error != nil {
  427. return
  428. }
  429. for _, rel := range wb_relationships.Relationships {
  430. if strings.HasSuffix(rel.Target, ".xml") && strings.HasPrefix(rel.Target, "worksheets/") {
  431. sheetXMLMap[rel.Id] = strings.Replace(rel.Target[len("worksheets/"):], ".xml", "", 1)
  432. }
  433. }
  434. return
  435. }
  436. // OpenFile() take the name of an XLSX file and returns a populated
  437. // xlsx.File struct for it.
  438. func OpenFile(filename string) (*File, error) {
  439. var f *zip.ReadCloser
  440. f, err := zip.OpenReader(filename)
  441. if err != nil {
  442. return nil, err
  443. }
  444. return ReadZip(f)
  445. }
  446. func ReadZip(f *zip.ReadCloser) (*File, error) {
  447. var error error
  448. var file *File
  449. var v *zip.File
  450. var workbook *zip.File
  451. var workbook_rels *zip.File
  452. var styles *zip.File
  453. var sharedStrings *zip.File
  454. var reftable []string
  455. var worksheets map[string]*zip.File
  456. var sheetMap map[string]*Sheet
  457. file = new(File)
  458. worksheets = make(map[string]*zip.File, len(f.File))
  459. for _, v = range f.File {
  460. switch v.Name {
  461. case "xl/sharedStrings.xml":
  462. sharedStrings = v
  463. case "xl/workbook.xml":
  464. workbook = v
  465. case "xl/_rels/workbook.xml.rels":
  466. workbook_rels = v
  467. case "xl/styles.xml":
  468. styles = v
  469. default:
  470. if len(v.Name) > 12 {
  471. if v.Name[0:13] == "xl/worksheets" {
  472. worksheets[v.Name[14:len(v.Name)-4]] = v
  473. }
  474. }
  475. }
  476. }
  477. sheetXMLMap := readWorkbookRelationsFromZipFile(workbook_rels)
  478. file.worksheets = worksheets
  479. reftable, error = readSharedStringsFromZipFile(sharedStrings)
  480. if error != nil {
  481. return nil, error
  482. }
  483. if reftable == nil {
  484. error := new(XLSXReaderError)
  485. error.Err = "No valid sharedStrings.xml found in XLSX file"
  486. return nil, error
  487. }
  488. file.referenceTable = reftable
  489. style, error := readStylesFromZipFile(styles)
  490. if error != nil {
  491. return nil, error
  492. }
  493. file.styles = style
  494. sheets, names, error := readSheetsFromZipFile(workbook, file, sheetXMLMap)
  495. if error != nil {
  496. return nil, error
  497. }
  498. if sheets == nil {
  499. error := new(XLSXReaderError)
  500. error.Err = "No sheets found in XLSX File"
  501. return nil, error
  502. }
  503. file.Sheets = sheets
  504. sheetMap = make(map[string]*Sheet, len(names))
  505. for i := 0; i < len(names); i++ {
  506. sheetMap[names[i]] = sheets[i]
  507. }
  508. file.Sheet = sheetMap
  509. f.Close()
  510. return file, nil
  511. }