lib.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
  11. // XLSXReaderError is the standard error type for otherwise undefined
  12. // errors in the XSLX reading process.
  13. type XLSXReaderError struct {
  14. Err string
  15. }
  16. // String() returns a string value from an XLSXReaderError struct in
  17. // order that it might comply with the os.Error interface.
  18. func (e *XLSXReaderError) Error() string {
  19. return e.Err
  20. }
// Cell is a high level structure intended to provide user access to
// the contents of Cell within an xlsx.Row.
type Cell struct {
	// Value is the cell's content as a string.
	Value string
	// styleIndex is the index GetStyle uses to look up the cell's
	// format in styles.CellXfs.
	styleIndex int
	// styles is the style table GetStyle resolves styleIndex against.
	styles *xlsxStyles
}
// CellInterface defines the public API of the Cell.
type CellInterface interface {
	// String returns the textual content of the cell.
	String() string
}
// String returns the cell's Value.
func (c *Cell) String() string {
	return c.Value
}
  35. // TODO: TestMe!
  36. func (c *Cell) GetStyle() *Style {
  37. style := new(Style)
  38. if c.styleIndex > 0 && c.styleIndex < len(c.styles.CellXfs) {
  39. xf := c.styles.CellXfs[c.styleIndex]
  40. if xf.ApplyBorder {
  41. var border Border
  42. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  43. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  44. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  45. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  46. style.Boders = border
  47. }
  48. if xf.ApplyFill {
  49. var fill Fill
  50. fill.PatternType = c.styles.Fills[xf.FillId].PatternFill.PatternType
  51. fill.BgColor = c.styles.Fills[xf.FillId].PatternFill.BgColor.RGB
  52. fill.FgColor = c.styles.Fills[xf.FillId].PatternFill.FgColor.RGB
  53. style.Fills = fill
  54. }
  55. }
  56. return style
  57. }
// Row is a high level structure intended to provide user access to a
// row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
type Row struct {
	// Cells holds the row's cells, indexed by zero based column.
	Cells []*Cell
}
  63. // Sheet is a high level structure intended to provide user access to
  64. // the contents of a particular sheet within an XLSX file.
  65. type Sheet struct {
  66. Rows []*Row
  67. MaxRow int
  68. MaxCol int
  69. }
// Style is a high level structure intended to provide user access to
// the contents of Style within an XLSX file.
type Style struct {
	// Boders holds the border styles of the cell. The spelling of the
	// field name is part of the exported API.
	Boders Border
	// Fills holds the pattern and colors of the cell's fill.
	Fills Fill
}
  76. // Border is a high level structure intended to provide user access to
  77. // the contents of Border Style within an Sheet.
  78. type Border struct {
  79. Left string
  80. Right string
  81. Top string
  82. Bottom string
  83. }
  84. // Fill is a high level structure intended to provide user access to
  85. // the contents of background and foreground color index within an Sheet.
  86. type Fill struct {
  87. PatternType string
  88. BgColor string
  89. FgColor string
  90. }
// File is a high level structure providing a slice of Sheet structs
// to the user.
type File struct {
	// worksheets maps the worksheet keys that ReadZip derives from the
	// archive entry names to their zip entries.
	worksheets map[string]*zip.File
	// referenceTable is the shared string table that cells of type
	// "s" index into.
	referenceTable []string
	// styles is the decoded style table handed to every Cell.
	styles *xlsxStyles
	Sheets []*Sheet          // sheet access by index
	Sheet  map[string]*Sheet // sheet access by name
}
  100. // getRangeFromString is an internal helper function that converts
  101. // XLSX internal range syntax to a pair of integers. For example,
  102. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  103. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  104. var parts []string
  105. parts = strings.SplitN(rangeString, ":", 2)
  106. if parts[0] == "" {
  107. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  108. }
  109. if parts[1] == "" {
  110. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  111. }
  112. lower, error = strconv.Atoi(parts[0])
  113. if error != nil {
  114. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  115. }
  116. upper, error = strconv.Atoi(parts[1])
  117. if error != nil {
  118. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  119. }
  120. return lower, upper, error
  121. }
  122. // lettersToNumeric is used to convert a character based column
  123. // reference to a zero based numeric column identifier.
  124. func lettersToNumeric(letters string) int {
  125. sum, mul, n := 0, 1, 0
  126. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  127. c := letters[i]
  128. switch {
  129. case 'A' <= c && c <= 'Z':
  130. n += int(c - 'A')
  131. case 'a' <= c && c <= 'z':
  132. n += int(c - 'a')
  133. }
  134. sum += n * mul
  135. }
  136. return sum
  137. }
  138. // letterOnlyMapF is used in conjunction with strings.Map to return
  139. // only the characters A-Z and a-z in a string
  140. func letterOnlyMapF(rune rune) rune {
  141. switch {
  142. case 'A' <= rune && rune <= 'Z':
  143. return rune
  144. case 'a' <= rune && rune <= 'z':
  145. return rune - 32
  146. }
  147. return -1
  148. }
  149. // intOnlyMapF is used in conjunction with strings.Map to return only
  150. // the numeric portions of a string.
  151. func intOnlyMapF(rune rune) rune {
  152. if rune >= 48 && rune < 58 {
  153. return rune
  154. }
  155. return -1
  156. }
  157. // getCoordsFromCellIDString returns the zero based cartesian
  158. // coordinates from a cell name in Excel format, e.g. the cellIDString
  159. // "A1" returns 0, 0 and the "B3" return 1, 2.
  160. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  161. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  162. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  163. if error != nil {
  164. return x, y, error
  165. }
  166. y -= 1 // Zero based
  167. x = lettersToNumeric(letterPart)
  168. return x, y, error
  169. }
  170. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  171. // and minimum coordinates from the dimension reference embedded in a
  172. // XLSX worksheet. For example, the dimension reference "A1:B2"
  173. // returns "0,0", "1,1".
  174. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  175. var parts []string
  176. parts = strings.Split(ref, ":")
  177. minx, miny, err = getCoordsFromCellIDString(parts[0])
  178. if err != nil {
  179. return -1, -1, -1, -1, err
  180. }
  181. if len(parts) == 1 {
  182. maxx, maxy = minx, miny
  183. return
  184. }
  185. maxx, maxy, err = getCoordsFromCellIDString(parts[1])
  186. if err != nil {
  187. return -1, -1, -1, -1, err
  188. }
  189. return
  190. }
  191. // makeRowFromSpan will, when given a span expressed as a string,
  192. // return an empty Row large enough to encompass that span and
  193. // populate it with empty cells. All rows start from cell 1 -
  194. // regardless of the lower bound of the span.
  195. func makeRowFromSpan(spans string) *Row {
  196. var error error
  197. var upper int
  198. var row *Row
  199. var cell *Cell
  200. row = new(Row)
  201. _, upper, error = getRangeFromString(spans)
  202. if error != nil {
  203. panic(error)
  204. }
  205. error = nil
  206. row.Cells = make([]*Cell, upper)
  207. for i := 0; i < upper; i++ {
  208. cell = new(Cell)
  209. cell.Value = ""
  210. row.Cells[i] = cell
  211. }
  212. return row
  213. }
  214. // get the max column
  215. // return the cells of columns
  216. func makeRowFromRaw(rawrow xlsxRow) *Row {
  217. var upper int
  218. var row *Row
  219. var cell *Cell
  220. row = new(Row)
  221. upper = -1
  222. for _, rawcell := range rawrow.C {
  223. x, _, error := getCoordsFromCellIDString(rawcell.R)
  224. if error != nil {
  225. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  226. }
  227. if x > upper {
  228. upper = x
  229. }
  230. }
  231. upper++
  232. row.Cells = make([]*Cell, upper)
  233. for i := 0; i < upper; i++ {
  234. cell = new(Cell)
  235. cell.Value = ""
  236. row.Cells[i] = cell
  237. }
  238. return row
  239. }
  240. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  241. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  242. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  243. var value string = ""
  244. var data string = rawcell.V
  245. if len(data) > 0 {
  246. vval := strings.Trim(data, " \t\n\r")
  247. if rawcell.T == "s" {
  248. ref, error := strconv.Atoi(vval)
  249. if error != nil {
  250. panic(error)
  251. }
  252. value = reftable[ref]
  253. } else {
  254. value = vval
  255. }
  256. }
  257. return value
  258. }
  259. // readRowsFromSheet is an internal helper function that extracts the
  260. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  261. // the value references from the reference table and stores them in
  262. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
  263. var rows []*Row
  264. var row *Row
  265. var minCol, maxCol, minRow, maxRow, colCount, rowCount int
  266. var reftable []string
  267. var err error
  268. if len(Worksheet.SheetData.Row) == 0 {
  269. return nil, 0, 0
  270. }
  271. reftable = file.referenceTable
  272. minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
  273. if err != nil {
  274. panic(err.Error())
  275. }
  276. rowCount = (maxRow - minRow) + 1
  277. colCount = (maxCol - minCol) + 1
  278. rows = make([]*Row, rowCount)
  279. for rowIndex := 0; rowIndex < rowCount; rowIndex++ {
  280. rawrow := Worksheet.SheetData.Row[rowIndex]
  281. // range is not empty
  282. if len(rawrow.Spans) != 0 {
  283. row = makeRowFromSpan(rawrow.Spans)
  284. } else {
  285. row = makeRowFromRaw(rawrow)
  286. }
  287. for _, rawcell := range rawrow.C {
  288. x, _, _ := getCoordsFromCellIDString(rawcell.R)
  289. if x < len(row.Cells) {
  290. row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
  291. row.Cells[x].styleIndex = rawcell.S
  292. row.Cells[x].styles = file.styles
  293. }
  294. }
  295. rows[rowIndex] = row
  296. }
  297. return rows, colCount, rowCount
  298. }
// indexedSheet is the message readSheetFromFile sends back on its
// result channel.
type indexedSheet struct {
	// Index is the position of the sheet within the workbook.
	Index int
	// Sheet is the sheet that was read; nil when Error is set.
	Sheet *Sheet
	// Error is the error that prevented the sheet from being read.
	Error error
}
  304. // readSheetFromFile is the logic of converting a xlsxSheet struct
  305. // into a Sheet struct. This work can be done in parallel and so
  306. // readSheetsFromZipFile will spawn an instance of this function per
  307. // sheet and get the results back on the provided channel.
  308. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File) {
  309. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  310. worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets)
  311. if error != nil {
  312. result.Error = error
  313. sc <- result
  314. return
  315. }
  316. sheet := new(Sheet)
  317. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi)
  318. result.Sheet = sheet
  319. sc <- result
  320. }
  321. // readSheetsFromZipFile is an internal helper function that loops
  322. // over the Worksheets defined in the XSLXWorkbook and loads them into
  323. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  324. func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, []string, error) {
  325. var workbook *xlsxWorkbook
  326. var error error
  327. var rc io.ReadCloser
  328. var decoder *xml.Decoder
  329. var sheetCount int
  330. workbook = new(xlsxWorkbook)
  331. rc, error = f.Open()
  332. if error != nil {
  333. return nil, nil, error
  334. }
  335. decoder = xml.NewDecoder(rc)
  336. error = decoder.Decode(workbook)
  337. if error != nil {
  338. return nil, nil, error
  339. }
  340. sheetCount = len(workbook.Sheets.Sheet)
  341. sheets := make([]*Sheet, sheetCount)
  342. names := make([]string, sheetCount)
  343. sheetChan := make(chan *indexedSheet, sheetCount)
  344. for i, rawsheet := range workbook.Sheets.Sheet {
  345. go readSheetFromFile(sheetChan, i, rawsheet, file)
  346. }
  347. for j := 0; j < sheetCount; j++ {
  348. sheet := <-sheetChan
  349. if sheet.Error != nil {
  350. return nil, nil, sheet.Error
  351. }
  352. sheets[sheet.Index] = sheet.Sheet
  353. names[sheet.Index] = workbook.Sheets.Sheet[sheet.Index].Name
  354. }
  355. return sheets, names, nil
  356. }
  357. // readSharedStringsFromZipFile() is an internal helper function to
  358. // extract a reference table from the sharedStrings.xml file within
  359. // the XLSX zip file.
  360. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  361. var sst *xlsxSST
  362. var error error
  363. var rc io.ReadCloser
  364. var decoder *xml.Decoder
  365. var reftable []string
  366. rc, error = f.Open()
  367. if error != nil {
  368. return nil, error
  369. }
  370. sst = new(xlsxSST)
  371. decoder = xml.NewDecoder(rc)
  372. error = decoder.Decode(sst)
  373. if error != nil {
  374. return nil, error
  375. }
  376. reftable = MakeSharedStringRefTable(sst)
  377. return reftable, nil
  378. }
  379. // readStylesFromZipFile() is an internal helper function to
  380. // extract a style table from the style.xml file within
  381. // the XLSX zip file.
  382. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  383. var style *xlsxStyles
  384. var error error
  385. var rc io.ReadCloser
  386. var decoder *xml.Decoder
  387. rc, error = f.Open()
  388. if error != nil {
  389. return nil, error
  390. }
  391. style = new(xlsxStyles)
  392. decoder = xml.NewDecoder(rc)
  393. error = decoder.Decode(style)
  394. if error != nil {
  395. return nil, error
  396. }
  397. return style, nil
  398. }
  399. // OpenFile() take the name of an XLSX file and returns a populated
  400. // xlsx.File struct for it.
  401. func OpenFile(filename string) (*File, error) {
  402. var f *zip.ReadCloser
  403. f, err := zip.OpenReader(filename)
  404. if err != nil {
  405. return nil, err
  406. }
  407. return ReadZip(f)
  408. }
  409. func ReadZip(f *zip.ReadCloser) (*File, error) {
  410. var error error
  411. var file *File
  412. var v *zip.File
  413. var workbook *zip.File
  414. var styles *zip.File
  415. var sharedStrings *zip.File
  416. var reftable []string
  417. var worksheets map[string]*zip.File
  418. var sheetMap map[string]*Sheet
  419. file = new(File)
  420. worksheets = make(map[string]*zip.File, len(f.File))
  421. for _, v = range f.File {
  422. switch v.Name {
  423. case "xl/sharedStrings.xml":
  424. sharedStrings = v
  425. case "xl/workbook.xml":
  426. workbook = v
  427. case "xl/styles.xml":
  428. styles = v
  429. default:
  430. if len(v.Name) > 12 {
  431. if v.Name[0:13] == "xl/worksheets" {
  432. worksheets[v.Name[14:len(v.Name)-4]] = v
  433. }
  434. }
  435. }
  436. }
  437. file.worksheets = worksheets
  438. reftable, error = readSharedStringsFromZipFile(sharedStrings)
  439. if error != nil {
  440. return nil, error
  441. }
  442. if reftable == nil {
  443. error := new(XLSXReaderError)
  444. error.Err = "No valid sharedStrings.xml found in XLSX file"
  445. return nil, error
  446. }
  447. file.referenceTable = reftable
  448. style, error := readStylesFromZipFile(styles)
  449. if error != nil {
  450. return nil, error
  451. }
  452. file.styles = style
  453. sheets, names, error := readSheetsFromZipFile(workbook, file)
  454. if error != nil {
  455. return nil, error
  456. }
  457. if sheets == nil {
  458. error := new(XLSXReaderError)
  459. error.Err = "No sheets found in XLSX File"
  460. return nil, error
  461. }
  462. file.Sheets = sheets
  463. sheetMap = make(map[string]*Sheet, len(names))
  464. for i := 0; i < len(names); i++ {
  465. sheetMap[names[i]] = sheets[i]
  466. }
  467. file.Sheet = sheetMap
  468. f.Close()
  469. return file, nil
  470. }