lib.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "math"
  9. "strconv"
  10. "strings"
  11. )
  12. // XLSXReaderError is the standard error type for otherwise undefined
  13. // errors in the XSLX reading process.
  14. type XLSXReaderError struct {
  15. Err string
  16. }
  17. // String() returns a string value from an XLSXReaderError struct in
  18. // order that it might comply with the os.Error interface.
  19. func (e *XLSXReaderError) Error() string {
  20. return e.Err
  21. }
  22. // Cell is a high level structure intended to provide user access to
  23. // the contents of Cell within an xlsx.Row.
  24. type Cell struct {
  25. Value string
  26. styleIndex int
  27. styles *xlsxStyles
  28. }
  29. // CellInterface defines the public API of the Cell.
  30. type CellInterface interface {
  31. String() string
  32. }
  33. func (c *Cell) String() string {
  34. return c.Value
  35. }
  36. func (c *Cell) GetStyle() *Style {
  37. style := new(Style)
  38. if c.styleIndex > 0 && c.styleIndex < len(c.styles.CellXfs) {
  39. xf := c.styles.CellXfs[c.styleIndex]
  40. if xf.ApplyBorder != "0" {
  41. var border Border
  42. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  43. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  44. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  45. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  46. style.Boders = border
  47. }
  48. if xf.ApplyFill != "0" {
  49. var fill Fill
  50. fill.BgColorIndex = c.styles.Fills[xf.FillId].BgColorIndex
  51. style.Fills = fill
  52. }
  53. }
  54. return style
  55. }
  56. // Row is a high level structure indended to provide user access to a
  57. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  58. type Row struct {
  59. Cells []*Cell
  60. }
  61. // Sheet is a high level structure intended to provide user access to
  62. // the contents of a particular sheet within an XLSX file.
  63. type Sheet struct {
  64. Rows []*Row
  65. MaxRow int
  66. MaxCol int
  67. }
  68. // Style is a high level structure intended to provide user access to
  69. // the contents of Style within an XLSX file.
  70. type Style struct {
  71. Boders Border
  72. Fills Fill
  73. }
  74. // Border is a high level structure intended to provide user access to
  75. // the contents of Border Style within an Sheet.
  76. type Border struct {
  77. Left string
  78. Right string
  79. Top string
  80. Bottom string
  81. }
  82. // Fill is a high level structure intended to provide user access to
  83. // the contents of background and foreground color index within an Sheet.
  84. type Fill struct {
  85. BgColorIndex string
  86. FgColorIndex string
  87. }
  88. // File is a high level structure providing a slice of Sheet structs
  89. // to the user.
  90. type File struct {
  91. worksheets map[string]*zip.File
  92. referenceTable []string
  93. styles *xlsxStyles
  94. Sheets []*Sheet // sheet access by index
  95. Sheet map[string]*Sheet // sheet access by name
  96. }
  97. // getRangeFromString is an internal helper function that converts
  98. // XLSX internal range syntax to a pair of integers. For example,
  99. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  100. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  101. var parts []string
  102. parts = strings.SplitN(rangeString, ":", 2)
  103. if parts[0] == "" {
  104. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  105. }
  106. if parts[1] == "" {
  107. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  108. }
  109. lower, error = strconv.Atoi(parts[0])
  110. if error != nil {
  111. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  112. }
  113. upper, error = strconv.Atoi(parts[1])
  114. if error != nil {
  115. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  116. }
  117. return lower, upper, error
  118. }
  119. // positionalLetterMultiplier gives an integer multiplier to use for a
  120. // position in a letter based column identifer. For example, the
  121. // column ID "AA" is equivalent to 26*1 + 1, "BA" is equivalent to
  122. // 26*2 + 1 and "ABA" is equivalent to (676 * 1)+(26 * 2)+1 or
  123. // ((26**2)*1)+((26**1)*2)+((26**0))*1
  124. func positionalLetterMultiplier(extent, pos int) int {
  125. var result float64
  126. var power float64
  127. var offset int
  128. offset = pos + 1
  129. power = float64(extent - offset)
  130. result = math.Pow(26, power)
  131. return int(result)
  132. }
  133. // lettersToNumeric is used to convert a character based column
  134. // reference to a zero based numeric column identifier.
  135. func lettersToNumeric(letters string) int {
  136. var sum int = 0
  137. var shift int
  138. extent := len(letters)
  139. for i, c := range letters {
  140. // Just to make life akward. If we think of this base
  141. // 26 notation as being like HEX or binary we hit a
  142. // nasty little problem. The issue is that we have no
  143. // 0s and therefore A can be both a 1 and a 0. The
  144. // value range of a letter is different in the most
  145. // significant position if (and only if) there is more
  146. // than one positions. For example:
  147. // "A" = 0
  148. // 676 | 26 | 0
  149. // ----+----+----
  150. // 0 | 0 | 0
  151. //
  152. // "Z" = 25
  153. // 676 | 26 | 0
  154. // ----+----+----
  155. // 0 | 0 | 25
  156. // "AA" = 26
  157. // 676 | 26 | 0
  158. // ----+----+----
  159. // 0 | 1 | 0 <--- note here - the value of "A" maps to both 1 and 0.
  160. if i == 0 && extent > 1 {
  161. shift = 1
  162. } else {
  163. shift = 0
  164. }
  165. multiplier := positionalLetterMultiplier(extent, i)
  166. switch {
  167. case 'A' <= c && c <= 'Z':
  168. sum += multiplier * (int((c - 'A')) + shift)
  169. case 'a' <= c && c <= 'z':
  170. sum += multiplier * (int((c - 'a')) + shift)
  171. }
  172. }
  173. return sum
  174. }
  175. // letterOnlyMapF is used in conjunction with strings.Map to return
  176. // only the characters A-Z and a-z in a string
  177. func letterOnlyMapF(rune rune) rune {
  178. switch {
  179. case 'A' <= rune && rune <= 'Z':
  180. return rune
  181. case 'a' <= rune && rune <= 'z':
  182. return rune - 32
  183. }
  184. return -1
  185. }
  186. // intOnlyMapF is used in conjunction with strings.Map to return only
  187. // the numeric portions of a string.
  188. func intOnlyMapF(rune rune) rune {
  189. if rune >= 48 && rune < 58 {
  190. return rune
  191. }
  192. return -1
  193. }
  194. // getCoordsFromCellIDString returns the zero based cartesian
  195. // coordinates from a cell name in Excel format, e.g. the cellIDString
  196. // "A1" returns 0, 0 and the "B3" return 1, 2.
  197. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  198. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  199. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  200. if error != nil {
  201. return x, y, error
  202. }
  203. y -= 1 // Zero based
  204. x = lettersToNumeric(letterPart)
  205. return x, y, error
  206. }
  207. // makeRowFromSpan will, when given a span expressed as a string,
  208. // return an empty Row large enough to encompass that span and
  209. // populate it with empty cells. All rows start from cell 1 -
  210. // regardless of the lower bound of the span.
  211. func makeRowFromSpan(spans string) *Row {
  212. var error error
  213. var upper int
  214. var row *Row
  215. var cell *Cell
  216. row = new(Row)
  217. _, upper, error = getRangeFromString(spans)
  218. if error != nil {
  219. panic(error)
  220. }
  221. error = nil
  222. row.Cells = make([]*Cell, upper)
  223. for i := 0; i < upper; i++ {
  224. cell = new(Cell)
  225. cell.Value = ""
  226. row.Cells[i] = cell
  227. }
  228. return row
  229. }
  230. // get the max column
  231. // return the cells of columns
  232. func makeRowFromRaw(rawrow xlsxRow) *Row {
  233. var upper int
  234. var row *Row
  235. var cell *Cell
  236. row = new(Row)
  237. upper = 0
  238. for _, rawcell := range rawrow.C {
  239. x, _, error := getCoordsFromCellIDString(rawcell.R)
  240. if error != nil {
  241. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  242. }
  243. if x > upper {
  244. upper = x
  245. }
  246. }
  247. row.Cells = make([]*Cell, upper)
  248. for i := 0; i < upper; i++ {
  249. cell = new(Cell)
  250. cell.Value = ""
  251. row.Cells[i] = cell
  252. }
  253. return row
  254. }
  255. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  256. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  257. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  258. var value string = ""
  259. var data string = rawcell.V
  260. if len(data) > 0 {
  261. vval := strings.Trim(data, " \t\n\r")
  262. if rawcell.T == "s" {
  263. ref, error := strconv.Atoi(vval)
  264. if error != nil {
  265. panic(error)
  266. }
  267. value = reftable[ref]
  268. } else {
  269. value = vval
  270. }
  271. }
  272. return value
  273. }
  274. // readRowsFromSheet is an internal helper function that extracts the
  275. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  276. // the value references from the reference table and stores them in
  277. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
  278. var rows []*Row
  279. var row *Row
  280. var maxCol int
  281. var maxRow int
  282. var reftable []string
  283. reftable = file.referenceTable
  284. maxCol = 0
  285. maxRow = 0
  286. for _, rawrow := range Worksheet.SheetData.Row {
  287. for _, rawcell := range rawrow.C {
  288. x, y, error := getCoordsFromCellIDString(rawcell.R)
  289. if error != nil {
  290. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  291. }
  292. if x > maxCol {
  293. maxCol = x
  294. }
  295. if y > maxRow {
  296. maxRow = y
  297. }
  298. }
  299. }
  300. maxCol += 1
  301. maxRow += 1
  302. rows = make([]*Row, maxRow)
  303. for _, rawrow := range Worksheet.SheetData.Row {
  304. // range is not empty
  305. if len(rawrow.Spans) != 0 {
  306. row = makeRowFromSpan(rawrow.Spans)
  307. } else {
  308. row = makeRowFromRaw(rawrow)
  309. }
  310. rowno := 0
  311. for _, rawcell := range rawrow.C {
  312. x, y, _ := getCoordsFromCellIDString(rawcell.R)
  313. if y != 0 && rowno == 0 {
  314. rowno = y
  315. }
  316. if x < len(row.Cells) {
  317. row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
  318. row.Cells[x].styleIndex = rawcell.S
  319. row.Cells[x].styles = file.styles
  320. }
  321. }
  322. rows[rowno] = row
  323. }
  324. return rows, maxCol, maxRow
  325. }
  326. // readSheetsFromZipFile is an internal helper function that loops
  327. // over the Worksheets defined in the XSLXWorkbook and loads them into
  328. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  329. func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, []string, error) {
  330. var workbook *xlsxWorkbook
  331. var error error
  332. var rc io.ReadCloser
  333. var decoder *xml.Decoder
  334. workbook = new(xlsxWorkbook)
  335. rc, error = f.Open()
  336. if error != nil {
  337. return nil, nil, error
  338. }
  339. decoder = xml.NewDecoder(rc)
  340. error = decoder.Decode(workbook)
  341. if error != nil {
  342. return nil, nil, error
  343. }
  344. sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
  345. names := make([]string, len(workbook.Sheets.Sheet))
  346. for i, rawsheet := range workbook.Sheets.Sheet {
  347. worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
  348. if error != nil {
  349. return nil, nil, error
  350. }
  351. sheet := new(Sheet)
  352. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, file)
  353. sheets[i] = sheet
  354. names[i] = rawsheet.Name
  355. }
  356. return sheets, names, nil
  357. }
  358. // readSharedStringsFromZipFile() is an internal helper function to
  359. // extract a reference table from the sharedStrings.xml file within
  360. // the XLSX zip file.
  361. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  362. var sst *xlsxSST
  363. var error error
  364. var rc io.ReadCloser
  365. var decoder *xml.Decoder
  366. var reftable []string
  367. rc, error = f.Open()
  368. if error != nil {
  369. return nil, error
  370. }
  371. sst = new(xlsxSST)
  372. decoder = xml.NewDecoder(rc)
  373. error = decoder.Decode(sst)
  374. if error != nil {
  375. return nil, error
  376. }
  377. reftable = MakeSharedStringRefTable(sst)
  378. return reftable, nil
  379. }
  380. // readStylesFromZipFile() is an internal helper function to
  381. // extract a style table from the style.xml file within
  382. // the XLSX zip file.
  383. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  384. var style *xlsxStyles
  385. var error error
  386. var rc io.ReadCloser
  387. var decoder *xml.Decoder
  388. rc, error = f.Open()
  389. if error != nil {
  390. return nil, error
  391. }
  392. style = new(xlsxStyles)
  393. decoder = xml.NewDecoder(rc)
  394. error = decoder.Decode(style)
  395. if error != nil {
  396. return nil, error
  397. }
  398. return style, nil
  399. }
  400. // OpenFile() take the name of an XLSX file and returns a populated
  401. // xlsx.File struct for it.
  402. func OpenFile(filename string) (x *File, e error) {
  403. var f *zip.ReadCloser
  404. var error error
  405. var file *File
  406. var v *zip.File
  407. var workbook *zip.File
  408. var styles *zip.File
  409. var sharedStrings *zip.File
  410. var reftable []string
  411. var worksheets map[string]*zip.File
  412. f, error = zip.OpenReader(filename)
  413. var sheetMap map[string]*Sheet
  414. if error != nil {
  415. return nil, error
  416. }
  417. file = new(File)
  418. worksheets = make(map[string]*zip.File, len(f.File))
  419. for _, v = range f.File {
  420. switch v.Name {
  421. case "xl/sharedStrings.xml":
  422. sharedStrings = v
  423. case "xl/workbook.xml":
  424. workbook = v
  425. case "xl/styles.xml":
  426. styles = v
  427. default:
  428. if len(v.Name) > 12 {
  429. if v.Name[0:13] == "xl/worksheets" {
  430. worksheets[v.Name[14:len(v.Name)-4]] = v
  431. }
  432. }
  433. }
  434. }
  435. file.worksheets = worksheets
  436. reftable, error = readSharedStringsFromZipFile(sharedStrings)
  437. if error != nil {
  438. return nil, error
  439. }
  440. if reftable == nil {
  441. error := new(XLSXReaderError)
  442. error.Err = "No valid sharedStrings.xml found in XLSX file"
  443. return nil, error
  444. }
  445. file.referenceTable = reftable
  446. style, error := readStylesFromZipFile(styles)
  447. if error != nil {
  448. return nil, error
  449. }
  450. file.styles = style
  451. sheets, names, error := readSheetsFromZipFile(workbook, file)
  452. if error != nil {
  453. return nil, error
  454. }
  455. if sheets == nil {
  456. error := new(XLSXReaderError)
  457. error.Err = "No sheets found in XLSX File"
  458. return nil, error
  459. }
  460. file.Sheets = sheets
  461. sheetMap = make(map[string]*Sheet, len(names))
  462. for i := 0; i < len(names); i++ {
  463. sheetMap[names[i]] = sheets[i]
  464. }
  465. file.Sheet = sheetMap
  466. f.Close()
  467. return file, nil
  468. }