lib.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "math"
  9. "strconv"
  10. "strings"
  11. )
  12. // XLSXReaderError is the standard error type for otherwise undefined
  13. // errors in the XSLX reading process.
  14. type XLSXReaderError struct {
  15. Err string
  16. }
  17. // String() returns a string value from an XLSXReaderError struct in
  18. // order that it might comply with the os.Error interface.
  19. func (e *XLSXReaderError) Error() string {
  20. return e.Err
  21. }
  22. // Cell is a high level structure intended to provide user access to
  23. // the contents of Cell within an xlsx.Row.
  24. type Cell struct {
  25. Value string
  26. styleIndex int
  27. styles *xlsxStyles
  28. }
  29. // CellInterface defines the public API of the Cell.
  30. type CellInterface interface {
  31. String() string
  32. }
  33. func (c *Cell) String() string {
  34. return c.Value
  35. }
  36. func (c *Cell) GetStyle() *Style {
  37. style := new(Style)
  38. if c.styleIndex > 0 && c.styleIndex < len(c.styles.CellXfs) {
  39. xf := c.styles.CellXfs[c.styleIndex]
  40. if xf.ApplyBorder != "0" {
  41. var border Border
  42. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  43. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  44. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  45. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  46. style.Boders = border
  47. }
  48. if xf.ApplyFill != "0" {
  49. var fill Fill
  50. fill.BgColorIndex = c.styles.Fills[xf.FillId].BgColorIndex
  51. style.Fills = fill
  52. }
  53. }
  54. return style
  55. }
  56. // Row is a high level structure indended to provide user access to a
  57. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  58. type Row struct {
  59. Cells []*Cell
  60. }
  61. // Sheet is a high level structure intended to provide user access to
  62. // the contents of a particular sheet within an XLSX file.
  63. type Sheet struct {
  64. Rows []*Row
  65. MaxRow int
  66. MaxCol int
  67. }
  68. // Style is a high level structure intended to provide user access to
  69. // the contents of Style within an XLSX file.
  70. type Style struct {
  71. Boders Border
  72. Fills Fill
  73. }
  74. // Border is a high level structure intended to provide user access to
  75. // the contents of Border Style within an Sheet.
  76. type Border struct {
  77. Left string
  78. Right string
  79. Top string
  80. Bottom string
  81. }
  82. // Fill is a high level structure intended to provide user access to
  83. // the contents of background and foreground color index within an Sheet.
  84. type Fill struct {
  85. BgColorIndex string
  86. FgColorIndex string
  87. }
  88. // File is a high level structure providing a slice of Sheet structs
  89. // to the user.
  90. type File struct {
  91. worksheets map[string]*zip.File
  92. referenceTable []string
  93. styles *xlsxStyles
  94. Sheets []*Sheet // sheet access by index
  95. Sheet map[string]*Sheet // sheet access by name
  96. }
  97. // getRangeFromString is an internal helper function that converts
  98. // XLSX internal range syntax to a pair of integers. For example,
  99. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  100. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  101. var parts []string
  102. parts = strings.SplitN(rangeString, ":", 2)
  103. if parts[0] == "" {
  104. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  105. }
  106. if parts[1] == "" {
  107. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  108. }
  109. lower, error = strconv.Atoi(parts[0])
  110. if error != nil {
  111. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  112. }
  113. upper, error = strconv.Atoi(parts[1])
  114. if error != nil {
  115. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  116. }
  117. return lower, upper, error
  118. }
  119. // positionalLetterMultiplier gives an integer multiplier to use for a
  120. // position in a letter based column identifer. For example, the
  121. // column ID "AA" is equivalent to 26*1 + 1, "BA" is equivalent to
  122. // 26*2 + 1 and "ABA" is equivalent to (676 * 1)+(26 * 2)+1 or
  123. // ((26**2)*1)+((26**1)*2)+((26**0))*1
  124. func positionalLetterMultiplier(extent, pos int) int {
  125. var result float64
  126. var power float64
  127. var offset int
  128. offset = pos + 1
  129. power = float64(extent - offset)
  130. result = math.Pow(26, power)
  131. return int(result)
  132. }
  133. // lettersToNumeric is used to convert a character based column
  134. // reference to a zero based numeric column identifier.
  135. func lettersToNumeric(letters string) int {
  136. var sum int = 0
  137. var shift int
  138. extent := len(letters)
  139. for i, c := range letters {
  140. // Just to make life akward. If we think of this base
  141. // 26 notation as being like HEX or binary we hit a
  142. // nasty little problem. The issue is that we have no
  143. // 0s and therefore A can be both a 1 and a 0. The
  144. // value range of a letter is different in the most
  145. // significant position if (and only if) there is more
  146. // than one positions. For example:
  147. // "A" = 0
  148. // 676 | 26 | 0
  149. // ----+----+----
  150. // 0 | 0 | 0
  151. //
  152. // "Z" = 25
  153. // 676 | 26 | 0
  154. // ----+----+----
  155. // 0 | 0 | 25
  156. // "AA" = 26
  157. // 676 | 26 | 0
  158. // ----+----+----
  159. // 0 | 1 | 0 <--- note here - the value of "A" maps to both 1 and 0.
  160. if i == 0 && extent > 1 {
  161. shift = 1
  162. } else {
  163. shift = 0
  164. }
  165. multiplier := positionalLetterMultiplier(extent, i)
  166. switch {
  167. case 'A' <= c && c <= 'Z':
  168. sum += multiplier * (int((c - 'A')) + shift)
  169. case 'a' <= c && c <= 'z':
  170. sum += multiplier * (int((c - 'a')) + shift)
  171. }
  172. }
  173. return sum
  174. }
  175. // letterOnlyMapF is used in conjunction with strings.Map to return
  176. // only the characters A-Z and a-z in a string
  177. func letterOnlyMapF(rune rune) rune {
  178. switch {
  179. case 'A' <= rune && rune <= 'Z':
  180. return rune
  181. case 'a' <= rune && rune <= 'z':
  182. return rune - 32
  183. }
  184. return -1
  185. }
  186. // intOnlyMapF is used in conjunction with strings.Map to return only
  187. // the numeric portions of a string.
  188. func intOnlyMapF(rune rune) rune {
  189. if rune >= 48 && rune < 58 {
  190. return rune
  191. }
  192. return -1
  193. }
  194. // getCoordsFromCellIDString returns the zero based cartesian
  195. // coordinates from a cell name in Excel format, e.g. the cellIDString
  196. // "A1" returns 0, 0 and the "B3" return 1, 2.
  197. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  198. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  199. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  200. if error != nil {
  201. return x, y, error
  202. }
  203. y -= 1 // Zero based
  204. x = lettersToNumeric(letterPart)
  205. return x, y, error
  206. }
  207. // makeRowFromSpan will, when given a span expressed as a string,
  208. // return an empty Row large enough to encompass that span and
  209. // populate it with empty cells. All rows start from cell 1 -
  210. // regardless of the lower bound of the span.
  211. func makeRowFromSpan(spans string) *Row {
  212. var error error
  213. var upper int
  214. var row *Row
  215. var cell *Cell
  216. row = new(Row)
  217. _, upper, error = getRangeFromString(spans)
  218. if error != nil {
  219. panic(error)
  220. }
  221. error = nil
  222. row.Cells = make([]*Cell, upper)
  223. for i := 0; i < upper; i++ {
  224. cell = new(Cell)
  225. cell.Value = ""
  226. row.Cells[i] = cell
  227. }
  228. return row
  229. }
  230. // get the max column
  231. // return the cells of columns
  232. func makeRowFromRaw(rawrow xlsxRow) *Row {
  233. var upper int
  234. var row *Row
  235. var cell *Cell
  236. row = new(Row)
  237. upper = 0
  238. for _, rawcell := range rawrow.C {
  239. x, _, error := getCoordsFromCellIDString(rawcell.R)
  240. if error != nil {
  241. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  242. }
  243. if x > upper {
  244. upper = x
  245. }
  246. }
  247. row.Cells = make([]*Cell, upper)
  248. for i := 0; i < upper; i++ {
  249. cell = new(Cell)
  250. cell.Value = ""
  251. row.Cells[i] = cell
  252. }
  253. return row
  254. }
  255. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  256. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  257. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  258. var value string = ""
  259. var data string = rawcell.V
  260. if len(data) > 0 {
  261. vval := strings.Trim(data, " \t\n\r")
  262. if rawcell.T == "s" {
  263. ref, error := strconv.Atoi(vval)
  264. if error != nil {
  265. panic(error)
  266. }
  267. value = reftable[ref]
  268. } else {
  269. value = vval
  270. }
  271. }
  272. return value
  273. }
  274. // readRowsFromSheet is an internal helper function that extracts the
  275. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  276. // the value references from the reference table and stores them in
  277. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
  278. var rows []*Row
  279. var row *Row
  280. var maxCol int
  281. var maxRow int
  282. var reftable []string
  283. reftable = file.referenceTable
  284. maxCol = 0
  285. maxRow = 0
  286. for _, rawrow := range Worksheet.SheetData.Row {
  287. for _, rawcell := range rawrow.C {
  288. x, y, error := getCoordsFromCellIDString(rawcell.R)
  289. if error != nil {
  290. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  291. }
  292. if x > maxCol {
  293. maxCol = x
  294. }
  295. if y > maxRow {
  296. maxRow = y
  297. }
  298. }
  299. }
  300. maxCol += 1
  301. maxRow += 1
  302. rows = make([]*Row, maxRow)
  303. for _, rawrow := range Worksheet.SheetData.Row {
  304. // range is not empty
  305. if len(rawrow.Spans) != 0 {
  306. row = makeRowFromSpan(rawrow.Spans)
  307. } else {
  308. row = makeRowFromRaw(rawrow)
  309. }
  310. rowno := 0
  311. for _, rawcell := range rawrow.C {
  312. x, y, _ := getCoordsFromCellIDString(rawcell.R)
  313. if y != 0 && rowno == 0{
  314. rowno = y
  315. }
  316. row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
  317. row.Cells[x].styleIndex = rawcell.S
  318. row.Cells[x].styles = file.styles
  319. }
  320. rows[rowno] = row
  321. }
  322. return rows, maxCol, maxRow
  323. }
  324. // readSheetsFromZipFile is an internal helper function that loops
  325. // over the Worksheets defined in the XSLXWorkbook and loads them into
  326. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  327. func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, []string, error) {
  328. var workbook *xlsxWorkbook
  329. var error error
  330. var rc io.ReadCloser
  331. var decoder *xml.Decoder
  332. workbook = new(xlsxWorkbook)
  333. rc, error = f.Open()
  334. if error != nil {
  335. return nil, nil, error
  336. }
  337. decoder = xml.NewDecoder(rc)
  338. error = decoder.Decode(workbook)
  339. if error != nil {
  340. return nil, nil, error
  341. }
  342. sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
  343. names := make([]string, len(workbook.Sheets.Sheet))
  344. for i, rawsheet := range workbook.Sheets.Sheet {
  345. worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
  346. if error != nil {
  347. return nil, nil, error
  348. }
  349. sheet := new(Sheet)
  350. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, file)
  351. sheets[i] = sheet
  352. names[i] = rawsheet.Name
  353. }
  354. return sheets, names, nil
  355. }
  356. // readSharedStringsFromZipFile() is an internal helper function to
  357. // extract a reference table from the sharedStrings.xml file within
  358. // the XLSX zip file.
  359. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  360. var sst *xlsxSST
  361. var error error
  362. var rc io.ReadCloser
  363. var decoder *xml.Decoder
  364. var reftable []string
  365. rc, error = f.Open()
  366. if error != nil {
  367. return nil, error
  368. }
  369. sst = new(xlsxSST)
  370. decoder = xml.NewDecoder(rc)
  371. error = decoder.Decode(sst)
  372. if error != nil {
  373. return nil, error
  374. }
  375. reftable = MakeSharedStringRefTable(sst)
  376. return reftable, nil
  377. }
  378. // readStylesFromZipFile() is an internal helper function to
  379. // extract a style table from the style.xml file within
  380. // the XLSX zip file.
  381. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  382. var style *xlsxStyles
  383. var error error
  384. var rc io.ReadCloser
  385. var decoder *xml.Decoder
  386. rc, error = f.Open()
  387. if error != nil {
  388. return nil, error
  389. }
  390. style = new(xlsxStyles)
  391. decoder = xml.NewDecoder(rc)
  392. error = decoder.Decode(style)
  393. if error != nil {
  394. return nil, error
  395. }
  396. return style, nil
  397. }
  398. // OpenFile() take the name of an XLSX file and returns a populated
  399. // xlsx.File struct for it.
  400. func OpenFile(filename string) (x *File, e error) {
  401. var f *zip.ReadCloser
  402. var error error
  403. var file *File
  404. var v *zip.File
  405. var workbook *zip.File
  406. var styles *zip.File
  407. var sharedStrings *zip.File
  408. var reftable []string
  409. var worksheets map[string]*zip.File
  410. f, error = zip.OpenReader(filename)
  411. var sheetMap map[string]*Sheet
  412. if error != nil {
  413. return nil, error
  414. }
  415. file = new(File)
  416. worksheets = make(map[string]*zip.File, len(f.File))
  417. for _, v = range f.File {
  418. switch v.Name {
  419. case "xl/sharedStrings.xml":
  420. sharedStrings = v
  421. case "xl/workbook.xml":
  422. workbook = v
  423. case "xl/styles.xml":
  424. styles = v
  425. default:
  426. if len(v.Name) > 12 {
  427. if v.Name[0:13] == "xl/worksheets" {
  428. worksheets[v.Name[14:len(v.Name)-4]] = v
  429. }
  430. }
  431. }
  432. }
  433. file.worksheets = worksheets
  434. reftable, error = readSharedStringsFromZipFile(sharedStrings)
  435. if error != nil {
  436. return nil, error
  437. }
  438. if reftable == nil {
  439. error := new(XLSXReaderError)
  440. error.Err = "No valid sharedStrings.xml found in XLSX file"
  441. return nil, error
  442. }
  443. file.referenceTable = reftable
  444. style, error := readStylesFromZipFile(styles)
  445. if error != nil {
  446. return nil, error
  447. }
  448. file.styles = style
  449. sheets, names, error := readSheetsFromZipFile(workbook, file)
  450. if error != nil {
  451. return nil, error
  452. }
  453. if sheets == nil {
  454. error := new(XLSXReaderError)
  455. error.Err = "No sheets found in XLSX File"
  456. return nil, error
  457. }
  458. file.Sheets = sheets
  459. sheetMap = make(map[string]*Sheet, len(names))
  460. for i := 0; i < len(names); i++ {
  461. sheetMap[names[i]] = sheets[i]
  462. }
  463. file.Sheet = sheetMap
  464. f.Close()
  465. return file, nil
  466. }