lib.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
  // XLSXReaderError is the error type used for failures in the XLSX
  // reading process that have no more specific error value.
  type XLSXReaderError struct {
  	// Err is the human-readable description of the failure.
  	Err string
  }

  // Error returns the stored message, which makes *XLSXReaderError
  // satisfy the built-in error interface.
  func (readerErr *XLSXReaderError) Error() string {
  	return readerErr.Err
  }
  21. // Cell is a high level structure intended to provide user access to
  22. // the contents of Cell within an xlsx.Row.
  23. type Cell struct {
  24. Value string
  25. styleIndex int
  26. styles *xlsxStyles
  27. }
  // CellInterface describes the public, read-only API of a Cell: anything
  // that can render itself as a string.
  type CellInterface interface {
  	// String returns the textual content of the cell.
  	String() string
  }
  32. func (c *Cell) String() string {
  33. return c.Value
  34. }
  35. func (c *Cell) GetStyle() *Style {
  36. style := new(Style)
  37. if c.styleIndex > 0 && c.styleIndex < len(c.styles.CellXfs) {
  38. xf := c.styles.CellXfs[c.styleIndex]
  39. if xf.ApplyBorder != "0" {
  40. var border Border
  41. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  42. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  43. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  44. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  45. style.Boders = border
  46. }
  47. if xf.ApplyFill != "0" {
  48. var fill Fill
  49. fill.BgColorIndex = c.styles.Fills[xf.FillId].BgColorIndex
  50. style.Fills = fill
  51. }
  52. }
  53. return style
  54. }
  55. // Row is a high level structure indended to provide user access to a
  56. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  57. type Row struct {
  58. Cells []*Cell
  59. }
  60. // Sheet is a high level structure intended to provide user access to
  61. // the contents of a particular sheet within an XLSX file.
  62. type Sheet struct {
  63. Rows []*Row
  64. MaxRow int
  65. MaxCol int
  66. }
  67. // Style is a high level structure intended to provide user access to
  68. // the contents of Style within an XLSX file.
  69. type Style struct {
  70. Boders Border
  71. Fills Fill
  72. }
  // Border is the user-facing view of a cell's border styling: one style
  // name for each of the four edges of the cell.
  type Border struct {
  	Left, Right, Top, Bottom string
  }
  // Fill is the user-facing view of a cell's fill: the background and
  // foreground colour indexes, kept as strings.
  type Fill struct {
  	BgColorIndex, FgColorIndex string
  }
  87. // File is a high level structure providing a slice of Sheet structs
  88. // to the user.
  89. type File struct {
  90. worksheets map[string]*zip.File
  91. referenceTable []string
  92. styles *xlsxStyles
  93. Sheets []*Sheet // sheet access by index
  94. Sheet map[string]*Sheet // sheet access by name
  95. }
  // getRangeFromString is an internal helper that converts the XLSX range
  // syntax "lower:upper" into a pair of integers. For example, the range
  // string "1:3" yields the lower and upper integers 1 and 3.
  //
  // An error is returned, and both bounds are reported as 0, when the
  // string has no ":" separator, when either side is empty, or when either
  // side is not an integer. Each failure returns immediately so that a
  // later successful parse can no longer mask an earlier error.
  func getRangeFromString(rangeString string) (lower int, upper int, err error) {
  	parts := strings.SplitN(rangeString, ":", 2)
  	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
  		return 0, 0, errors.New("Invalid range '" + rangeString + "'\n")
  	}
  	if lower, err = strconv.Atoi(parts[0]); err != nil {
  		return 0, 0, fmt.Errorf("Invalid range (not integer in lower bound) %s\n", rangeString)
  	}
  	if upper, err = strconv.Atoi(parts[1]); err != nil {
  		return 0, 0, fmt.Errorf("Invalid range (not integer in upper bound) %s\n", rangeString)
  	}
  	return lower, upper, nil
  }
  // lettersToNumeric converts a letter-based column reference into a
  // zero-based numeric column index: "A" is 0, "Z" is 25, "AA" is 26.
  // Upper- and lower-case letters are equivalent, bytes that are not ASCII
  // letters count as 'A', and the empty string yields 0.
  func lettersToNumeric(letters string) int {
  	if len(letters) == 0 {
  		return 0
  	}
  	// Accumulate left to right as a bijective base-26 number (A=1..Z=26)
  	// and shift the result down by one to make it zero based.
  	total := 0
  	for pos := 0; pos < len(letters); pos++ {
  		digit := 0
  		switch ch := letters[pos]; {
  		case ch >= 'A' && ch <= 'Z':
  			digit = int(ch - 'A')
  		case ch >= 'a' && ch <= 'z':
  			digit = int(ch - 'a')
  		}
  		total = total*26 + digit + 1
  	}
  	return total - 1
  }
  // letterOnlyMapF is a mapping function for strings.Map that keeps only
  // the ASCII letters of a string: upper-case letters pass through,
  // lower-case letters are converted to upper case, and every other rune
  // is dropped (signalled by returning -1).
  func letterOnlyMapF(r rune) rune {
  	if r >= 'a' && r <= 'z' {
  		return r - ('a' - 'A')
  	}
  	if r >= 'A' && r <= 'Z' {
  		return r
  	}
  	return -1
  }
  // intOnlyMapF is a mapping function for strings.Map that keeps only the
  // ASCII digits '0' through '9' of a string and drops every other rune
  // (signalled by returning -1).
  func intOnlyMapF(r rune) rune {
  	if r < '0' || r > '9' {
  		return -1
  	}
  	return r
  }
  153. // getCoordsFromCellIDString returns the zero based cartesian
  154. // coordinates from a cell name in Excel format, e.g. the cellIDString
  155. // "A1" returns 0, 0 and the "B3" return 1, 2.
  156. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  157. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  158. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  159. if error != nil {
  160. return x, y, error
  161. }
  162. y -= 1 // Zero based
  163. x = lettersToNumeric(letterPart)
  164. return x, y, error
  165. }
  166. // makeRowFromSpan will, when given a span expressed as a string,
  167. // return an empty Row large enough to encompass that span and
  168. // populate it with empty cells. All rows start from cell 1 -
  169. // regardless of the lower bound of the span.
  170. func makeRowFromSpan(spans string) *Row {
  171. var error error
  172. var upper int
  173. var row *Row
  174. var cell *Cell
  175. row = new(Row)
  176. _, upper, error = getRangeFromString(spans)
  177. if error != nil {
  178. panic(error)
  179. }
  180. error = nil
  181. row.Cells = make([]*Cell, upper)
  182. for i := 0; i < upper; i++ {
  183. cell = new(Cell)
  184. cell.Value = ""
  185. row.Cells[i] = cell
  186. }
  187. return row
  188. }
  189. // get the max column
  190. // return the cells of columns
  191. func makeRowFromRaw(rawrow xlsxRow) *Row {
  192. var upper int
  193. var row *Row
  194. var cell *Cell
  195. row = new(Row)
  196. upper = -1
  197. for _, rawcell := range rawrow.C {
  198. x, _, error := getCoordsFromCellIDString(rawcell.R)
  199. if error != nil {
  200. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  201. }
  202. if x > upper {
  203. upper = x
  204. }
  205. }
  206. upper++
  207. row.Cells = make([]*Cell, upper)
  208. for i := 0; i < upper; i++ {
  209. cell = new(Cell)
  210. cell.Value = ""
  211. row.Cells[i] = cell
  212. }
  213. return row
  214. }
  215. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  216. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  217. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  218. var value string = ""
  219. var data string = rawcell.V
  220. if len(data) > 0 {
  221. vval := strings.Trim(data, " \t\n\r")
  222. if rawcell.T == "s" {
  223. ref, error := strconv.Atoi(vval)
  224. if error != nil {
  225. panic(error)
  226. }
  227. value = reftable[ref]
  228. } else {
  229. value = vval
  230. }
  231. }
  232. return value
  233. }
  234. // readRowsFromSheet is an internal helper function that extracts the
  235. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  236. // the value references from the reference table and stores them in
  237. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
  238. var rows []*Row
  239. var row *Row
  240. var maxCol int
  241. var maxRow int
  242. var reftable []string
  243. reftable = file.referenceTable
  244. maxCol = 0
  245. maxRow = 0
  246. for _, rawrow := range Worksheet.SheetData.Row {
  247. for _, rawcell := range rawrow.C {
  248. x, y, error := getCoordsFromCellIDString(rawcell.R)
  249. if error != nil {
  250. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  251. }
  252. if x > maxCol {
  253. maxCol = x
  254. }
  255. if y > maxRow {
  256. maxRow = y
  257. }
  258. }
  259. }
  260. maxCol += 1
  261. maxRow += 1
  262. rows = make([]*Row, maxRow)
  263. for _, rawrow := range Worksheet.SheetData.Row {
  264. // range is not empty
  265. if len(rawrow.Spans) != 0 {
  266. row = makeRowFromSpan(rawrow.Spans)
  267. } else {
  268. row = makeRowFromRaw(rawrow)
  269. }
  270. rowno := 0
  271. for _, rawcell := range rawrow.C {
  272. x, y, _ := getCoordsFromCellIDString(rawcell.R)
  273. if y != 0 && rowno == 0 {
  274. rowno = y
  275. }
  276. if x < len(row.Cells) {
  277. row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
  278. row.Cells[x].styleIndex = rawcell.S
  279. row.Cells[x].styles = file.styles
  280. }
  281. }
  282. rows[rowno] = row
  283. }
  284. return rows, maxCol, maxRow
  285. }
  286. // readSheetsFromZipFile is an internal helper function that loops
  287. // over the Worksheets defined in the XSLXWorkbook and loads them into
  288. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  289. func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, []string, error) {
  290. var workbook *xlsxWorkbook
  291. var error error
  292. var rc io.ReadCloser
  293. var decoder *xml.Decoder
  294. workbook = new(xlsxWorkbook)
  295. rc, error = f.Open()
  296. if error != nil {
  297. return nil, nil, error
  298. }
  299. decoder = xml.NewDecoder(rc)
  300. error = decoder.Decode(workbook)
  301. if error != nil {
  302. return nil, nil, error
  303. }
  304. sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
  305. names := make([]string, len(workbook.Sheets.Sheet))
  306. for i, rawsheet := range workbook.Sheets.Sheet {
  307. worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
  308. if error != nil {
  309. return nil, nil, error
  310. }
  311. sheet := new(Sheet)
  312. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, file)
  313. sheets[i] = sheet
  314. names[i] = rawsheet.Name
  315. }
  316. return sheets, names, nil
  317. }
  318. // readSharedStringsFromZipFile() is an internal helper function to
  319. // extract a reference table from the sharedStrings.xml file within
  320. // the XLSX zip file.
  321. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  322. var sst *xlsxSST
  323. var error error
  324. var rc io.ReadCloser
  325. var decoder *xml.Decoder
  326. var reftable []string
  327. rc, error = f.Open()
  328. if error != nil {
  329. return nil, error
  330. }
  331. sst = new(xlsxSST)
  332. decoder = xml.NewDecoder(rc)
  333. error = decoder.Decode(sst)
  334. if error != nil {
  335. return nil, error
  336. }
  337. reftable = MakeSharedStringRefTable(sst)
  338. return reftable, nil
  339. }
  340. // readStylesFromZipFile() is an internal helper function to
  341. // extract a style table from the style.xml file within
  342. // the XLSX zip file.
  343. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  344. var style *xlsxStyles
  345. var error error
  346. var rc io.ReadCloser
  347. var decoder *xml.Decoder
  348. rc, error = f.Open()
  349. if error != nil {
  350. return nil, error
  351. }
  352. style = new(xlsxStyles)
  353. decoder = xml.NewDecoder(rc)
  354. error = decoder.Decode(style)
  355. if error != nil {
  356. return nil, error
  357. }
  358. return style, nil
  359. }
  360. // OpenFile() take the name of an XLSX file and returns a populated
  361. // xlsx.File struct for it.
  362. func OpenFile(filename string) (*File, error) {
  363. var f *zip.ReadCloser
  364. f, err := zip.OpenReader(filename)
  365. if err != nil {
  366. return nil, err
  367. }
  368. return ReadZip(f)
  369. }
  370. func ReadZip(f *zip.ReadCloser) (*File, error) {
  371. var error error
  372. var file *File
  373. var v *zip.File
  374. var workbook *zip.File
  375. var styles *zip.File
  376. var sharedStrings *zip.File
  377. var reftable []string
  378. var worksheets map[string]*zip.File
  379. var sheetMap map[string]*Sheet
  380. file = new(File)
  381. worksheets = make(map[string]*zip.File, len(f.File))
  382. for _, v = range f.File {
  383. switch v.Name {
  384. case "xl/sharedStrings.xml":
  385. sharedStrings = v
  386. case "xl/workbook.xml":
  387. workbook = v
  388. case "xl/styles.xml":
  389. styles = v
  390. default:
  391. if len(v.Name) > 12 {
  392. if v.Name[0:13] == "xl/worksheets" {
  393. worksheets[v.Name[14:len(v.Name)-4]] = v
  394. }
  395. }
  396. }
  397. }
  398. file.worksheets = worksheets
  399. reftable, error = readSharedStringsFromZipFile(sharedStrings)
  400. if error != nil {
  401. return nil, error
  402. }
  403. if reftable == nil {
  404. error := new(XLSXReaderError)
  405. error.Err = "No valid sharedStrings.xml found in XLSX file"
  406. return nil, error
  407. }
  408. file.referenceTable = reftable
  409. style, error := readStylesFromZipFile(styles)
  410. if error != nil {
  411. return nil, error
  412. }
  413. file.styles = style
  414. sheets, names, error := readSheetsFromZipFile(workbook, file)
  415. if error != nil {
  416. return nil, error
  417. }
  418. if sheets == nil {
  419. error := new(XLSXReaderError)
  420. error.Err = "No sheets found in XLSX File"
  421. return nil, error
  422. }
  423. file.Sheets = sheets
  424. sheetMap = make(map[string]*Sheet, len(names))
  425. for i := 0; i < len(names); i++ {
  426. sheetMap[names[i]] = sheets[i]
  427. }
  428. file.Sheet = sheetMap
  429. f.Close()
  430. return file, nil
  431. }