lib.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "math"
  9. "strconv"
  10. "strings"
  11. )
  12. // XLSXReaderError is the standard error type for otherwise undefined
  13. // errors in the XSLX reading process.
  14. type XLSXReaderError struct {
  15. Err string
  16. }
  17. // String() returns a string value from an XLSXReaderError struct in
  18. // order that it might comply with the os.Error interface.
  19. func (e *XLSXReaderError) Error() string {
  20. return e.Err
  21. }
  22. // Cell is a high level structure intended to provide user access to
  23. // the contents of Cell within an xlsx.Row.
  24. type Cell struct {
  25. Value string
  26. styleIndex int
  27. styles *xlsxStyles
  28. }
  29. // CellInterface defines the public API of the Cell.
  30. type CellInterface interface {
  31. String() string
  32. }
  33. func (c *Cell) String() string {
  34. return c.Value
  35. }
  36. func (c *Cell) GetStyle() *Style {
  37. if c.styleIndex > 0 && c.styleIndex < len(c.styles.CellXfs) {
  38. xf := c.styles.CellXfs[c.styleIndex]
  39. if xf.ApplyBorder != "0" {
  40. var border Border
  41. style := new(Style)
  42. border.Left = c.styles.Borders[xf.BorderId].Left.Style
  43. border.Right = c.styles.Borders[xf.BorderId].Right.Style
  44. border.Top = c.styles.Borders[xf.BorderId].Top.Style
  45. border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
  46. style.Boders = border
  47. return style
  48. } else {
  49. return new(Style)
  50. }
  51. } else {
  52. return new(Style)
  53. }
  54. return new(Style)
  55. }
  56. // Row is a high level structure indended to provide user access to a
  57. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  58. type Row struct {
  59. Cells []*Cell
  60. }
  61. // Sheet is a high level structure intended to provide user access to
  62. // the contents of a particular sheet within an XLSX file.
  63. type Sheet struct {
  64. Rows []*Row
  65. MaxRow int
  66. MaxCol int
  67. }
  68. // Style is a high level structure intended to provide user access to
  69. // the contents of Style within an XLSX file.
  70. type Style struct {
  71. Boders Border
  72. }
  73. // Border is a high level structure intended to provide user access to
  74. // the contents of Border Style within an Sheet.
  75. type Border struct {
  76. Left string
  77. Right string
  78. Top string
  79. Bottom string
  80. }
  81. // File is a high level structure providing a slice of Sheet structs
  82. // to the user.
  83. type File struct {
  84. worksheets map[string]*zip.File
  85. referenceTable []string
  86. styles *xlsxStyles
  87. Sheets []*Sheet // sheet access by index
  88. Sheet map[string]*Sheet // sheet access by name
  89. }
  90. // getRangeFromString is an internal helper function that converts
  91. // XLSX internal range syntax to a pair of integers. For example,
  92. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  93. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  94. var parts []string
  95. parts = strings.SplitN(rangeString, ":", 2)
  96. if parts[0] == "" {
  97. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  98. }
  99. if parts[1] == "" {
  100. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  101. }
  102. lower, error = strconv.Atoi(parts[0])
  103. if error != nil {
  104. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  105. }
  106. upper, error = strconv.Atoi(parts[1])
  107. if error != nil {
  108. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  109. }
  110. return lower, upper, error
  111. }
  112. // positionalLetterMultiplier gives an integer multiplier to use for a
  113. // position in a letter based column identifer. For example, the
  114. // column ID "AA" is equivalent to 26*1 + 1, "BA" is equivalent to
  115. // 26*2 + 1 and "ABA" is equivalent to (676 * 1)+(26 * 2)+1 or
  116. // ((26**2)*1)+((26**1)*2)+((26**0))*1
  117. func positionalLetterMultiplier(extent, pos int) int {
  118. var result float64
  119. var power float64
  120. var offset int
  121. offset = pos + 1
  122. power = float64(extent - offset)
  123. result = math.Pow(26, power)
  124. return int(result)
  125. }
  126. // lettersToNumeric is used to convert a character based column
  127. // reference to a zero based numeric column identifier.
  128. func lettersToNumeric(letters string) int {
  129. var sum int = 0
  130. var shift int
  131. extent := len(letters)
  132. for i, c := range letters {
  133. // Just to make life akward. If we think of this base
  134. // 26 notation as being like HEX or binary we hit a
  135. // nasty little problem. The issue is that we have no
  136. // 0s and therefore A can be both a 1 and a 0. The
  137. // value range of a letter is different in the most
  138. // significant position if (and only if) there is more
  139. // than one positions. For example:
  140. // "A" = 0
  141. // 676 | 26 | 0
  142. // ----+----+----
  143. // 0 | 0 | 0
  144. //
  145. // "Z" = 25
  146. // 676 | 26 | 0
  147. // ----+----+----
  148. // 0 | 0 | 25
  149. // "AA" = 26
  150. // 676 | 26 | 0
  151. // ----+----+----
  152. // 0 | 1 | 0 <--- note here - the value of "A" maps to both 1 and 0.
  153. if i == 0 && extent > 1 {
  154. shift = 1
  155. } else {
  156. shift = 0
  157. }
  158. multiplier := positionalLetterMultiplier(extent, i)
  159. switch {
  160. case 'A' <= c && c <= 'Z':
  161. sum += multiplier * (int((c - 'A')) + shift)
  162. case 'a' <= c && c <= 'z':
  163. sum += multiplier * (int((c - 'a')) + shift)
  164. }
  165. }
  166. return sum
  167. }
  168. // letterOnlyMapF is used in conjunction with strings.Map to return
  169. // only the characters A-Z and a-z in a string
  170. func letterOnlyMapF(rune rune) rune {
  171. switch {
  172. case 'A' <= rune && rune <= 'Z':
  173. return rune
  174. case 'a' <= rune && rune <= 'z':
  175. return rune - 32
  176. }
  177. return -1
  178. }
  179. // intOnlyMapF is used in conjunction with strings.Map to return only
  180. // the numeric portions of a string.
  181. func intOnlyMapF(rune rune) rune {
  182. if rune >= 48 && rune < 58 {
  183. return rune
  184. }
  185. return -1
  186. }
  187. // getCoordsFromCellIDString returns the zero based cartesian
  188. // coordinates from a cell name in Excel format, e.g. the cellIDString
  189. // "A1" returns 0, 0 and the "B3" return 1, 2.
  190. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  191. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  192. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  193. if error != nil {
  194. return x, y, error
  195. }
  196. y -= 1 // Zero based
  197. x = lettersToNumeric(letterPart)
  198. return x, y, error
  199. }
  200. // makeRowFromSpan will, when given a span expressed as a string,
  201. // return an empty Row large enough to encompass that span and
  202. // populate it with empty cells. All rows start from cell 1 -
  203. // regardless of the lower bound of the span.
  204. func makeRowFromSpan(spans string) *Row {
  205. var error error
  206. var upper int
  207. var row *Row
  208. var cell *Cell
  209. row = new(Row)
  210. _, upper, error = getRangeFromString(spans)
  211. if error != nil {
  212. panic(error)
  213. }
  214. error = nil
  215. row.Cells = make([]*Cell, upper)
  216. for i := 0; i < upper; i++ {
  217. cell = new(Cell)
  218. cell.Value = ""
  219. row.Cells[i] = cell
  220. }
  221. return row
  222. }
  223. // get the max column
  224. // return the cells of columns
  225. func makeRowFromRaw(rawrow xlsxRow) *Row {
  226. var upper int
  227. var row *Row
  228. var cell *Cell
  229. row = new(Row)
  230. upper = 0
  231. for _, rawcell := range rawrow.C {
  232. x, _, error := getCoordsFromCellIDString(rawcell.R)
  233. if error != nil {
  234. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  235. }
  236. if x > upper {
  237. upper = x
  238. }
  239. }
  240. row.Cells = make([]*Cell, upper)
  241. for i := 0; i < upper; i++ {
  242. cell = new(Cell)
  243. cell.Value = ""
  244. row.Cells[i] = cell
  245. }
  246. return row
  247. }
  248. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  249. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  250. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  251. var value string = ""
  252. var data string = rawcell.V
  253. if len(data) > 0 {
  254. vval := strings.Trim(data, " \t\n\r")
  255. if rawcell.T == "s" {
  256. ref, error := strconv.Atoi(vval)
  257. if error != nil {
  258. panic(error)
  259. }
  260. value = reftable[ref]
  261. } else {
  262. value = vval
  263. }
  264. }
  265. return value
  266. }
  267. // readRowsFromSheet is an internal helper function that extracts the
  268. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  269. // the value references from the reference table and stores them in
  270. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row ,int, int) {
  271. var rows []*Row
  272. var row *Row
  273. var maxCol int
  274. var maxRow int
  275. var reftable []string
  276. reftable = file.referenceTable
  277. maxCol = 0
  278. maxRow = 0
  279. for _, rawrow := range Worksheet.SheetData.Row {
  280. for _, rawcell := range rawrow.C {
  281. x, y, error := getCoordsFromCellIDString(rawcell.R)
  282. if error != nil {
  283. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  284. }
  285. if x > maxCol {
  286. maxCol = x
  287. }
  288. if y > maxRow {
  289. maxRow = y
  290. }
  291. }
  292. }
  293. maxCol += 1
  294. maxRow += 1
  295. rows = make([]*Row, maxRow)
  296. for _, rawrow := range Worksheet.SheetData.Row {
  297. // range is not empty
  298. if len(rawrow.Spans) != 0 {
  299. row = makeRowFromSpan(rawrow.Spans)
  300. } else {
  301. row = makeRowFromRaw(rawrow)
  302. }
  303. _,y, _ := getCoordsFromCellIDString(rawrow.C[0].R)
  304. for _, rawcell := range rawrow.C {
  305. x,_, _ := getCoordsFromCellIDString(rawcell.R)
  306. row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
  307. row.Cells[x].styleIndex = rawcell.S
  308. row.Cells[x].styles = file.styles
  309. }
  310. rows[y] = row
  311. }
  312. for i := 0; i < len(rows); i++{
  313. if rows[i] == nil {
  314. rows[i] = new(Row)
  315. }
  316. }
  317. return rows,maxCol,maxRow
  318. }
  319. // readSheetsFromZipFile is an internal helper function that loops
  320. // over the Worksheets defined in the XSLXWorkbook and loads them into
  321. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  322. func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, []string, error) {
  323. var workbook *xlsxWorkbook
  324. var error error
  325. var rc io.ReadCloser
  326. var decoder *xml.Decoder
  327. workbook = new(xlsxWorkbook)
  328. rc, error = f.Open()
  329. if error != nil {
  330. return nil, nil, error
  331. }
  332. decoder = xml.NewDecoder(rc)
  333. error = decoder.Decode(workbook)
  334. if error != nil {
  335. return nil, nil, error
  336. }
  337. sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
  338. names := make([]string, len(workbook.Sheets.Sheet))
  339. for i, rawsheet := range workbook.Sheets.Sheet {
  340. worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
  341. if error != nil {
  342. return nil, nil, error
  343. }
  344. sheet := new(Sheet)
  345. sheet.Rows,sheet.MaxCol,sheet.MaxRow = readRowsFromSheet(worksheet, file)
  346. sheets[i] = sheet
  347. names[i] = rawsheet.Name
  348. }
  349. return sheets, names, nil
  350. }
  351. // readSharedStringsFromZipFile() is an internal helper function to
  352. // extract a reference table from the sharedStrings.xml file within
  353. // the XLSX zip file.
  354. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  355. var sst *xlsxSST
  356. var error error
  357. var rc io.ReadCloser
  358. var decoder *xml.Decoder
  359. var reftable []string
  360. rc, error = f.Open()
  361. if error != nil {
  362. return nil, error
  363. }
  364. sst = new(xlsxSST)
  365. decoder = xml.NewDecoder(rc)
  366. error = decoder.Decode(sst)
  367. if error != nil {
  368. return nil, error
  369. }
  370. reftable = MakeSharedStringRefTable(sst)
  371. return reftable, nil
  372. }
  373. // readStylesFromZipFile() is an internal helper function to
  374. // extract a style table from the style.xml file within
  375. // the XLSX zip file.
  376. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  377. var style *xlsxStyles
  378. var error error
  379. var rc io.ReadCloser
  380. var decoder *xml.Decoder
  381. rc, error = f.Open()
  382. if error != nil {
  383. return nil, error
  384. }
  385. style = new(xlsxStyles)
  386. decoder = xml.NewDecoder(rc)
  387. error = decoder.Decode(style)
  388. if error != nil {
  389. return nil, error
  390. }
  391. return style, nil
  392. }
  393. // OpenFile() take the name of an XLSX file and returns a populated
  394. // xlsx.File struct for it.
  395. func OpenFile(filename string) (x *File, e error) {
  396. var f *zip.ReadCloser
  397. var error error
  398. var file *File
  399. var v *zip.File
  400. var workbook *zip.File
  401. var styles *zip.File
  402. var sharedStrings *zip.File
  403. var reftable []string
  404. var worksheets map[string]*zip.File
  405. f, error = zip.OpenReader(filename)
  406. var sheetMap map[string]*Sheet
  407. if error != nil {
  408. return nil, error
  409. }
  410. file = new(File)
  411. worksheets = make(map[string]*zip.File, len(f.File))
  412. for _, v = range f.File {
  413. switch v.Name {
  414. case "xl/sharedStrings.xml":
  415. sharedStrings = v
  416. case "xl/workbook.xml":
  417. workbook = v
  418. case "xl/styles.xml":
  419. styles = v
  420. default:
  421. if len(v.Name) > 12 {
  422. if v.Name[0:13] == "xl/worksheets" {
  423. worksheets[v.Name[14:len(v.Name)-4]] = v
  424. }
  425. }
  426. }
  427. }
  428. file.worksheets = worksheets
  429. reftable, error = readSharedStringsFromZipFile(sharedStrings)
  430. if error != nil {
  431. return nil, error
  432. }
  433. if reftable == nil {
  434. error := new(XLSXReaderError)
  435. error.Err = "No valid sharedStrings.xml found in XLSX file"
  436. return nil, error
  437. }
  438. file.referenceTable = reftable
  439. style , error := readStylesFromZipFile(styles)
  440. if error != nil {
  441. return nil, error
  442. }
  443. file.styles = style
  444. sheets, names, error := readSheetsFromZipFile(workbook, file)
  445. if error != nil {
  446. return nil, error
  447. }
  448. if sheets == nil {
  449. error := new(XLSXReaderError)
  450. error.Err = "No sheets found in XLSX File"
  451. return nil, error
  452. }
  453. file.Sheets = sheets
  454. sheetMap = make(map[string]*Sheet,len(names))
  455. for i := 0; i < len(names); i++ {
  456. sheetMap[names[i]] = sheets[i]
  457. }
  458. file.Sheet = sheetMap
  459. f.Close()
  460. return file, nil
  461. }