lib.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "math"
  9. "strconv"
  10. "strings"
  11. )
  12. // XLSXReaderError is the standard error type for otherwise undefined
  13. // errors in the XSLX reading process.
  14. type XLSXReaderError struct {
  15. Err string
  16. }
  17. // String() returns a string value from an XLSXReaderError struct in
  18. // order that it might comply with the os.Error interface.
  19. func (e *XLSXReaderError) Error() string {
  20. return e.Err
  21. }
  22. // Cell is a high level structure intended to provide user access to
  23. // the contents of Cell within an xlsx.Row.
  24. type Cell struct {
  25. data string
  26. }
  27. // CellInterface defines the public API of the Cell.
  28. type CellInterface interface {
  29. String() string
  30. }
  31. func (c *Cell) String() string {
  32. return c.data
  33. }
  34. // Row is a high level structure indended to provide user access to a
  35. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  36. type Row struct {
  37. Cells []*Cell
  38. }
  39. // Sheet is a high level structure intended to provide user access to
  40. // the contents of a particular sheet within an XLSX file.
  41. type Sheet struct {
  42. Rows []*Row
  43. }
  44. // File is a high level structure providing a slice of Sheet structs
  45. // to the user.
  46. type File struct {
  47. worksheets map[string]*zip.File
  48. referenceTable []string
  49. Sheets []*Sheet // sheet access by index
  50. Sheet map[string]*Sheet // sheet access by name
  51. }
  52. // getRangeFromString is an internal helper function that converts
  53. // XLSX internal range syntax to a pair of integers. For example,
  54. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  55. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  56. var parts []string
  57. parts = strings.SplitN(rangeString, ":", 2)
  58. if parts[0] == "" {
  59. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  60. }
  61. if parts[1] == "" {
  62. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  63. }
  64. lower, error = strconv.Atoi(parts[0])
  65. if error != nil {
  66. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  67. }
  68. upper, error = strconv.Atoi(parts[1])
  69. if error != nil {
  70. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  71. }
  72. return lower, upper, error
  73. }
  74. // positionalLetterMultiplier gives an integer multiplier to use for a
  75. // position in a letter based column identifer. For example, the
  76. // column ID "AA" is equivalent to 26*1 + 1, "BA" is equivalent to
  77. // 26*2 + 1 and "ABA" is equivalent to (676 * 1)+(26 * 2)+1 or
  78. // ((26**2)*1)+((26**1)*2)+((26**0))*1
  79. func positionalLetterMultiplier(extent, pos int) int {
  80. var result float64
  81. var power float64
  82. var offset int
  83. offset = pos + 1
  84. power = float64(extent - offset)
  85. result = math.Pow(26, power)
  86. return int(result)
  87. }
  88. // lettersToNumeric is used to convert a character based column
  89. // reference to a zero based numeric column identifier.
  90. func lettersToNumeric(letters string) int {
  91. var sum int = 0
  92. var shift int
  93. extent := len(letters)
  94. for i, c := range letters {
  95. // Just to make life akward. If we think of this base
  96. // 26 notation as being like HEX or binary we hit a
  97. // nasty little problem. The issue is that we have no
  98. // 0s and therefore A can be both a 1 and a 0. The
  99. // value range of a letter is different in the most
  100. // significant position if (and only if) there is more
  101. // than one positions. For example:
  102. // "A" = 0
  103. // 676 | 26 | 0
  104. // ----+----+----
  105. // 0 | 0 | 0
  106. //
  107. // "Z" = 25
  108. // 676 | 26 | 0
  109. // ----+----+----
  110. // 0 | 0 | 25
  111. // "AA" = 26
  112. // 676 | 26 | 0
  113. // ----+----+----
  114. // 0 | 1 | 0 <--- note here - the value of "A" maps to both 1 and 0.
  115. if i == 0 && extent > 1 {
  116. shift = 1
  117. } else {
  118. shift = 0
  119. }
  120. multiplier := positionalLetterMultiplier(extent, i)
  121. switch {
  122. case 'A' <= c && c <= 'Z':
  123. sum += multiplier * (int((c - 'A')) + shift)
  124. case 'a' <= c && c <= 'z':
  125. sum += multiplier * (int((c - 'a')) + shift)
  126. }
  127. }
  128. return sum
  129. }
  130. // letterOnlyMapF is used in conjunction with strings.Map to return
  131. // only the characters A-Z and a-z in a string
  132. func letterOnlyMapF(rune rune) rune {
  133. switch {
  134. case 'A' <= rune && rune <= 'Z':
  135. return rune
  136. case 'a' <= rune && rune <= 'z':
  137. return rune - 32
  138. }
  139. return -1
  140. }
  141. // intOnlyMapF is used in conjunction with strings.Map to return only
  142. // the numeric portions of a string.
  143. func intOnlyMapF(rune rune) rune {
  144. if rune >= 48 && rune < 58 {
  145. return rune
  146. }
  147. return -1
  148. }
  149. // getCoordsFromCellIDString returns the zero based cartesian
  150. // coordinates from a cell name in Excel format, e.g. the cellIDString
  151. // "A1" returns 0, 0 and the "B3" return 1, 2.
  152. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  153. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  154. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  155. if error != nil {
  156. return x, y, error
  157. }
  158. y -= 1 // Zero based
  159. x = lettersToNumeric(letterPart)
  160. return x, y, error
  161. }
  162. // makeRowFromSpan will, when given a span expressed as a string,
  163. // return an empty Row large enough to encompass that span and
  164. // populate it with empty cells. All rows start from cell 1 -
  165. // regardless of the lower bound of the span.
  166. func makeRowFromSpan(spans string) *Row {
  167. var error error
  168. var upper int
  169. var row *Row
  170. var cell *Cell
  171. row = new(Row)
  172. _, upper, error = getRangeFromString(spans)
  173. if error != nil {
  174. panic(error)
  175. }
  176. error = nil
  177. row.Cells = make([]*Cell, upper)
  178. for i := 0; i < upper; i++ {
  179. cell = new(Cell)
  180. cell.data = ""
  181. row.Cells[i] = cell
  182. }
  183. return row
  184. }
  185. // get the max column
  186. // return the cells of columns
  187. func makeRowFromRaw(rawrow xlsxRow) *Row {
  188. var upper int
  189. var row *Row
  190. var cell *Cell
  191. row = new(Row)
  192. upper = 0
  193. for _, rawcell := range rawrow.C {
  194. x, _, error := getCoordsFromCellIDString(rawcell.R)
  195. if error != nil {
  196. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  197. }
  198. if x > upper {
  199. upper = x
  200. }
  201. }
  202. row.Cells = make([]*Cell, upper)
  203. for i := 0; i < upper; i++ {
  204. cell = new(Cell)
  205. cell.data = ""
  206. row.Cells[i] = cell
  207. }
  208. return row
  209. }
  210. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  211. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  212. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  213. var value string = ""
  214. var data string = rawcell.V
  215. if len(data) > 0 {
  216. vval := strings.Trim(data, " \t\n\r")
  217. if rawcell.T == "s" {
  218. ref, error := strconv.Atoi(vval)
  219. if error != nil {
  220. panic(error)
  221. }
  222. value = reftable[ref]
  223. } else {
  224. value = vval
  225. }
  226. }
  227. return value
  228. }
  229. // readRowsFromSheet is an internal helper function that extracts the
  230. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  231. // the value references from the reference table and stores them in
  232. func readRowsFromSheet(Worksheet *xlsxWorksheet, reftable []string) []*Row {
  233. var rows []*Row
  234. var row *Row
  235. rows = make([]*Row, len(Worksheet.SheetData.Row))
  236. for i, rawrow := range Worksheet.SheetData.Row {
  237. // range is not empty
  238. if len(rawrow.Spans) != 0 {
  239. row = makeRowFromSpan(rawrow.Spans)
  240. } else {
  241. row = makeRowFromRaw(rawrow)
  242. }
  243. for _, rawcell := range rawrow.C {
  244. x, _, error := getCoordsFromCellIDString(rawcell.R)
  245. if error != nil {
  246. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  247. }
  248. row.Cells[x].data = getValueFromCellData(rawcell, reftable)
  249. }
  250. rows[i] = row
  251. }
  252. return rows
  253. }
  254. // readSheetsFromZipFile is an internal helper function that loops
  255. // over the Worksheets defined in the XSLXWorkbook and loads them into
  256. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  257. func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, []string, error) {
  258. var workbook *xlsxWorkbook
  259. var error error
  260. var rc io.ReadCloser
  261. var decoder *xml.Decoder
  262. workbook = new(xlsxWorkbook)
  263. rc, error = f.Open()
  264. if error != nil {
  265. return nil, nil, error
  266. }
  267. decoder = xml.NewDecoder(rc)
  268. error = decoder.Decode(workbook)
  269. if error != nil {
  270. return nil, nil, error
  271. }
  272. sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
  273. names := make([]string, len(workbook.Sheets.Sheet))
  274. for i, rawsheet := range workbook.Sheets.Sheet {
  275. worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
  276. if error != nil {
  277. return nil, nil, error
  278. }
  279. sheet := new(Sheet)
  280. sheet.Rows = readRowsFromSheet(worksheet, file.referenceTable)
  281. sheets[i] = sheet
  282. names[i] = rawsheet.Name
  283. }
  284. return sheets, names, nil
  285. }
  286. // readSharedStringsFromZipFile() is an internal helper function to
  287. // extract a reference table from the sharedStrings.xml file within
  288. // the XLSX zip file.
  289. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  290. var sst *xlsxSST
  291. var error error
  292. var rc io.ReadCloser
  293. var decoder *xml.Decoder
  294. var reftable []string
  295. rc, error = f.Open()
  296. if error != nil {
  297. return nil, error
  298. }
  299. sst = new(xlsxSST)
  300. decoder = xml.NewDecoder(rc)
  301. error = decoder.Decode(sst)
  302. if error != nil {
  303. return nil, error
  304. }
  305. reftable = MakeSharedStringRefTable(sst)
  306. return reftable, nil
  307. }
  308. // OpenFile() take the name of an XLSX file and returns a populated
  309. // xlsx.File struct for it.
  310. func OpenFile(filename string) (x *File, e error) {
  311. var f *zip.ReadCloser
  312. var error error
  313. var file *File
  314. var v *zip.File
  315. var workbook *zip.File
  316. var sharedStrings *zip.File
  317. var reftable []string
  318. var worksheets map[string]*zip.File
  319. f, error = zip.OpenReader(filename)
  320. var sheetMap map[string]*Sheet
  321. if error != nil {
  322. return nil, error
  323. }
  324. file = new(File)
  325. worksheets = make(map[string]*zip.File, len(f.File))
  326. for _, v = range f.File {
  327. switch v.Name {
  328. case "xl/sharedStrings.xml":
  329. sharedStrings = v
  330. case "xl/workbook.xml":
  331. workbook = v
  332. default:
  333. if len(v.Name) > 12 {
  334. if v.Name[0:13] == "xl/worksheets" {
  335. worksheets[v.Name[14:len(v.Name)-4]] = v
  336. }
  337. }
  338. }
  339. }
  340. file.worksheets = worksheets
  341. reftable, error = readSharedStringsFromZipFile(sharedStrings)
  342. if error != nil {
  343. return nil, error
  344. }
  345. if reftable == nil {
  346. error := new(XLSXReaderError)
  347. error.Err = "No valid sharedStrings.xml found in XLSX file"
  348. return nil, error
  349. }
  350. file.referenceTable = reftable
  351. sheets, names, error := readSheetsFromZipFile(workbook, file)
  352. if error != nil {
  353. return nil, error
  354. }
  355. if sheets == nil {
  356. error := new(XLSXReaderError)
  357. error.Err = "No sheets found in XLSX File"
  358. return nil, error
  359. }
  360. file.Sheets = sheets
  361. sheetMap = make(map[string]*Sheet,len(names))
  362. for i := 0; i < len(names); i++ {
  363. sheetMap[names[i]] = sheets[i]
  364. }
  365. file.Sheet = sheetMap
  366. f.Close()
  367. return file, nil
  368. }