lib.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "math"
  9. "strconv"
  10. "strings"
  11. )
  12. // XLSXReaderError is the standard error type for otherwise undefined
  13. // errors in the XSLX reading process.
  14. type XLSXReaderError struct {
  15. Err string
  16. }
  17. // String() returns a string value from an XLSXReaderError struct in
  18. // order that it might comply with the os.Error interface.
  19. func (e *XLSXReaderError) Error() string {
  20. return e.Err
  21. }
  22. // Cell is a high level structure intended to provide user access to
  23. // the contents of Cell within an xlsx.Row.
  24. type Cell struct {
  25. Value string
  26. }
  27. // CellInterface defines the public API of the Cell.
  28. type CellInterface interface {
  29. String() string
  30. }
  31. func (c *Cell) String() string {
  32. return c.Value
  33. }
  34. // Row is a high level structure indended to provide user access to a
  35. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  36. type Row struct {
  37. Cells []*Cell
  38. }
  39. // Sheet is a high level structure intended to provide user access to
  40. // the contents of a particular sheet within an XLSX file.
  41. type Sheet struct {
  42. Rows []*Row
  43. MaxRow int
  44. MaxCol int
  45. }
  46. // File is a high level structure providing a slice of Sheet structs
  47. // to the user.
  48. type File struct {
  49. worksheets map[string]*zip.File
  50. referenceTable []string
  51. Sheets []*Sheet // sheet access by index
  52. Sheet map[string]*Sheet // sheet access by name
  53. }
  54. // getRangeFromString is an internal helper function that converts
  55. // XLSX internal range syntax to a pair of integers. For example,
  56. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  57. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  58. var parts []string
  59. parts = strings.SplitN(rangeString, ":", 2)
  60. if parts[0] == "" {
  61. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  62. }
  63. if parts[1] == "" {
  64. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  65. }
  66. lower, error = strconv.Atoi(parts[0])
  67. if error != nil {
  68. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  69. }
  70. upper, error = strconv.Atoi(parts[1])
  71. if error != nil {
  72. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  73. }
  74. return lower, upper, error
  75. }
  76. // positionalLetterMultiplier gives an integer multiplier to use for a
  77. // position in a letter based column identifer. For example, the
  78. // column ID "AA" is equivalent to 26*1 + 1, "BA" is equivalent to
  79. // 26*2 + 1 and "ABA" is equivalent to (676 * 1)+(26 * 2)+1 or
  80. // ((26**2)*1)+((26**1)*2)+((26**0))*1
  81. func positionalLetterMultiplier(extent, pos int) int {
  82. var result float64
  83. var power float64
  84. var offset int
  85. offset = pos + 1
  86. power = float64(extent - offset)
  87. result = math.Pow(26, power)
  88. return int(result)
  89. }
  90. // lettersToNumeric is used to convert a character based column
  91. // reference to a zero based numeric column identifier.
  92. func lettersToNumeric(letters string) int {
  93. var sum int = 0
  94. var shift int
  95. extent := len(letters)
  96. for i, c := range letters {
  97. // Just to make life akward. If we think of this base
  98. // 26 notation as being like HEX or binary we hit a
  99. // nasty little problem. The issue is that we have no
  100. // 0s and therefore A can be both a 1 and a 0. The
  101. // value range of a letter is different in the most
  102. // significant position if (and only if) there is more
  103. // than one positions. For example:
  104. // "A" = 0
  105. // 676 | 26 | 0
  106. // ----+----+----
  107. // 0 | 0 | 0
  108. //
  109. // "Z" = 25
  110. // 676 | 26 | 0
  111. // ----+----+----
  112. // 0 | 0 | 25
  113. // "AA" = 26
  114. // 676 | 26 | 0
  115. // ----+----+----
  116. // 0 | 1 | 0 <--- note here - the value of "A" maps to both 1 and 0.
  117. if i == 0 && extent > 1 {
  118. shift = 1
  119. } else {
  120. shift = 0
  121. }
  122. multiplier := positionalLetterMultiplier(extent, i)
  123. switch {
  124. case 'A' <= c && c <= 'Z':
  125. sum += multiplier * (int((c - 'A')) + shift)
  126. case 'a' <= c && c <= 'z':
  127. sum += multiplier * (int((c - 'a')) + shift)
  128. }
  129. }
  130. return sum
  131. }
  132. // letterOnlyMapF is used in conjunction with strings.Map to return
  133. // only the characters A-Z and a-z in a string
  134. func letterOnlyMapF(rune rune) rune {
  135. switch {
  136. case 'A' <= rune && rune <= 'Z':
  137. return rune
  138. case 'a' <= rune && rune <= 'z':
  139. return rune - 32
  140. }
  141. return -1
  142. }
  143. // intOnlyMapF is used in conjunction with strings.Map to return only
  144. // the numeric portions of a string.
  145. func intOnlyMapF(rune rune) rune {
  146. if rune >= 48 && rune < 58 {
  147. return rune
  148. }
  149. return -1
  150. }
  151. // getCoordsFromCellIDString returns the zero based cartesian
  152. // coordinates from a cell name in Excel format, e.g. the cellIDString
  153. // "A1" returns 0, 0 and the "B3" return 1, 2.
  154. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  155. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  156. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  157. if error != nil {
  158. return x, y, error
  159. }
  160. y -= 1 // Zero based
  161. x = lettersToNumeric(letterPart)
  162. return x, y, error
  163. }
  164. // makeRowFromSpan will, when given a span expressed as a string,
  165. // return an empty Row large enough to encompass that span and
  166. // populate it with empty cells. All rows start from cell 1 -
  167. // regardless of the lower bound of the span.
  168. func makeRowFromSpan(spans string) *Row {
  169. var error error
  170. var upper int
  171. var row *Row
  172. var cell *Cell
  173. row = new(Row)
  174. _, upper, error = getRangeFromString(spans)
  175. if error != nil {
  176. panic(error)
  177. }
  178. error = nil
  179. row.Cells = make([]*Cell, upper)
  180. for i := 0; i < upper; i++ {
  181. cell = new(Cell)
  182. cell.Value = ""
  183. row.Cells[i] = cell
  184. }
  185. return row
  186. }
  187. // get the max column
  188. // return the cells of columns
  189. func makeRowFromRaw(rawrow xlsxRow) *Row {
  190. var upper int
  191. var row *Row
  192. var cell *Cell
  193. row = new(Row)
  194. upper = 0
  195. for _, rawcell := range rawrow.C {
  196. x, _, error := getCoordsFromCellIDString(rawcell.R)
  197. if error != nil {
  198. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  199. }
  200. if x > upper {
  201. upper = x
  202. }
  203. }
  204. row.Cells = make([]*Cell, upper)
  205. for i := 0; i < upper; i++ {
  206. cell = new(Cell)
  207. cell.Value = ""
  208. row.Cells[i] = cell
  209. }
  210. return row
  211. }
  212. // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
  213. // Note - this is not actually general enough - we should support retaining tabs and newlines.
  214. func getValueFromCellData(rawcell xlsxC, reftable []string) string {
  215. var value string = ""
  216. var data string = rawcell.V
  217. if len(data) > 0 {
  218. vval := strings.Trim(data, " \t\n\r")
  219. if rawcell.T == "s" {
  220. ref, error := strconv.Atoi(vval)
  221. if error != nil {
  222. panic(error)
  223. }
  224. value = reftable[ref]
  225. } else {
  226. value = vval
  227. }
  228. }
  229. return value
  230. }
  231. // readRowsFromSheet is an internal helper function that extracts the
  232. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  233. // the value references from the reference table and stores them in
  234. func readRowsFromSheet(Worksheet *xlsxWorksheet, reftable []string) ([]*Row ,int) {
  235. var rows []*Row
  236. var row *Row
  237. var maxCol int
  238. rows = make([]*Row, len(Worksheet.SheetData.Row))
  239. maxCol = 0
  240. for i, rawrow := range Worksheet.SheetData.Row {
  241. // range is not empty
  242. if len(rawrow.Spans) != 0 {
  243. row = makeRowFromSpan(rawrow.Spans)
  244. } else {
  245. row = makeRowFromRaw(rawrow)
  246. }
  247. for _, rawcell := range rawrow.C {
  248. x, _, error := getCoordsFromCellIDString(rawcell.R)
  249. if error != nil {
  250. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  251. }
  252. if x > maxCol {
  253. maxCol = x
  254. }
  255. row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
  256. }
  257. rows[i] = row
  258. }
  259. return rows,maxCol
  260. }
  261. // readSheetsFromZipFile is an internal helper function that loops
  262. // over the Worksheets defined in the XSLXWorkbook and loads them into
  263. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  264. func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, []string, error) {
  265. var workbook *xlsxWorkbook
  266. var error error
  267. var rc io.ReadCloser
  268. var decoder *xml.Decoder
  269. workbook = new(xlsxWorkbook)
  270. rc, error = f.Open()
  271. if error != nil {
  272. return nil, nil, error
  273. }
  274. decoder = xml.NewDecoder(rc)
  275. error = decoder.Decode(workbook)
  276. if error != nil {
  277. return nil, nil, error
  278. }
  279. sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
  280. names := make([]string, len(workbook.Sheets.Sheet))
  281. for i, rawsheet := range workbook.Sheets.Sheet {
  282. worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
  283. if error != nil {
  284. return nil, nil, error
  285. }
  286. sheet := new(Sheet)
  287. sheet.Rows,sheet.MaxCol = readRowsFromSheet(worksheet, file.referenceTable)
  288. sheets[i] = sheet
  289. sheet.MaxRow = len(sheet.Rows)
  290. names[i] = rawsheet.Name
  291. }
  292. return sheets, names, nil
  293. }
  294. // readSharedStringsFromZipFile() is an internal helper function to
  295. // extract a reference table from the sharedStrings.xml file within
  296. // the XLSX zip file.
  297. func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
  298. var sst *xlsxSST
  299. var error error
  300. var rc io.ReadCloser
  301. var decoder *xml.Decoder
  302. var reftable []string
  303. rc, error = f.Open()
  304. if error != nil {
  305. return nil, error
  306. }
  307. sst = new(xlsxSST)
  308. decoder = xml.NewDecoder(rc)
  309. error = decoder.Decode(sst)
  310. if error != nil {
  311. return nil, error
  312. }
  313. reftable = MakeSharedStringRefTable(sst)
  314. return reftable, nil
  315. }
  316. // OpenFile() take the name of an XLSX file and returns a populated
  317. // xlsx.File struct for it.
  318. func OpenFile(filename string) (x *File, e error) {
  319. var f *zip.ReadCloser
  320. var error error
  321. var file *File
  322. var v *zip.File
  323. var workbook *zip.File
  324. var sharedStrings *zip.File
  325. var reftable []string
  326. var worksheets map[string]*zip.File
  327. f, error = zip.OpenReader(filename)
  328. var sheetMap map[string]*Sheet
  329. if error != nil {
  330. return nil, error
  331. }
  332. file = new(File)
  333. worksheets = make(map[string]*zip.File, len(f.File))
  334. for _, v = range f.File {
  335. switch v.Name {
  336. case "xl/sharedStrings.xml":
  337. sharedStrings = v
  338. case "xl/workbook.xml":
  339. workbook = v
  340. default:
  341. if len(v.Name) > 12 {
  342. if v.Name[0:13] == "xl/worksheets" {
  343. worksheets[v.Name[14:len(v.Name)-4]] = v
  344. }
  345. }
  346. }
  347. }
  348. file.worksheets = worksheets
  349. reftable, error = readSharedStringsFromZipFile(sharedStrings)
  350. if error != nil {
  351. return nil, error
  352. }
  353. if reftable == nil {
  354. error := new(XLSXReaderError)
  355. error.Err = "No valid sharedStrings.xml found in XLSX file"
  356. return nil, error
  357. }
  358. file.referenceTable = reftable
  359. sheets, names, error := readSheetsFromZipFile(workbook, file)
  360. if error != nil {
  361. return nil, error
  362. }
  363. if sheets == nil {
  364. error := new(XLSXReaderError)
  365. error.Err = "No sheets found in XLSX File"
  366. return nil, error
  367. }
  368. file.Sheets = sheets
  369. sheetMap = make(map[string]*Sheet,len(names))
  370. for i := 0; i < len(names); i++ {
  371. sheetMap[names[i]] = sheets[i]
  372. }
  373. file.Sheet = sheetMap
  374. f.Close()
  375. return file, nil
  376. }