lib.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "fmt"
  5. "io"
  6. "math"
  7. "os"
  8. "strconv"
  9. "strings"
  10. "xml"
  11. )
  12. // XLSXReaderError is the standard error type for otherwise undefined
  13. // errors in the XSLX reading process.
  14. type XLSXReaderError struct {
  15. Error string
  16. }
  17. // String() returns a string value from an XLSXReaderError struct in
  18. // order that it might comply with the os.Error interface.
  19. func (e *XLSXReaderError) String() string {
  20. return e.Error
  21. }
  22. // Cell is a high level structure intended to provide user access to
  23. // the contents of Cell within an xlsx.Row.
  24. type Cell struct {
  25. data string
  26. }
  27. // CellInterface defines the public API of the Cell.
  28. type CellInterface interface {
  29. String() string
  30. }
  31. func (c *Cell) String() string {
  32. return c.data
  33. }
  34. // Row is a high level structure indended to provide user access to a
  35. // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
  36. type Row struct {
  37. Cells []*Cell
  38. }
  39. // Sheet is a high level structure intended to provide user access to
  40. // the contents of a particular sheet within an XLSX file.
  41. type Sheet struct {
  42. Rows []*Row
  43. }
  44. // File is a high level structure providing a slice of Sheet structs
  45. // to the user.
  46. type File struct {
  47. worksheets map[string]*zip.File
  48. referenceTable []string
  49. Sheets []*Sheet
  50. }
  51. // getRangeFromString is an internal helper function that converts
  52. // XLSX internal range syntax to a pair of integers. For example,
  53. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  54. func getRangeFromString(rangeString string) (lower int, upper int, error os.Error) {
  55. var parts []string
  56. parts = strings.SplitN(rangeString, ":", 2)
  57. if parts[0] == "" {
  58. error = os.NewError(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  59. }
  60. if parts[1] == "" {
  61. error = os.NewError(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  62. }
  63. lower, error = strconv.Atoi(parts[0])
  64. if error != nil {
  65. error = os.NewError(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  66. }
  67. upper, error = strconv.Atoi(parts[1])
  68. if error != nil {
  69. error = os.NewError(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  70. }
  71. return lower, upper, error
  72. }
  73. // positionalLetterMultiplier gives an integer multiplier to use for a
  74. // position in a letter based column identifer. For example, the
  75. // column ID "AA" is equivalent to 26*1 + 1, "BA" is equivalent to
  76. // 26*2 + 1 and "ABA" is equivalent to (676 * 1)+(26 * 2)+1 or
  77. // ((26**2)*1)+((26**1)*2)+((26**0))*1
  78. func positionalLetterMultiplier(extent, pos int) int {
  79. var result float64
  80. var power float64
  81. var offset int
  82. offset = pos + 1
  83. power = float64(extent - offset)
  84. result = math.Pow(26, power)
  85. return int(result)
  86. }
  87. // lettersToNumeric is used to convert a character based column
  88. // reference to a zero based numeric column identifier.
  89. func lettersToNumeric(letters string) int {
  90. var sum int = 0
  91. var shift int
  92. extent := len(letters)
  93. for i, c := range letters {
  94. // Just to make life akward. If we think of this base
  95. // 26 notation as being like HEX or binary we hit a
  96. // nasty little problem. The issue is that we have no
  97. // 0s and therefore A can be both a 1 and a 0. The
  98. // value range of a letter is different in the most
  99. // significant position if (and only if) there is more
  100. // than one positions. For example:
  101. // "A" = 0
  102. // 676 | 26 | 0
  103. // ----+----+----
  104. // 0 | 0 | 0
  105. //
  106. // "Z" = 25
  107. // 676 | 26 | 0
  108. // ----+----+----
  109. // 0 | 0 | 25
  110. // "AA" = 26
  111. // 676 | 26 | 0
  112. // ----+----+----
  113. // 0 | 1 | 0 <--- note here - the value of "A" maps to both 1 and 0.
  114. if i == 0 && extent > 1 {
  115. shift = 1
  116. } else {
  117. shift = 0
  118. }
  119. multiplier := positionalLetterMultiplier(extent, i)
  120. switch {
  121. case 'A' <= c && c <= 'Z':
  122. sum += multiplier * ((c - 'A') + shift)
  123. case 'a' <= c && c <= 'z':
  124. sum += multiplier * ((c - 'a') + shift)
  125. }
  126. }
  127. return sum
  128. }
  129. // letterOnlyMapF is used in conjunction with strings.Map to return
  130. // only the characters A-Z and a-z in a string
  131. func letterOnlyMapF(rune int) int {
  132. switch {
  133. case 'A' <= rune && rune <= 'Z':
  134. return rune
  135. case 'a' <= rune && rune <= 'z':
  136. return rune - 32
  137. }
  138. return -1
  139. }
  140. // intOnlyMapF is used in conjunction with strings.Map to return only
  141. // the numeric portions of a string.
  142. func intOnlyMapF(rune int) int {
  143. if rune >= 48 && rune < 58 {
  144. return rune
  145. }
  146. return -1
  147. }
  148. // getCoordsFromCellIDString returns the zero based cartesian
  149. // coordinates from a cell name in Excel format, e.g. the cellIDString
  150. // "A1" returns 0, 0 and the "B3" return 1, 2.
  151. func getCoordsFromCellIDString(cellIDString string) (x, y int, error os.Error) {
  152. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  153. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  154. if error != nil {
  155. return x, y, error
  156. }
  157. y -= 1 // Zero based
  158. x = lettersToNumeric(letterPart)
  159. return x, y, error
  160. }
  161. // makeRowFromSpan will, when given a span expressed as a string,
  162. // return an empty Row large enough to encompass that span and
  163. // populate it with empty cells. All rows start from cell 1 -
  164. // regardless of the lower bound of the span.
  165. func makeRowFromSpan(spans string) *Row {
  166. var error os.Error
  167. var upper int
  168. var row *Row
  169. var cell *Cell
  170. row = new(Row)
  171. _, upper, error = getRangeFromString(spans)
  172. if error != nil {
  173. panic(error)
  174. }
  175. error = nil
  176. row.Cells = make([]*Cell, upper)
  177. for i := 0; i < upper; i++ {
  178. cell = new(Cell)
  179. cell.data = ""
  180. row.Cells[i] = cell
  181. }
  182. return row
  183. }
  184. // readRowsFromSheet is an internal helper function that extracts the
  185. // rows from a XSLXWorksheet, poulates them with Cells and resolves
  186. // the value references from the reference table and stores them in
  187. func readRowsFromSheet(worksheet *XLSXWorksheet, reftable []string) []*Row {
  188. // Note, this function needs tidying up!
  189. var rows []*Row
  190. var row *Row
  191. var cell *Cell
  192. rows = make([]*Row, len(worksheet.SheetData.Row))
  193. for i, rawrow := range worksheet.SheetData.Row {
  194. row = makeRowFromSpan(rawrow.Spans)
  195. for _, rawcell := range rawrow.C {
  196. x, _, error := getCoordsFromCellIDString(rawcell.R)
  197. if error != nil {
  198. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  199. }
  200. error = nil
  201. cell = new(Cell)
  202. cell.data = ""
  203. if len(rawcell.V.Data) > 0 {
  204. vval := strings.Trim(rawcell.V.Data, " \t\n\r")
  205. if rawcell.T == "s" {
  206. ref, error := strconv.Atoi(vval)
  207. if error != nil {
  208. panic(error)
  209. panic(fmt.Sprintf("Invalid reference in Excel Cell (not found in sharedStrings.xml) - the reference was %v\n", rawcell.V.Data))
  210. }
  211. cell.data = reftable[ref]
  212. } else {
  213. cell.data = vval
  214. }
  215. }
  216. row.Cells[x] = cell
  217. }
  218. rows[i] = row
  219. }
  220. return rows
  221. }
  222. // readSheetsFromZipFile is an internal helper function that loops
  223. // over the Worksheets defined in the XSLXWorkbook and loads them into
  224. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  225. func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, os.Error) {
  226. var workbook *XLSXWorkbook
  227. var error os.Error
  228. var rc io.ReadCloser
  229. workbook = new(XLSXWorkbook)
  230. rc, error = f.Open()
  231. if error != nil {
  232. return nil, error
  233. }
  234. error = xml.Unmarshal(rc, workbook)
  235. if error != nil {
  236. return nil, error
  237. }
  238. sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
  239. for i, rawsheet := range workbook.Sheets.Sheet {
  240. worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets) //
  241. if error != nil {
  242. return nil, error
  243. }
  244. sheet := new(Sheet)
  245. sheet.Rows = readRowsFromSheet(worksheet, file.referenceTable)
  246. sheets[i] = sheet
  247. }
  248. return sheets, nil
  249. }
  250. // readSharedStringsFromZipFile() is an internal helper function to
  251. // extract a reference table from the sharedStrings.xml file within
  252. // the XLSX zip file.
  253. func readSharedStringsFromZipFile(f *zip.File) ([]string, os.Error) {
  254. var sst *XLSXSST
  255. var error os.Error
  256. var rc io.ReadCloser
  257. var reftable []string
  258. rc, error = f.Open()
  259. if error != nil {
  260. return nil, error
  261. }
  262. sst = new(XLSXSST)
  263. error = xml.Unmarshal(rc, sst)
  264. if error != nil {
  265. return nil, error
  266. }
  267. reftable = MakeSharedStringRefTable(sst)
  268. return reftable, nil
  269. }
  270. // OpenFile() take the name of an XLSX file and returns a populated
  271. // xlsx.File struct for it.
  272. func OpenFile(filename string) (x *File, e os.Error) {
  273. var f *zip.ReadCloser
  274. var error os.Error
  275. var file *File
  276. var v *zip.File
  277. var workbook *zip.File
  278. var sharedStrings *zip.File
  279. var reftable []string
  280. var worksheets map[string]*zip.File
  281. f, error = zip.OpenReader(filename)
  282. if error != nil {
  283. return nil, error
  284. }
  285. file = new(File)
  286. worksheets = make(map[string]*zip.File, len(f.File))
  287. for _, v = range f.File {
  288. switch v.Name {
  289. case "xl/sharedStrings.xml":
  290. sharedStrings = v
  291. case "xl/workbook.xml":
  292. workbook = v
  293. default:
  294. if len(v.Name) > 12 {
  295. if v.Name[0:13] == "xl/worksheets" {
  296. worksheets[v.Name[14:len(v.Name)-4]] = v
  297. }
  298. }
  299. }
  300. }
  301. file.worksheets = worksheets
  302. reftable, error = readSharedStringsFromZipFile(sharedStrings)
  303. if error != nil {
  304. return nil, error
  305. }
  306. if reftable == nil {
  307. error := new(XLSXReaderError)
  308. error.Error = "No valid sharedStrings.xml found in XLSX file"
  309. return nil, error
  310. }
  311. file.referenceTable = reftable
  312. sheets, error := readSheetsFromZipFile(workbook, file)
  313. if error != nil {
  314. return nil, error
  315. }
  316. if sheets == nil {
  317. error := new(XLSXReaderError)
  318. error.Error = "No sheets found in XLSX File"
  319. return nil, error
  320. }
  321. file.Sheets = sheets
  322. f.Close()
  323. return file, nil
  324. }