lib.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. package xlsx
  2. import (
  3. "archive/zip"
  4. "encoding/xml"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
// XLSXReaderError is the standard error type for otherwise undefined
// errors in the XLSX reading process.
type XLSXReaderError struct {
	// Err is the human-readable message; Error returns it unchanged.
	Err string
}
// Error returns the message stored in the XLSXReaderError, so that
// *XLSXReaderError satisfies the built-in error interface.
func (e *XLSXReaderError) Error() string {
	return e.Err
}
  21. // getRangeFromString is an internal helper function that converts
  22. // XLSX internal range syntax to a pair of integers. For example,
  23. // the range string "1:3" yield the upper and lower intergers 1 and 3.
  24. func getRangeFromString(rangeString string) (lower int, upper int, error error) {
  25. var parts []string
  26. parts = strings.SplitN(rangeString, ":", 2)
  27. if parts[0] == "" {
  28. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  29. }
  30. if parts[1] == "" {
  31. error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
  32. }
  33. lower, error = strconv.Atoi(parts[0])
  34. if error != nil {
  35. error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
  36. }
  37. upper, error = strconv.Atoi(parts[1])
  38. if error != nil {
  39. error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
  40. }
  41. return lower, upper, error
  42. }
  43. // lettersToNumeric is used to convert a character based column
  44. // reference to a zero based numeric column identifier.
  45. func lettersToNumeric(letters string) int {
  46. sum, mul, n := 0, 1, 0
  47. for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
  48. c := letters[i]
  49. switch {
  50. case 'A' <= c && c <= 'Z':
  51. n += int(c - 'A')
  52. case 'a' <= c && c <= 'z':
  53. n += int(c - 'a')
  54. }
  55. sum += n * mul
  56. }
  57. return sum
  58. }
  59. // Get the largestDenominator that is a multiple of a basedDenominator
  60. // and fits at least once into a given numerator.
  61. func getLargestDenominator(numerator, multiple, baseDenominator, power int) (int, int) {
  62. if numerator / multiple == 0 {
  63. return 1, power
  64. }
  65. next, nextPower := getLargestDenominator(
  66. numerator, multiple * baseDenominator, baseDenominator, power + 1)
  67. if next > multiple {
  68. return next, nextPower
  69. }
  70. return multiple, power
  71. }
  72. // Convers a list of numbers representing a column into a alphabetic
  73. // representation, as used in the spreadsheet.
  74. func formatColumnName(colId []int) string {
  75. lastPart := len(colId) - 1
  76. result := ""
  77. for n, part := range(colId) {
  78. if n == lastPart {
  79. // The least significant number is in the
  80. // range 0-25, all other numbers are 1-26,
  81. // hence we use a differente offset for the
  82. // last part.
  83. result += string(part + 65)
  84. } else {
  85. // Don't output leading 0s, as there is no
  86. // representation of 0 in this format.
  87. if part > 0 {
  88. result += string(part + 64)
  89. }
  90. }
  91. }
  92. return result
  93. }
  94. func smooshBase26Slice(b26 []int) []int {
  95. // Smoosh values together, eliminating 0s from all but the
  96. // least significant part.
  97. lastButOnePart := len(b26) - 2
  98. for i := lastButOnePart; i > 0; i-- {
  99. part := b26[i]
  100. if part == 0 {
  101. greaterPart := b26[i-1]
  102. if greaterPart > 0 {
  103. b26[i-1] = greaterPart - 1
  104. b26[i] = 26
  105. }
  106. }
  107. }
  108. return b26
  109. }
  110. func intToBase26(x int) (parts []int) {
  111. // Excel column codes are pure evil - in essence they're just
  112. // base26, but they don't represent the number 0.
  113. b26Denominator, _ := getLargestDenominator(x, 1, 26, 0)
  114. // This loop terminates because integer division of 1 / 26
  115. // returns 0.
  116. for d := b26Denominator; d > 0; d = d / 26 {
  117. value := x / d
  118. remainder := x % d
  119. parts = append(parts, value)
  120. x = remainder
  121. }
  122. return parts
  123. }
  124. // numericToLetters is used to convert a zero based, numeric column
  125. // indentifier into a character code.
  126. func numericToLetters(colRef int) string {
  127. parts := intToBase26(colRef)
  128. return formatColumnName(smooshBase26Slice(parts))
  129. }
  130. // letterOnlyMapF is used in conjunction with strings.Map to return
  131. // only the characters A-Z and a-z in a string
  132. func letterOnlyMapF(rune rune) rune {
  133. switch {
  134. case 'A' <= rune && rune <= 'Z':
  135. return rune
  136. case 'a' <= rune && rune <= 'z':
  137. return rune - 32
  138. }
  139. return -1
  140. }
  141. // intOnlyMapF is used in conjunction with strings.Map to return only
  142. // the numeric portions of a string.
  143. func intOnlyMapF(rune rune) rune {
  144. if rune >= 48 && rune < 58 {
  145. return rune
  146. }
  147. return -1
  148. }
  149. // getCoordsFromCellIDString returns the zero based cartesian
  150. // coordinates from a cell name in Excel format, e.g. the cellIDString
  151. // "A1" returns 0, 0 and the "B3" return 1, 2.
  152. func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
  153. var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
  154. y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
  155. if error != nil {
  156. return x, y, error
  157. }
  158. y -= 1 // Zero based
  159. x = lettersToNumeric(letterPart)
  160. return x, y, error
  161. }
  162. // getCellIDStringFromCoords returns the Excel format cell name that
  163. // represents a pair of zero based cartesian coordinates.
  164. func getCellIDStringFromCoords(x, y int) string {
  165. letterPart := numericToLetters(x);
  166. numericPart := y + 1
  167. return fmt.Sprintf("%s%d", letterPart, numericPart)
  168. }
  169. // getMaxMinFromDimensionRef return the zero based cartesian maximum
  170. // and minimum coordinates from the dimension reference embedded in a
  171. // XLSX worksheet. For example, the dimension reference "A1:B2"
  172. // returns "0,0", "1,1".
  173. func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
  174. var parts []string
  175. parts = strings.Split(ref, ":")
  176. minx, miny, err = getCoordsFromCellIDString(parts[0])
  177. if err != nil {
  178. return -1, -1, -1, -1, err
  179. }
  180. if len(parts) == 1 {
  181. maxx, maxy = minx, miny
  182. return
  183. }
  184. maxx, maxy, err = getCoordsFromCellIDString(parts[1])
  185. if err != nil {
  186. return -1, -1, -1, -1, err
  187. }
  188. return
  189. }
  190. // makeRowFromSpan will, when given a span expressed as a string,
  191. // return an empty Row large enough to encompass that span and
  192. // populate it with empty cells. All rows start from cell 1 -
  193. // regardless of the lower bound of the span.
  194. func makeRowFromSpan(spans string) *Row {
  195. var error error
  196. var upper int
  197. var row *Row
  198. var cell *Cell
  199. row = new(Row)
  200. _, upper, error = getRangeFromString(spans)
  201. if error != nil {
  202. panic(error)
  203. }
  204. error = nil
  205. row.Cells = make([]*Cell, upper)
  206. for i := 0; i < upper; i++ {
  207. cell = new(Cell)
  208. cell.Value = ""
  209. row.Cells[i] = cell
  210. }
  211. return row
  212. }
  213. // makeRowFromRaw returns the Row representation of the xlsxRow.
  214. func makeRowFromRaw(rawrow xlsxRow) *Row {
  215. var upper int
  216. var row *Row
  217. var cell *Cell
  218. row = new(Row)
  219. upper = -1
  220. for _, rawcell := range rawrow.C {
  221. x, _, error := getCoordsFromCellIDString(rawcell.R)
  222. if error != nil {
  223. panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
  224. }
  225. if x > upper {
  226. upper = x
  227. }
  228. }
  229. upper++
  230. row.Cells = make([]*Cell, upper)
  231. for i := 0; i < upper; i++ {
  232. cell = new(Cell)
  233. cell.Value = ""
  234. row.Cells[i] = cell
  235. }
  236. return row
  237. }
  238. // getValueFromCellData attempts to extract a valid value, usable in
  239. // CSV form from the raw cell value. Note - this is not actually
  240. // general enough - we should support retaining tabs and newlines.
  241. func getValueFromCellData(rawcell xlsxC, reftable *RefTable) string {
  242. var value string = ""
  243. var data string = rawcell.V
  244. if len(data) > 0 {
  245. vval := strings.Trim(data, " \t\n\r")
  246. if rawcell.T == "s" {
  247. ref, error := strconv.Atoi(vval)
  248. if error != nil {
  249. panic(error)
  250. }
  251. value = reftable.ResolveSharedString(ref)
  252. } else {
  253. value = vval
  254. }
  255. }
  256. return value
  257. }
// readRowsFromSheet is an internal helper function that extracts the
// rows from a xlsxWorksheet, populates them with Cells and resolves
// the value references through the file's reference table. It returns
// the rows followed by the column count and the row count, both
// derived from the worksheet's dimension reference. A worksheet
// without any row data yields nil, 0, 0.
//
// It panics if the dimension reference cannot be parsed.
// NOTE(review): the indexing below trusts the dimension reference and
// the row spans to cover every stored row and cell; a worksheet where
// they do not would index out of range - confirm against real files.
func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
	var rows []*Row
	var row *Row
	var minCol, maxCol, minRow, maxRow, colCount, rowCount int
	var reftable *RefTable
	var err error
	var insertRowIndex, insertColIndex int
	if len(Worksheet.SheetData.Row) == 0 {
		return nil, 0, 0
	}
	reftable = file.referenceTable
	minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
	if err != nil {
		panic(err.Error())
	}
	// Both bounds are inclusive, hence the +1.
	rowCount = (maxRow - minRow) + 1
	colCount = (maxCol - minCol) + 1
	rows = make([]*Row, rowCount)
	// insertRowIndex is the zero based sheet row that the next stored
	// row is expected to occupy; rows is indexed relative to minRow.
	insertRowIndex = minRow
	for rowIndex := 0; rowIndex < len(Worksheet.SheetData.Row); rowIndex++ {
		rawrow := Worksheet.SheetData.Row[rowIndex]
		// Some spreadsheets will omit blank rows from the
		// stored data. rawrow.R is compared against the one based
		// form of insertRowIndex.
		for rawrow.R > (insertRowIndex + 1) {
			// Put an empty Row into the array
			rows[insertRowIndex-minRow] = new(Row)
			insertRowIndex++
		}
		// Size the row from its span when one is given, otherwise
		// from the cell references it contains.
		if len(rawrow.Spans) != 0 {
			row = makeRowFromSpan(rawrow.Spans)
		} else {
			row = makeRowFromRaw(rawrow)
		}
		// insertColIndex tracks the zero based sheet column of the
		// next cell; row.Cells is indexed relative to minCol.
		insertColIndex = minCol
		for _, rawcell := range rawrow.C {
			// The parse error is ignored here; only the column
			// index is used.
			x, _, _ := getCoordsFromCellIDString(rawcell.R)
			// Some spreadsheets will omit blank cells
			// from the data.
			for x > insertColIndex {
				// Put an empty Cell into the array
				row.Cells[insertColIndex-minCol] = new(Cell)
				insertColIndex++
			}
			cellX := insertColIndex - minCol
			row.Cells[cellX].Value = getValueFromCellData(rawcell, reftable)
			row.Cells[cellX].styleIndex = rawcell.S
			row.Cells[cellX].styles = file.styles
			insertColIndex++
		}
		rows[insertRowIndex-minRow] = row
		insertRowIndex++
	}
	return rows, colCount, rowCount
}
// indexedSheet is the message a sheet-reading goroutine sends back on
// its result channel.
type indexedSheet struct {
	Index int    // position of the sheet in the workbook's sheet list
	Sheet *Sheet // the converted sheet; nil when Error is set
	Error error  // non-nil when the sheet could not be read
}
  321. // readSheetFromFile is the logic of converting a xlsxSheet struct
  322. // into a Sheet struct. This work can be done in parallel and so
  323. // readSheetsFromZipFile will spawn an instance of this function per
  324. // sheet and get the results back on the provided channel.
  325. func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) {
  326. result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
  327. worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
  328. if error != nil {
  329. result.Error = error
  330. sc <- result
  331. return
  332. }
  333. sheet := new(Sheet)
  334. sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi)
  335. result.Sheet = sheet
  336. sc <- result
  337. }
  338. // readSheetsFromZipFile is an internal helper function that loops
  339. // over the Worksheets defined in the XSLXWorkbook and loads them into
  340. // Sheet objects stored in the Sheets slice of a xlsx.File struct.
  341. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) (map[string]*Sheet, error) {
  342. var workbook *xlsxWorkbook
  343. var error error
  344. var rc io.ReadCloser
  345. var decoder *xml.Decoder
  346. var sheetCount int
  347. workbook = new(xlsxWorkbook)
  348. rc, error = f.Open()
  349. if error != nil {
  350. return nil, error
  351. }
  352. decoder = xml.NewDecoder(rc)
  353. error = decoder.Decode(workbook)
  354. if error != nil {
  355. return nil, error
  356. }
  357. sheetCount = len(workbook.Sheets.Sheet)
  358. sheets := make(map[string]*Sheet, sheetCount)
  359. sheetChan := make(chan *indexedSheet, sheetCount)
  360. for i, rawsheet := range workbook.Sheets.Sheet {
  361. go readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap)
  362. }
  363. for j := 0; j < sheetCount; j++ {
  364. sheet := <-sheetChan
  365. if sheet.Error != nil {
  366. return nil, sheet.Error
  367. }
  368. sheets[workbook.Sheets.Sheet[sheet.Index].Name] = sheet.Sheet
  369. }
  370. return sheets, nil
  371. }
  372. // readSharedStringsFromZipFile() is an internal helper function to
  373. // extract a reference table from the sharedStrings.xml file within
  374. // the XLSX zip file.
  375. func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
  376. var sst *xlsxSST
  377. var error error
  378. var rc io.ReadCloser
  379. var decoder *xml.Decoder
  380. var reftable *RefTable
  381. rc, error = f.Open()
  382. if error != nil {
  383. return nil, error
  384. }
  385. sst = new(xlsxSST)
  386. decoder = xml.NewDecoder(rc)
  387. error = decoder.Decode(sst)
  388. if error != nil {
  389. return nil, error
  390. }
  391. reftable = MakeSharedStringRefTable(sst)
  392. return reftable, nil
  393. }
  394. // readStylesFromZipFile() is an internal helper function to
  395. // extract a style table from the style.xml file within
  396. // the XLSX zip file.
  397. func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
  398. var style *xlsxStyles
  399. var error error
  400. var rc io.ReadCloser
  401. var decoder *xml.Decoder
  402. rc, error = f.Open()
  403. if error != nil {
  404. return nil, error
  405. }
  406. style = new(xlsxStyles)
  407. decoder = xml.NewDecoder(rc)
  408. error = decoder.Decode(style)
  409. if error != nil {
  410. return nil, error
  411. }
  412. return style, nil
  413. }
// WorkBookRels maps a workbook relationship ID to the target that the
// relationship refers to.
type WorkBookRels map[string]string
  415. func (w *WorkBookRels) MakeXLSXWorkbookRels() xlsxWorkbookRels {
  416. xWorkbookRels := xlsxWorkbookRels{}
  417. xWorkbookRels.Relationships = make([]xlsxWorkbookRelation, len(*w))
  418. index := 0
  419. for k, v := range(*w) {
  420. xWorkbookRels.Relationships[index] = xlsxWorkbookRelation{
  421. Id: k,
  422. Target: v,
  423. Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"}
  424. index++
  425. }
  426. return xWorkbookRels
  427. }
  428. // readWorkbookRelationsFromZipFile is an internal helper function to
  429. // extract a map of relationship ID strings to the name of the
  430. // worksheet.xml file they refer to. The resulting map can be used to
  431. // reliably derefence the worksheets in the XLSX file.
  432. func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (WorkBookRels, error) {
  433. var sheetXMLMap WorkBookRels
  434. var wbRelationships *xlsxWorkbookRels
  435. var rc io.ReadCloser
  436. var decoder *xml.Decoder
  437. var err error
  438. rc, err = workbookRels.Open()
  439. if err != nil {
  440. return nil, err
  441. }
  442. decoder = xml.NewDecoder(rc)
  443. wbRelationships = new(xlsxWorkbookRels)
  444. err = decoder.Decode(wbRelationships)
  445. if err != nil {
  446. return nil, err
  447. }
  448. sheetXMLMap = make(WorkBookRels)
  449. for _, rel := range wbRelationships.Relationships {
  450. if strings.HasSuffix(rel.Target, ".xml") && strings.HasPrefix(rel.Target, "worksheets/") {
  451. sheetXMLMap[rel.Id] = strings.Replace(rel.Target[len("worksheets/"):], ".xml", "", 1)
  452. }
  453. }
  454. return sheetXMLMap, nil
  455. }
// ReadZip takes a pointer to a zip.ReadCloser and returns a
// xlsx.File struct populated with its contents. The archive is closed
// before ReadZip returns, whether or not reading succeeded. In most
// cases ReadZip is not used directly, but is called internally by
// OpenFile.
func ReadZip(f *zip.ReadCloser) (*File, error) {
	defer f.Close()
	return ReadZipReader(&f.Reader)
}
  463. // ReadZipReader() can be used to read an XLSX in memory without
  464. // touching the filesystem.
  465. func ReadZipReader(r *zip.Reader) (*File, error) {
  466. var err error
  467. var file *File
  468. var reftable *RefTable
  469. var sharedStrings *zip.File
  470. var sheetXMLMap map[string]string
  471. var sheets map[string]*Sheet
  472. var style *xlsxStyles
  473. var styles *zip.File
  474. var v *zip.File
  475. var workbook *zip.File
  476. var workbookRels *zip.File
  477. var worksheets map[string]*zip.File
  478. file = new(File)
  479. worksheets = make(map[string]*zip.File, len(r.File))
  480. for _, v = range r.File {
  481. switch v.Name {
  482. case "xl/sharedStrings.xml":
  483. sharedStrings = v
  484. case "xl/workbook.xml":
  485. workbook = v
  486. case "xl/_rels/workbook.xml.rels":
  487. workbookRels = v
  488. case "xl/styles.xml":
  489. styles = v
  490. default:
  491. if len(v.Name) > 14 {
  492. if v.Name[0:13] == "xl/worksheets" {
  493. worksheets[v.Name[14:len(v.Name)-4]] = v
  494. }
  495. }
  496. }
  497. }
  498. sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels)
  499. if err != nil {
  500. return nil, err
  501. }
  502. file.worksheets = worksheets
  503. reftable, err = readSharedStringsFromZipFile(sharedStrings)
  504. if err != nil {
  505. return nil, err
  506. }
  507. if reftable == nil {
  508. readerErr := new(XLSXReaderError)
  509. readerErr.Err = "No valid sharedStrings.xml found in XLSX file"
  510. return nil, readerErr
  511. }
  512. file.referenceTable = reftable
  513. style, err = readStylesFromZipFile(styles)
  514. if err != nil {
  515. return nil, err
  516. }
  517. file.styles = style
  518. sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap)
  519. if err != nil {
  520. return nil, err
  521. }
  522. if sheets == nil {
  523. readerErr := new(XLSXReaderError)
  524. readerErr.Err = "No sheets found in XLSX File"
  525. return nil, readerErr
  526. }
  527. file.Sheets = sheets
  528. return file, nil
  529. }