| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404 |
- package xlsx
- import (
- "archive/zip"
- "encoding/xml"
- "errors"
- "fmt"
- "io"
- "math"
- "strconv"
- "strings"
- )
- // XLSXReaderError is the standard error type for otherwise undefined
- // errors in the XSLX reading process.
- type XLSXReaderError struct {
- Err string
- }
- // String() returns a string value from an XLSXReaderError struct in
- // order that it might comply with the os.Error interface.
- func (e *XLSXReaderError) Error() string {
- return e.Err
- }
- // Cell is a high level structure intended to provide user access to
- // the contents of Cell within an xlsx.Row.
- type Cell struct {
- Value string
- }
- // CellInterface defines the public API of the Cell.
- type CellInterface interface {
- String() string
- }
- func (c *Cell) String() string {
- return c.Value
- }
- // Row is a high level structure indended to provide user access to a
- // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
- type Row struct {
- Cells []*Cell
- }
- // Sheet is a high level structure intended to provide user access to
- // the contents of a particular sheet within an XLSX file.
- type Sheet struct {
- Rows []*Row
- MaxRow int
- MaxCol int
- }
- // File is a high level structure providing a slice of Sheet structs
- // to the user.
- type File struct {
- worksheets map[string]*zip.File
- referenceTable []string
- Sheets []*Sheet // sheet access by index
- Sheet map[string]*Sheet // sheet access by name
- }
- // getRangeFromString is an internal helper function that converts
- // XLSX internal range syntax to a pair of integers. For example,
- // the range string "1:3" yield the upper and lower intergers 1 and 3.
- func getRangeFromString(rangeString string) (lower int, upper int, error error) {
- var parts []string
- parts = strings.SplitN(rangeString, ":", 2)
- if parts[0] == "" {
- error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
- }
- if parts[1] == "" {
- error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
- }
- lower, error = strconv.Atoi(parts[0])
- if error != nil {
- error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
- }
- upper, error = strconv.Atoi(parts[1])
- if error != nil {
- error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
- }
- return lower, upper, error
- }
- // positionalLetterMultiplier gives an integer multiplier to use for a
- // position in a letter based column identifer. For example, the
- // column ID "AA" is equivalent to 26*1 + 1, "BA" is equivalent to
- // 26*2 + 1 and "ABA" is equivalent to (676 * 1)+(26 * 2)+1 or
- // ((26**2)*1)+((26**1)*2)+((26**0))*1
- func positionalLetterMultiplier(extent, pos int) int {
- var result float64
- var power float64
- var offset int
- offset = pos + 1
- power = float64(extent - offset)
- result = math.Pow(26, power)
- return int(result)
- }
- // lettersToNumeric is used to convert a character based column
- // reference to a zero based numeric column identifier.
- func lettersToNumeric(letters string) int {
- var sum int = 0
- var shift int
- extent := len(letters)
- for i, c := range letters {
- // Just to make life akward. If we think of this base
- // 26 notation as being like HEX or binary we hit a
- // nasty little problem. The issue is that we have no
- // 0s and therefore A can be both a 1 and a 0. The
- // value range of a letter is different in the most
- // significant position if (and only if) there is more
- // than one positions. For example:
- // "A" = 0
- // 676 | 26 | 0
- // ----+----+----
- // 0 | 0 | 0
- //
- // "Z" = 25
- // 676 | 26 | 0
- // ----+----+----
- // 0 | 0 | 25
- // "AA" = 26
- // 676 | 26 | 0
- // ----+----+----
- // 0 | 1 | 0 <--- note here - the value of "A" maps to both 1 and 0.
- if i == 0 && extent > 1 {
- shift = 1
- } else {
- shift = 0
- }
- multiplier := positionalLetterMultiplier(extent, i)
- switch {
- case 'A' <= c && c <= 'Z':
- sum += multiplier * (int((c - 'A')) + shift)
- case 'a' <= c && c <= 'z':
- sum += multiplier * (int((c - 'a')) + shift)
- }
- }
- return sum
- }
- // letterOnlyMapF is used in conjunction with strings.Map to return
- // only the characters A-Z and a-z in a string
- func letterOnlyMapF(rune rune) rune {
- switch {
- case 'A' <= rune && rune <= 'Z':
- return rune
- case 'a' <= rune && rune <= 'z':
- return rune - 32
- }
- return -1
- }
- // intOnlyMapF is used in conjunction with strings.Map to return only
- // the numeric portions of a string.
- func intOnlyMapF(rune rune) rune {
- if rune >= 48 && rune < 58 {
- return rune
- }
- return -1
- }
- // getCoordsFromCellIDString returns the zero based cartesian
- // coordinates from a cell name in Excel format, e.g. the cellIDString
- // "A1" returns 0, 0 and the "B3" return 1, 2.
- func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
- var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
- y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
- if error != nil {
- return x, y, error
- }
- y -= 1 // Zero based
- x = lettersToNumeric(letterPart)
- return x, y, error
- }
- // makeRowFromSpan will, when given a span expressed as a string,
- // return an empty Row large enough to encompass that span and
- // populate it with empty cells. All rows start from cell 1 -
- // regardless of the lower bound of the span.
- func makeRowFromSpan(spans string) *Row {
- var error error
- var upper int
- var row *Row
- var cell *Cell
- row = new(Row)
- _, upper, error = getRangeFromString(spans)
- if error != nil {
- panic(error)
- }
- error = nil
- row.Cells = make([]*Cell, upper)
- for i := 0; i < upper; i++ {
- cell = new(Cell)
- cell.Value = ""
- row.Cells[i] = cell
- }
- return row
- }
- // get the max column
- // return the cells of columns
- func makeRowFromRaw(rawrow xlsxRow) *Row {
- var upper int
- var row *Row
- var cell *Cell
- row = new(Row)
- upper = 0
- for _, rawcell := range rawrow.C {
- x, _, error := getCoordsFromCellIDString(rawcell.R)
- if error != nil {
- panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
- }
- if x > upper {
- upper = x
- }
- }
- row.Cells = make([]*Cell, upper)
- for i := 0; i < upper; i++ {
- cell = new(Cell)
- cell.Value = ""
- row.Cells[i] = cell
- }
- return row
- }
- // getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
- // Note - this is not actually general enough - we should support retaining tabs and newlines.
- func getValueFromCellData(rawcell xlsxC, reftable []string) string {
- var value string = ""
- var data string = rawcell.V
- if len(data) > 0 {
- vval := strings.Trim(data, " \t\n\r")
- if rawcell.T == "s" {
- ref, error := strconv.Atoi(vval)
- if error != nil {
- panic(error)
- }
- value = reftable[ref]
- } else {
- value = vval
- }
- }
- return value
- }
- // readRowsFromSheet is an internal helper function that extracts the
- // rows from a XSLXWorksheet, poulates them with Cells and resolves
- // the value references from the reference table and stores them in
- func readRowsFromSheet(Worksheet *xlsxWorksheet, reftable []string) ([]*Row ,int) {
- var rows []*Row
- var row *Row
- var maxCol int
- rows = make([]*Row, len(Worksheet.SheetData.Row))
- maxCol = 0
- for i, rawrow := range Worksheet.SheetData.Row {
- // range is not empty
- if len(rawrow.Spans) != 0 {
- row = makeRowFromSpan(rawrow.Spans)
- } else {
- row = makeRowFromRaw(rawrow)
- }
- for _, rawcell := range rawrow.C {
- x, _, error := getCoordsFromCellIDString(rawcell.R)
- if error != nil {
- panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
- }
- if x > maxCol {
- maxCol = x
- }
- row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
- }
- rows[i] = row
- }
- return rows,maxCol
- }
- // readSheetsFromZipFile is an internal helper function that loops
- // over the Worksheets defined in the XSLXWorkbook and loads them into
- // Sheet objects stored in the Sheets slice of a xlsx.File struct.
- func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, []string, error) {
- var workbook *xlsxWorkbook
- var error error
- var rc io.ReadCloser
- var decoder *xml.Decoder
- workbook = new(xlsxWorkbook)
- rc, error = f.Open()
- if error != nil {
- return nil, nil, error
- }
- decoder = xml.NewDecoder(rc)
- error = decoder.Decode(workbook)
- if error != nil {
- return nil, nil, error
- }
- sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
- names := make([]string, len(workbook.Sheets.Sheet))
- for i, rawsheet := range workbook.Sheets.Sheet {
- worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
- if error != nil {
- return nil, nil, error
- }
- sheet := new(Sheet)
- sheet.Rows,sheet.MaxCol = readRowsFromSheet(worksheet, file.referenceTable)
- sheets[i] = sheet
- sheet.MaxRow = len(sheet.Rows)
- names[i] = rawsheet.Name
- }
- return sheets, names, nil
- }
- // readSharedStringsFromZipFile() is an internal helper function to
- // extract a reference table from the sharedStrings.xml file within
- // the XLSX zip file.
- func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
- var sst *xlsxSST
- var error error
- var rc io.ReadCloser
- var decoder *xml.Decoder
- var reftable []string
- rc, error = f.Open()
- if error != nil {
- return nil, error
- }
- sst = new(xlsxSST)
- decoder = xml.NewDecoder(rc)
- error = decoder.Decode(sst)
- if error != nil {
- return nil, error
- }
- reftable = MakeSharedStringRefTable(sst)
- return reftable, nil
- }
- // OpenFile() take the name of an XLSX file and returns a populated
- // xlsx.File struct for it.
- func OpenFile(filename string) (x *File, e error) {
- var f *zip.ReadCloser
- var error error
- var file *File
- var v *zip.File
- var workbook *zip.File
- var sharedStrings *zip.File
- var reftable []string
- var worksheets map[string]*zip.File
- f, error = zip.OpenReader(filename)
- var sheetMap map[string]*Sheet
- if error != nil {
- return nil, error
- }
- file = new(File)
- worksheets = make(map[string]*zip.File, len(f.File))
- for _, v = range f.File {
- switch v.Name {
- case "xl/sharedStrings.xml":
- sharedStrings = v
- case "xl/workbook.xml":
- workbook = v
- default:
- if len(v.Name) > 12 {
- if v.Name[0:13] == "xl/worksheets" {
- worksheets[v.Name[14:len(v.Name)-4]] = v
- }
- }
- }
- }
- file.worksheets = worksheets
- reftable, error = readSharedStringsFromZipFile(sharedStrings)
- if error != nil {
- return nil, error
- }
- if reftable == nil {
- error := new(XLSXReaderError)
- error.Err = "No valid sharedStrings.xml found in XLSX file"
- return nil, error
- }
- file.referenceTable = reftable
- sheets, names, error := readSheetsFromZipFile(workbook, file)
- if error != nil {
- return nil, error
- }
- if sheets == nil {
- error := new(XLSXReaderError)
- error.Err = "No sheets found in XLSX File"
- return nil, error
- }
- file.Sheets = sheets
- sheetMap = make(map[string]*Sheet,len(names))
- for i := 0; i < len(names); i++ {
- sheetMap[names[i]] = sheets[i]
- }
- file.Sheet = sheetMap
- f.Close()
- return file, nil
- }
|