| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345 |
- package xlsx
- import (
- "archive/zip"
- "fmt"
- "io"
- "math"
- "os"
- "strconv"
- "strings"
- "xml"
- )
- // XLSXReaderError is the standard error type for otherwise undefined
- // errors in the XSLX reading process.
- type XLSXReaderError struct {
- Error string
- }
- // String() returns a string value from an XLSXReaderError struct in
- // order that it might comply with the os.Error interface.
- func (e *XLSXReaderError) String() string {
- return e.Error
- }
- // Cell is a high level structure intended to provide user access to
- // the contents of Cell within an xlsx.Row.
- type Cell struct {
- data string
- }
- // CellInterface defines the public API of the Cell.
- type CellInterface interface {
- String() string
- }
- func (c *Cell) String() string {
- return c.data
- }
- // Row is a high level structure indended to provide user access to a
- // row within a xlsx.Sheet. An xlsx.Row contains a slice of xlsx.Cell.
- type Row struct {
- Cells []*Cell
- }
- // Sheet is a high level structure intended to provide user access to
- // the contents of a particular sheet within an XLSX file.
- type Sheet struct {
- Rows []*Row
- }
- // File is a high level structure providing a slice of Sheet structs
- // to the user.
- type File struct {
- worksheets map[string]*zip.File
- referenceTable []string
- Sheets []*Sheet
- }
- // getRangeFromString is an internal helper function that converts
- // XLSX internal range syntax to a pair of integers. For example,
- // the range string "1:3" yield the upper and lower intergers 1 and 3.
- func getRangeFromString(rangeString string) (lower int, upper int, error os.Error) {
- var parts []string
- parts = strings.SplitN(rangeString, ":", 2)
- if parts[0] == "" {
- error = os.NewError(fmt.Sprintf("Invalid range '%s'\n", rangeString))
- }
- if parts[1] == "" {
- error = os.NewError(fmt.Sprintf("Invalid range '%s'\n", rangeString))
- }
- lower, error = strconv.Atoi(parts[0])
- if error != nil {
- error = os.NewError(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
- }
- upper, error = strconv.Atoi(parts[1])
- if error != nil {
- error = os.NewError(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
- }
- return lower, upper, error
- }
- // positionalLetterMultiplier gives an integer multiplier to use for a
- // position in a letter based column identifer. For example, the
- // column ID "AA" is equivalent to 26*1 + 1, "BA" is equivalent to
- // 26*2 + 1 and "ABA" is equivalent to (676 * 1)+(26 * 2)+1 or
- // ((26**2)*1)+((26**1)*2)+((26**0))*1
- func positionalLetterMultiplier(extent, pos int) int {
- var result float64
- var power float64
- var offset int
- offset = pos + 1
- power = float64(extent - offset)
- result = math.Pow(26, power)
- return int(result)
- }
- // lettersToNumeric is used to convert a character based column
- // reference to a zero based numeric column identifier.
- func lettersToNumeric(letters string) int {
- var sum int = 0
- var shift int
- extent := len(letters)
- for i, c := range letters {
- // Just to make life akward. If we think of this base
- // 26 notation as being like HEX or binary we hit a
- // nasty little problem. The issue is that we have no
- // 0s and therefore A can be both a 1 and a 0. The
- // value range of a letter is different in the most
- // significant position if (and only if) there is more
- // than one positions. For example:
- // "A" = 0
- // 676 | 26 | 0
- // ----+----+----
- // 0 | 0 | 0
- //
- // "Z" = 25
- // 676 | 26 | 0
- // ----+----+----
- // 0 | 0 | 25
- // "AA" = 26
- // 676 | 26 | 0
- // ----+----+----
- // 0 | 1 | 0 <--- note here - the value of "A" maps to both 1 and 0.
- if i == 0 && extent > 1 {
- shift = 1
- } else {
- shift = 0
- }
- multiplier := positionalLetterMultiplier(extent, i)
- switch {
- case 'A' <= c && c <= 'Z':
- sum += multiplier * ((c - 'A') + shift)
- case 'a' <= c && c <= 'z':
- sum += multiplier * ((c - 'a') + shift)
- }
- }
- return sum
- }
- // letterOnlyMapF is used in conjunction with strings.Map to return
- // only the characters A-Z and a-z in a string
- func letterOnlyMapF(rune int) int {
- switch {
- case 'A' <= rune && rune <= 'Z':
- return rune
- case 'a' <= rune && rune <= 'z':
- return rune - 32
- }
- return -1
- }
- // intOnlyMapF is used in conjunction with strings.Map to return only
- // the numeric portions of a string.
- func intOnlyMapF(rune int) int {
- if rune >= 48 && rune < 58 {
- return rune
- }
- return -1
- }
- // getCoordsFromCellIDString returns the zero based cartesian
- // coordinates from a cell name in Excel format, e.g. the cellIDString
- // "A1" returns 0, 0 and the "B3" return 1, 2.
- func getCoordsFromCellIDString(cellIDString string) (x, y int, error os.Error) {
- var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
- y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
- if error != nil {
- return x, y, error
- }
- y -= 1 // Zero based
- x = lettersToNumeric(letterPart)
- return x, y, error
- }
- // readRowsFromSheet is an internal helper function that extracts the
- // rows from a XSLXWorksheet, poulates them with Cells and resolves
- // the value references from the reference table and stores them in
- func readRowsFromSheet(worksheet *XLSXWorksheet, reftable []string) []*Row {
- // Note, this function needs tidying up!
- var rows []*Row
- var error os.Error
- var upper int
- var row *Row
- var cell *Cell
- rows = make([]*Row, len(worksheet.SheetData.Row))
- for i, rawrow := range worksheet.SheetData.Row {
- row = new(Row)
- _, upper, error = getRangeFromString(rawrow.Spans)
- if error != nil {
- panic(error)
- }
- error = nil
- row.Cells = make([]*Cell, upper)
- for i := 0; i < upper; i++ {
- cell = new(Cell)
- cell.data = ""
- row.Cells[i] = cell
- }
- for _, rawcell := range rawrow.C {
- x, _, error := getCoordsFromCellIDString(rawcell.R)
- if error != nil {
- panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
- }
- error = nil
- cell = new(Cell)
- cell.data = ""
- if len(rawcell.V.Data) > 0 {
- vval := strings.Trim(rawcell.V.Data, " \t\n\r")
- if rawcell.T == "s" {
- ref, error := strconv.Atoi(vval)
- if error != nil {
- panic(error)
- panic(fmt.Sprintf("Invalid reference in Excel Cell (not found in sharedStrings.xml) - the reference was %v\n", rawcell.V.Data))
- }
- cell.data = reftable[ref]
- } else {
- cell.data = vval
- }
- }
- row.Cells[x] = cell
- }
- rows[i] = row
- }
- return rows
- }
- // readSheetsFromZipFile is an internal helper function that loops
- // over the Worksheets defined in the XSLXWorkbook and loads them into
- // Sheet objects stored in the Sheets slice of a xlsx.File struct.
- func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, os.Error) {
- var workbook *XLSXWorkbook
- var error os.Error
- var rc io.ReadCloser
- workbook = new(XLSXWorkbook)
- rc, error = f.Open()
- if error != nil {
- return nil, error
- }
- error = xml.Unmarshal(rc, workbook)
- if error != nil {
- return nil, error
- }
- sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
- for i, rawsheet := range workbook.Sheets.Sheet {
- worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets) //
- if error != nil {
- return nil, error
- }
- sheet := new(Sheet)
- sheet.Rows = readRowsFromSheet(worksheet, file.referenceTable)
- sheets[i] = sheet
- }
- return sheets, nil
- }
- // readSharedStringsFromZipFile() is an internal helper function to
- // extract a reference table from the sharedStrings.xml file within
- // the XLSX zip file.
- func readSharedStringsFromZipFile(f *zip.File) ([]string, os.Error) {
- var sst *XLSXSST
- var error os.Error
- var rc io.ReadCloser
- var reftable []string
- rc, error = f.Open()
- if error != nil {
- return nil, error
- }
- sst = new(XLSXSST)
- error = xml.Unmarshal(rc, sst)
- if error != nil {
- return nil, error
- }
- reftable = MakeSharedStringRefTable(sst)
- return reftable, nil
- }
- // OpenFile() take the name of an XLSX file and returns a populated
- // xlsx.File struct for it.
- func OpenFile(filename string) (x *File, e os.Error) {
- var f *zip.ReadCloser
- var error os.Error
- var file *File
- var v *zip.File
- var workbook *zip.File
- var sharedStrings *zip.File
- var reftable []string
- var worksheets map[string]*zip.File
- f, error = zip.OpenReader(filename)
- if error != nil {
- return nil, error
- }
- file = new(File)
- worksheets = make(map[string]*zip.File, len(f.File))
- for _, v = range f.File {
- switch v.Name {
- case "xl/sharedStrings.xml":
- sharedStrings = v
- case "xl/workbook.xml":
- workbook = v
- default:
- if len(v.Name) > 12 {
- if v.Name[0:13] == "xl/worksheets" {
- worksheets[v.Name[14:len(v.Name)-4]] = v
- }
- }
- }
- }
- file.worksheets = worksheets
- reftable, error = readSharedStringsFromZipFile(sharedStrings)
- if error != nil {
- return nil, error
- }
- if reftable == nil {
- error := new(XLSXReaderError)
- error.Error = "No valid sharedStrings.xml found in XLSX file"
- return nil, error
- }
- file.referenceTable = reftable
- sheets, error := readSheetsFromZipFile(workbook, file)
- if error != nil {
- return nil, error
- }
- if sheets == nil {
- error := new(XLSXReaderError)
- error.Error = "No sheets found in XLSX File"
- return nil, error
- }
- file.Sheets = sheets
- f.Close()
- return file, nil
- }
|