Browse Source

Whoops

Don't obliterate the file, dummy.
Jason Hall 12 years ago
parent
commit
8481ccd3f9
1 changed files with 460 additions and 69 deletions
  1. 460 69
      lib.go

+ 460 - 69
lib.go

@@ -1,74 +1,465 @@
-// OpenFile take the name of an XLSX file and returns a populated xlsx.File struct for it.
+package xlsx
+
+import (
+  "archive/zip"
+	"encoding/xml"
+	"errors"
+	"fmt"
+	"io"
+	"strconv"
+	"strings"
+)
+
+// XLSXReaderError is the standard error type for otherwise undefined
+// errors in the XSLX reading process.
+type XLSXReaderError struct {
+	Err string
+}
+
+// String() returns a string value from an XLSXReaderError struct in
+// order that it might comply with the os.Error interface.
+func (e *XLSXReaderError) Error() string {
+	return e.Err
+}
+
+// Cell is a high level structure intended to provide user access to
+// the contents of Cell within an xlsx.Row.
+type Cell struct {
+	Value      string
+	styleIndex int
+	styles     *xlsxStyles
+}
+
+// CellInterface defines the public API of the Cell.
+type CellInterface interface {
+	String() string
+}
+
+func (c *Cell) String() string {
+	return c.Value
+}
+
+func (c *Cell) GetStyle() *Style {
+	style := new(Style)
+	if c.styleIndex > 0 && c.styleIndex < len(c.styles.CellXfs) {
+		xf := c.styles.CellXfs[c.styleIndex]
+		if xf.ApplyBorder != "0" {
+			var border Border
+			border.Left = c.styles.Borders[xf.BorderId].Left.Style
+			border.Right = c.styles.Borders[xf.BorderId].Right.Style
+			border.Top = c.styles.Borders[xf.BorderId].Top.Style
+			border.Bottom = c.styles.Borders[xf.BorderId].Bottom.Style
+			style.Boders = border
+		}
+		if xf.ApplyFill != "0" {
+			var fill Fill
+			fill.BgColorIndex = c.styles.Fills[xf.FillId].BgColorIndex
+			style.Fills = fill
+		}
+	}
+	return style
+}
+
+// Row is a high level structure indended to provide user access to a
+// row within a xlsx.Sheet.  An xlsx.Row contains a slice of xlsx.Cell.
+type Row struct {
+	Cells []*Cell
+}
+
+// Sheet is a high level structure intended to provide user access to
+// the contents of a particular sheet within an XLSX file.
+type Sheet struct {
+	Rows   []*Row
+	MaxRow int
+	MaxCol int
+}
+
+// Style is a high level structure intended to provide user access to
+// the contents of Style within an XLSX file.
+type Style struct {
+	Boders Border
+	Fills  Fill
+}
+
+// Border is a high level structure intended to provide user access to
+// the contents of Border Style within an Sheet.
+type Border struct {
+	Left   string
+	Right  string
+	Top    string
+	Bottom string
+}
+
+// Fill is a high level structure intended to provide user access to
+// the contents of background and foreground color index within an Sheet.
+type Fill struct {
+	BgColorIndex string
+	FgColorIndex string
+}
+
+// File is a high level structure providing a slice of Sheet structs
+// to the user.
+type File struct {
+	worksheets     map[string]*zip.File
+	referenceTable []string
+	styles         *xlsxStyles
+	Sheets         []*Sheet          // sheet access by index
+	Sheet          map[string]*Sheet // sheet access by name
+}
+
+// getRangeFromString is an internal helper function that converts
+// XLSX internal range syntax to a pair of integers.  For example,
+// the range string "1:3" yield the upper and lower intergers 1 and 3.
+func getRangeFromString(rangeString string) (lower int, upper int, error error) {
+	var parts []string
+	parts = strings.SplitN(rangeString, ":", 2)
+	if parts[0] == "" {
+		error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
+	}
+	if parts[1] == "" {
+		error = errors.New(fmt.Sprintf("Invalid range '%s'\n", rangeString))
+	}
+	lower, error = strconv.Atoi(parts[0])
+	if error != nil {
+		error = errors.New(fmt.Sprintf("Invalid range (not integer in lower bound) %s\n", rangeString))
+	}
+	upper, error = strconv.Atoi(parts[1])
+	if error != nil {
+		error = errors.New(fmt.Sprintf("Invalid range (not integer in upper bound) %s\n", rangeString))
+	}
+	return lower, upper, error
+}
+
+// lettersToNumeric is used to convert a character based column
+// reference to a zero based numeric column identifier.
+func lettersToNumeric(letters string) int {
+	sum, mul, n := 0, 1, 0
+	for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
+		c := letters[i]
+		switch {
+		case 'A' <= c && c <= 'Z':
+			n += int(c - 'A')
+		case 'a' <= c && c <= 'z':
+			n += int(c - 'a')
+		}
+		sum += n * mul
+	}
+	return sum
+}
+
+// letterOnlyMapF is used in conjunction with strings.Map to return
+// only the characters A-Z and a-z in a string
+func letterOnlyMapF(rune rune) rune {
+	switch {
+	case 'A' <= rune && rune <= 'Z':
+		return rune
+	case 'a' <= rune && rune <= 'z':
+		return rune - 32
+	}
+	return -1
+}
+
+// intOnlyMapF is used in conjunction with strings.Map to return only
+// the numeric portions of a string.
+func intOnlyMapF(rune rune) rune {
+	if rune >= 48 && rune < 58 {
+		return rune
+	}
+	return -1
+}
+
+// getCoordsFromCellIDString returns the zero based cartesian
+// coordinates from a cell name in Excel format, e.g. the cellIDString
+// "A1" returns 0, 0 and the "B3" return 1, 2.
+func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
+	var letterPart string = strings.Map(letterOnlyMapF, cellIDString)
+	y, error = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString))
+	if error != nil {
+		return x, y, error
+	}
+	y -= 1 // Zero based
+	x = lettersToNumeric(letterPart)
+	return x, y, error
+}
+
+// makeRowFromSpan will, when given a span expressed as a string,
+// return an empty Row large enough to encompass that span and
+// populate it with empty cells.  All rows start from cell 1 -
+// regardless of the lower bound of the span.
+func makeRowFromSpan(spans string) *Row {
+	var error error
+	var upper int
+	var row *Row
+	var cell *Cell
+
+	row = new(Row)
+	_, upper, error = getRangeFromString(spans)
+	if error != nil {
+		panic(error)
+	}
+	error = nil
+	row.Cells = make([]*Cell, upper)
+	for i := 0; i < upper; i++ {
+		cell = new(Cell)
+		cell.Value = ""
+		row.Cells[i] = cell
+	}
+	return row
+}
+
+// get the max column
+// return the cells of columns
+func makeRowFromRaw(rawrow xlsxRow) *Row {
+	var upper int
+	var row *Row
+	var cell *Cell
+
+	row = new(Row)
+	upper = -1
+
+	for _, rawcell := range rawrow.C {
+		x, _, error := getCoordsFromCellIDString(rawcell.R)
+		if error != nil {
+			panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
+		}
+		if x > upper {
+			upper = x
+		}
+	}
+	upper++
+
+	row.Cells = make([]*Cell, upper)
+	for i := 0; i < upper; i++ {
+		cell = new(Cell)
+		cell.Value = ""
+		row.Cells[i] = cell
+	}
+	return row
+}
+
+// getValueFromCellData attempts to extract a valid value, usable in CSV form from the raw cell value.
+// Note - this is not actually general enough - we should support retaining tabs and newlines.
+func getValueFromCellData(rawcell xlsxC, reftable []string) string {
+	var value string = ""
+	var data string = rawcell.V
+	if len(data) > 0 {
+		vval := strings.Trim(data, " \t\n\r")
+		if rawcell.T == "s" {
+			ref, error := strconv.Atoi(vval)
+			if error != nil {
+				panic(error)
+			}
+			value = reftable[ref]
+		} else {
+			value = vval
+		}
+	}
+	return value
+}
+
+// readRowsFromSheet is an internal helper function that extracts the
+// rows from a XSLXWorksheet, poulates them with Cells and resolves
+// the value references from the reference table and stores them in
+func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
+	var rows []*Row
+	var row *Row
+	var maxCol int
+	var maxRow int
+	var reftable []string
+
+	reftable = file.referenceTable
+	maxCol = 0
+	maxRow = 0
+	for _, rawrow := range Worksheet.SheetData.Row {
+		for _, rawcell := range rawrow.C {
+			x, y, error := getCoordsFromCellIDString(rawcell.R)
+			if error != nil {
+				panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
+			}
+			if x > maxCol {
+				maxCol = x
+			}
+			if y > maxRow {
+				maxRow = y
+			}
+		}
+	}
+	maxCol += 1
+	maxRow += 1
+	rows = make([]*Row, maxRow)
+	for _, rawrow := range Worksheet.SheetData.Row {
+		// range is not empty
+		if len(rawrow.Spans) != 0 {
+			row = makeRowFromSpan(rawrow.Spans)
+		} else {
+			row = makeRowFromRaw(rawrow)
+		}
+		rowno := 0
+		for _, rawcell := range rawrow.C {
+			x, y, _ := getCoordsFromCellIDString(rawcell.R)
+			if y != 0 && rowno == 0 {
+				rowno = y
+			}
+			if x < len(row.Cells) {
+				row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
+				row.Cells[x].styleIndex = rawcell.S
+				row.Cells[x].styles = file.styles
+			}
+		}
+		rows[rowno] = row
+	}
+	return rows, maxCol, maxRow
+}
+
+// readSheetsFromZipFile is an internal helper function that loops
+// over the Worksheets defined in the XSLXWorkbook and loads them into
+// Sheet objects stored in the Sheets slice of a xlsx.File struct.
+func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, []string, error) {
+	var workbook *xlsxWorkbook
+	var error error
+	var rc io.ReadCloser
+	var decoder *xml.Decoder
+	workbook = new(xlsxWorkbook)
+	rc, error = f.Open()
+	if error != nil {
+		return nil, nil, error
+	}
+	decoder = xml.NewDecoder(rc)
+	error = decoder.Decode(workbook)
+	if error != nil {
+		return nil, nil, error
+	}
+	sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
+	names := make([]string, len(workbook.Sheets.Sheet))
+	for i, rawsheet := range workbook.Sheets.Sheet {
+		worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
+		if error != nil {
+			return nil, nil, error
+		}
+		sheet := new(Sheet)
+		sheet.Rows, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, file)
+		sheets[i] = sheet
+		names[i] = rawsheet.Name
+	}
+	return sheets, names, nil
+}
+
+// readSharedStringsFromZipFile() is an internal helper function to
+// extract a reference table from the sharedStrings.xml file within
+// the XLSX zip file.
+func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
+	var sst *xlsxSST
+	var error error
+	var rc io.ReadCloser
+	var decoder *xml.Decoder
+	var reftable []string
+	rc, error = f.Open()
+	if error != nil {
+		return nil, error
+	}
+	sst = new(xlsxSST)
+	decoder = xml.NewDecoder(rc)
+	error = decoder.Decode(sst)
+	if error != nil {
+		return nil, error
+	}
+	reftable = MakeSharedStringRefTable(sst)
+	return reftable, nil
+}
+
+// readStylesFromZipFile() is an internal helper function to
+// extract a style table from the style.xml file within
+// the XLSX zip file.
+func readStylesFromZipFile(f *zip.File) (*xlsxStyles, error) {
+	var style *xlsxStyles
+	var error error
+	var rc io.ReadCloser
+	var decoder *xml.Decoder
+	rc, error = f.Open()
+	if error != nil {
+		return nil, error
+	}
+	style = new(xlsxStyles)
+	decoder = xml.NewDecoder(rc)
+	error = decoder.Decode(style)
+	if error != nil {
+		return nil, error
+	}
+	return style, nil
+}
+
+
+// OpenFile() take the name of an XLSX file and returns a populated
+// xlsx.File struct for it.
 func OpenFile(filename string) (*File, error) {
-  var f *zip.ReadCloser
-  f, err := zip.OpenReader(filename)
-  if err != nil {
-    return nil, err
-  }
-  return ReadZip(f)
+	var f *zip.ReadCloser
+	f, err := zip.OpenReader(filename)
+	if err != nil {
+		return nil, err
+	}
+	return ReadZip(f)
 }
 
-// ReadZip takes a zip file of an XLSX file and returns a populated xlsx.File struct for it.
 func ReadZip(f *zip.ReadCloser) (*File, error) {
-  var error error
-  var file *File
-  var v *zip.File
-  var workbook *zip.File
-  var styles *zip.File
-  var sharedStrings *zip.File
-  var reftable []string
-  var worksheets map[string]*zip.File
-  var sheetMap map[string]*Sheet
-
-  file = new(File)
-  worksheets = make(map[string]*zip.File, len(f.File))
-  for _, v = range f.File {
-    switch v.Name {
-    case "xl/sharedStrings.xml":
-      sharedStrings = v
-    case "xl/workbook.xml":
-      workbook = v
-    case "xl/styles.xml":
-      styles = v
-    default:
-      if len(v.Name) > 12 {
-        if v.Name[0:13] == "xl/worksheets" {
-          worksheets[v.Name[14:len(v.Name)-4]] = v
-        }
-      }
-    }
-  }
-  file.worksheets = worksheets
-  reftable, error = readSharedStringsFromZipFile(sharedStrings)
-  if error != nil {
-    return nil, error
-  }
-  if reftable == nil {
-    error := new(XLSXReaderError)
-    error.Err = "No valid sharedStrings.xml found in XLSX file"
-    return nil, error
-  }
-  file.referenceTable = reftable
-  style, error := readStylesFromZipFile(styles)
-  if error != nil {
-    return nil, error
-  }
-  file.styles = style
-  sheets, names, error := readSheetsFromZipFile(workbook, file)
-  if error != nil {
-    return nil, error
-  }
-  if sheets == nil {
-    error := new(XLSXReaderError)
-    error.Err = "No sheets found in XLSX File"
-    return nil, error
-  }
-  file.Sheets = sheets
-  sheetMap = make(map[string]*Sheet, len(names))
-  for i := 0; i < len(names); i++ {
-    sheetMap[names[i]] = sheets[i]
-  }
-  file.Sheet = sheetMap
-  f.Close()
-  return file, nil
+	var error error
+	var file *File
+	var v *zip.File
+	var workbook *zip.File
+	var styles *zip.File
+	var sharedStrings *zip.File
+	var reftable []string
+	var worksheets map[string]*zip.File
+	var sheetMap map[string]*Sheet
+
+	file = new(File)
+	worksheets = make(map[string]*zip.File, len(f.File))
+	for _, v = range f.File {
+		switch v.Name {
+		case "xl/sharedStrings.xml":
+			sharedStrings = v
+		case "xl/workbook.xml":
+			workbook = v
+		case "xl/styles.xml":
+			styles = v
+		default:
+			if len(v.Name) > 12 {
+				if v.Name[0:13] == "xl/worksheets" {
+					worksheets[v.Name[14:len(v.Name)-4]] = v
+				}
+			}
+		}
+	}
+	file.worksheets = worksheets
+	reftable, error = readSharedStringsFromZipFile(sharedStrings)
+	if error != nil {
+		return nil, error
+	}
+	if reftable == nil {
+		error := new(XLSXReaderError)
+		error.Err = "No valid sharedStrings.xml found in XLSX file"
+		return nil, error
+	}
+	file.referenceTable = reftable
+	style, error := readStylesFromZipFile(styles)
+	if error != nil {
+		return nil, error
+	}
+	file.styles = style
+	sheets, names, error := readSheetsFromZipFile(workbook, file)
+	if error != nil {
+		return nil, error
+	}
+	if sheets == nil {
+		error := new(XLSXReaderError)
+		error.Err = "No sheets found in XLSX File"
+		return nil, error
+	}
+	file.Sheets = sheets
+	sheetMap = make(map[string]*Sheet, len(names))
+	for i := 0; i < len(names); i++ {
+		sheetMap[names[i]] = sheets[i]
+	}
+	file.Sheet = sheetMap
+	f.Close()
+	return file, nil
 }