소스 검색

Merge pull request #324 from ryho/master

Stream Writing, Partial Reads, Improved FormattedValue() in General mode
Geoffrey J. Teale 8 년 전
부모
커밋
31d1ae382f
19개의 변경된 파일, 2264개의 추가 그리고 265개의 삭제
  1. 1 7
      .travis.yml
  2. 79 145
      cell.go
  3. 129 24
      cell_test.go
  4. 1 0
      col.go
  5. 26 6
      file.go
  6. 104 3
      file_test.go
  7. 561 0
      format_code.go
  8. 242 0
      format_code_test.go
  9. 84 21
      lib.go
  10. 14 14
      lib_test.go
  11. 8 9
      sheet.go
  12. 6 7
      sheet_test.go
  13. 217 0
      stream_file.go
  14. 245 0
      stream_file_builder.go
  15. 487 0
      stream_test.go
  16. BIN
      testdocs/large_sheet_large_sharedstrings_dimension_tag.xlsx
  17. BIN
      testdocs/large_sheet_no_shared_strings_no_dimension_tag.xlsx
  18. 39 18
      xmlStyle.go
  19. 21 11
      xmlWorkbook.go

+ 1 - 7
.travis.yml

@@ -1,19 +1,13 @@
 language: go
 
-install:
-  - go get -d -t -v ./... && go build -v ./...
-
 go:
-  - 1.5.x
-  - 1.6.x
-  - 1.7.x
   - 1.8.x
   - 1.9.x
   - tip
 
 script:
   - go vet ./...
-  - go test -v -coverprofile=coverage.txt -covermode=atomic
+  - go test -v -coverprofile=coverage.txt -covermode=atomic .
 
 after_success:
   - bash <(curl -s https://codecov.io/bash)

+ 79 - 145
cell.go

@@ -1,13 +1,18 @@
 package xlsx
 
 import (
+	"errors"
 	"fmt"
 	"math"
 	"strconv"
-	"strings"
 	"time"
 )
 
+const (
+	maxNonScientificNumber = 1e11
+	minNonScientificNumber = 1e-9
+)
+
 // CellType is an int type for storing metadata about the data type in the cell.
 type CellType int
 
@@ -23,19 +28,24 @@ const (
 	CellTypeGeneral
 )
 
+func (ct CellType) Ptr() *CellType {
+	return &ct
+}
+
 // Cell is a high level structure intended to provide user access to
 // the contents of Cell within an xlsx.Row.
 type Cell struct {
-	Row      *Row
-	Value    string
-	formula  string
-	style    *Style
-	NumFmt   string
-	date1904 bool
-	Hidden   bool
-	HMerge   int
-	VMerge   int
-	cellType CellType
+	Row          *Row
+	Value        string
+	formula      string
+	style        *Style
+	NumFmt       string
+	parsedNumFmt *parsedNumberFormat
+	date1904     bool
+	Hidden       bool
+	HMerge       int
+	VMerge       int
+	cellType     CellType
 }
 
 // CellInterface defines the public API of the Cell.
@@ -46,7 +56,7 @@ type CellInterface interface {
 
 // NewCell creates a cell and adds it to a row.
 func NewCell(r *Row) *Cell {
-	return &Cell{Row: r}
+	return &Cell{Row: r, NumFmt: "general"}
 }
 
 // Merge with other cells, horizontally and/or vertically.
@@ -197,6 +207,20 @@ func (c *Cell) Int64() (int64, error) {
 	return f, nil
 }
 
+// GeneralNumeric returns the value of the cell as a string. It is formatted very closely to the XLSX spec for how
+// to display values when the storage type is Number and the format type is General. It is not 100% identical to the
+// spec but is as close as you can get using the built in Go formatting tools.
+func (c *Cell) GeneralNumeric() (string, error) {
+	return generalNumericScientific(c.Value, true)
+}
+
+// GeneralNumericWithoutScientific returns numbers that are always formatted as numbers, but it does not follow
+// the rules for when XLSX should switch to scientific notation, since sometimes scientific notation is not desired,
+// even if that is how the document is supposed to be formatted.
+func (c *Cell) GeneralNumericWithoutScientific() (string, error) {
+	return generalNumericScientific(c.Value, false)
+}
+
 // SetInt sets a cell's value to an integer.
 func (c *Cell) SetInt(n int) {
 	c.SetValue(n)
@@ -311,143 +335,53 @@ func (c *Cell) formatToInt(format string) (string, error) {
 	return fmt.Sprintf(format, int(f)), nil
 }
 
+func (c *Cell) getNumberFormat() *parsedNumberFormat {
+	if c.parsedNumFmt == nil || c.parsedNumFmt.numFmt != c.NumFmt {
+		c.parsedNumFmt = parseFullNumberFormatString(c.NumFmt)
+	}
+	return c.parsedNumFmt
+}
+
 // FormattedValue returns a value, and possibly an error condition
 // from a Cell.  If it is possible to apply a format to the cell
 // value, it will do so, if not then an error will be returned, along
 // with the raw value of the Cell.
+//
+// This is the documentation of the "General" Format in the Office Open XML spec:
+//
+// Numbers
+// The application shall attempt to display the full number up to 11 digits (inc. decimal point). If the number is too
+// large*, the application shall attempt to show exponential format. If the number has too many significant digits, the
+// display shall be truncated. The optimal method of display is based on the available cell width. If the number cannot
+// be displayed using any of these formats in the available width, the application shall show "#" across the width of
+// the cell.
+//
+// Conditions for switching to exponential format:
+// 1. The cell value shall have at least five digits for xE-xx
+// 2. If the exponent is bigger than the size allowed, a floating point number cannot fit, so try exponential notation.
+// 3. Similarly, for negative exponents, check if there is space for even one (non-zero) digit in floating point format**.
+// 4. Finally, if there isn't room for all of the significant digits in floating point format (for a negative exponent),
+// exponential format shall display more digits if the exponent is less than -3. (The 3 is because E-xx takes 4
+// characters, and the leading 0 in floating point takes only 1 character. Thus, for an exponent less than -3, there is
+// more than 3 additional leading 0's, more than enough to compensate for the size of the E-xx.)
+//
+// Floating point rule:
+// For general formatting in cells, max overall length for cell display is 11, not including negative sign, but includes
+// leading zeros and decimal separator.***
+//
+// Added Notes:
+// * "If the number is too large" can also mean "if the number has more than 11 digits", so greater than or equal to
+// 1e11 or less than 1e-9.
+// ** Means that you should switch to scientific if there would be 9 zeros after the decimal (the decimal and first zero
+// count against the 11 character limit), so less than 1e-9.
+// *** The way this is written, you can get numbers that are more than 11 characters because the golang Float fmt
+// does not support adjusting the precision while not padding with zeros, while also not switching to scientific
+// notation too early.
 func (c *Cell) FormattedValue() (string, error) {
-	var numberFormat = c.GetNumberFormat()
-	if isTimeFormat(numberFormat) {
-		return parseTime(c)
-	}
-	switch numberFormat {
-	case builtInNumFmt[builtInNumFmtIndex_GENERAL], builtInNumFmt[builtInNumFmtIndex_STRING]:
-		return c.Value, nil
-	case builtInNumFmt[builtInNumFmtIndex_INT], "#,##0":
-		return c.formatToInt("%d")
-	case builtInNumFmt[builtInNumFmtIndex_FLOAT], "#,##0.00":
-		return c.formatToFloat("%.2f")
-	case "#,##0 ;(#,##0)", "#,##0 ;[red](#,##0)":
-		f, err := strconv.ParseFloat(c.Value, 64)
-		if err != nil {
-			return c.Value, err
-		}
-		if f < 0 {
-			i := int(math.Abs(f))
-			return fmt.Sprintf("(%d)", i), nil
-		}
-		i := int(f)
-		return fmt.Sprintf("%d", i), nil
-	case "#,##0.00;(#,##0.00)", "#,##0.00;[red](#,##0.00)":
-		f, err := strconv.ParseFloat(c.Value, 64)
-		if err != nil {
-			return c.Value, err
-		}
-		if f < 0 {
-			return fmt.Sprintf("(%.2f)", f), nil
-		}
-		return fmt.Sprintf("%.2f", f), nil
-	case "0%":
-		f, err := strconv.ParseFloat(c.Value, 64)
-		if err != nil {
-			return c.Value, err
-		}
-		f = f * 100
-		return fmt.Sprintf("%d%%", int(f)), nil
-	case "0.00%":
-		f, err := strconv.ParseFloat(c.Value, 64)
-		if err != nil {
-			return c.Value, err
-		}
-		f = f * 100
-		return fmt.Sprintf("%.2f%%", f), nil
-	case "0.00e+00", "##0.0e+0":
-		return c.formatToFloat("%e")
-	}
-	return c.Value, nil
-
-}
-
-// parseTime returns a string parsed using time.Time
-func parseTime(c *Cell) (string, error) {
-	f, err := strconv.ParseFloat(c.Value, 64)
-	if err != nil {
-		return c.Value, err
-	}
-	val := TimeFromExcelTime(f, c.date1904)
-	format := c.GetNumberFormat()
-
-	// Replace Excel placeholders with Go time placeholders.
-	// For example, replace yyyy with 2006. These are in a specific order,
-	// due to the fact that m is used in month, minute, and am/pm. It would
-	// be easier to fix that with regular expressions, but if it's possible
-	// to keep this simple it would be easier to maintain.
-	// Full-length month and days (e.g. March, Tuesday) have letters in them that would be replaced
-	// by other characters below (such as the 'h' in March, or the 'd' in Tuesday) below.
-	// First we convert them to arbitrary characters unused in Excel Date formats, and then at the end,
-	// turn them to what they should actually be.
-	// Based off: http://www.ozgrid.com/Excel/CustomFormats.htm
-	replacements := []struct{ xltime, gotime string }{
-		{"yyyy", "2006"},
-		{"yy", "06"},
-		{"mmmm", "%%%%"},
-		{"dddd", "&&&&"},
-		{"dd", "02"},
-		{"d", "2"},
-		{"mmm", "Jan"},
-		{"mmss", "0405"},
-		{"ss", "05"},
-		{"mm:", "04:"},
-		{":mm", ":04"},
-		{"mm", "01"},
-		{"am/pm", "pm"},
-		{"m/", "1/"},
-		{"%%%%", "January"},
-		{"&&&&", "Monday"},
-	}
-	// It is the presence of the "am/pm" indicator that determins
-	// if this is a 12 hour or 24 hours time format, not the
-	// number of 'h' characters.
-	if is12HourTime(format) {
-		format = strings.Replace(format, "hh", "03", 1)
-		format = strings.Replace(format, "h", "3", 1)
-	} else {
-		format = strings.Replace(format, "hh", "15", 1)
-		format = strings.Replace(format, "h", "15", 1)
-	}
-	for _, repl := range replacements {
-		format = strings.Replace(format, repl.xltime, repl.gotime, 1)
+	fullFormat := c.getNumberFormat()
+	returnVal, err := fullFormat.FormatValue(c)
+	if fullFormat.parseEncounteredError {
+		return returnVal, errors.New("invalid number format")
 	}
-	// If the hour is optional, strip it out, along with the
-	// possible dangling colon that would remain.
-	if val.Hour() < 1 {
-		format = strings.Replace(format, "]:", "]", 1)
-		format = strings.Replace(format, "[03]", "", 1)
-		format = strings.Replace(format, "[3]", "", 1)
-		format = strings.Replace(format, "[15]", "", 1)
-	} else {
-		format = strings.Replace(format, "[3]", "3", 1)
-		format = strings.Replace(format, "[15]", "15", 1)
-	}
-	return val.Format(format), nil
-}
-
-// isTimeFormat checks whether an Excel format string represents
-// a time.Time.
-func isTimeFormat(format string) bool {
-	dateParts := []string{
-		"yy", "hh", "h", "am/pm", "AM/PM", "A/P", "a/p", "ss", "mm", ":",
-	}
-	for _, part := range dateParts {
-		if strings.Contains(format, part) {
-			return true
-		}
-	}
-	return false
-}
-
-// is12HourTime checks whether an Excel time format string is a 12
-// hours form.
-func is12HourTime(format string) bool {
-	return strings.Contains(format, "am/pm") || strings.Contains(format, "AM/PM") || strings.Contains(format, "a/p") || strings.Contains(format, "A/P")
+	return returnVal, err
 }

+ 129 - 24
cell_test.go

@@ -2,6 +2,7 @@ package xlsx
 
 import (
 	"math"
+	"testing"
 	"time"
 
 	. "gopkg.in/check.v1"
@@ -117,6 +118,112 @@ func (l *CellSuite) TestSetFloat(c *C) {
 	c.Assert(cell.Value, Equals, "37947.75334343")
 }
 
+func (l *CellSuite) TestGeneralNumberHandling(c *C) {
+	// If you go to Excel, make a new file, type 18.99 in a cell, and save, what you will get is a
+	// cell where the format is General and the storage type is Number, that contains the value 18.989999999999998.
+	// The correct way to format this should be 18.99.
+	// 1.1 will get you the same, with a stored value of 1.1000000000000001.
+	// Also, numbers greater than 1e11 or less than 1e-9 will be shown as scientific notation.
+	testCases := []struct {
+		value                string
+		formattedValueOutput string
+		noExpValueOutput     string
+	}{
+		{
+			value:                "18.989999999999998",
+			formattedValueOutput: "18.99",
+			noExpValueOutput:     "18.99",
+		},
+		{
+			value:                "1.1000000000000001",
+			formattedValueOutput: "1.1",
+			noExpValueOutput:     "1.1",
+		},
+		{
+			value:                "0.0000000000000001",
+			formattedValueOutput: "1E-16",
+			noExpValueOutput:     "0.0000000000000001",
+		},
+		{
+			value:                "0.000000000000008",
+			formattedValueOutput: "8E-15",
+			noExpValueOutput:     "0.000000000000008",
+		},
+		{
+			value:                "1000000000000000000",
+			formattedValueOutput: "1E+18",
+			noExpValueOutput:     "1000000000000000000",
+		},
+		{
+			value:                "1230000000000000000",
+			formattedValueOutput: "1.23E+18",
+			noExpValueOutput:     "1230000000000000000",
+		},
+		{
+			value:                "12345678",
+			formattedValueOutput: "12345678",
+			noExpValueOutput:     "12345678",
+		},
+		{
+			value:                "0",
+			formattedValueOutput: "0",
+			noExpValueOutput:     "0",
+		},
+		{
+			value:                "-18.989999999999998",
+			formattedValueOutput: "-18.99",
+			noExpValueOutput:     "-18.99",
+		},
+		{
+			value:                "-1.1000000000000001",
+			formattedValueOutput: "-1.1",
+			noExpValueOutput:     "-1.1",
+		},
+		{
+			value:                "-0.0000000000000001",
+			formattedValueOutput: "-1E-16",
+			noExpValueOutput:     "-0.0000000000000001",
+		},
+		{
+			value:                "-0.000000000000008",
+			formattedValueOutput: "-8E-15",
+			noExpValueOutput:     "-0.000000000000008",
+		},
+		{
+			value:                "-1000000000000000000",
+			formattedValueOutput: "-1E+18",
+			noExpValueOutput:     "-1000000000000000000",
+		},
+		{
+			value:                "-1230000000000000000",
+			formattedValueOutput: "-1.23E+18",
+			noExpValueOutput:     "-1230000000000000000",
+		},
+		{
+			value:                "-12345678",
+			formattedValueOutput: "-12345678",
+			noExpValueOutput:     "-12345678",
+		},
+	}
+	for _, testCase := range testCases {
+		cell := Cell{
+			cellType: CellTypeNumeric,
+			NumFmt:   builtInNumFmt[builtInNumFmtIndex_GENERAL],
+			Value:    testCase.value,
+		}
+		val, err := cell.FormattedValue()
+		if err != nil {
+			c.Fatal(err)
+		}
+		c.Assert(val, Equals, testCase.formattedValueOutput)
+		val, err = cell.GeneralNumericWithoutScientific()
+		if err != nil {
+			c.Fatal(err)
+		}
+		c.Assert(val, Equals, testCase.noExpValueOutput)
+	}
+}
+
 func (s *CellSuite) TestGetTime(c *C) {
 	cell := Cell{}
 	cell.SetFloat(0)
@@ -134,7 +241,7 @@ func (s *CellSuite) TestGetTime(c *C) {
 
 // FormattedValue returns an error for formatting errors
 func (l *CellSuite) TestFormattedValueErrorsOnBadFormat(c *C) {
-	cell := Cell{Value: "Fudge Cake"}
+	cell := Cell{Value: "Fudge Cake", cellType: CellTypeNumeric}
 	cell.NumFmt = "#,##0 ;(#,##0)"
 	value, err := cell.FormattedValue()
 	c.Assert(value, Equals, "Fudge Cake")
@@ -142,14 +249,6 @@ func (l *CellSuite) TestFormattedValueErrorsOnBadFormat(c *C) {
 	c.Assert(err.Error(), Equals, "strconv.ParseFloat: parsing \"Fudge Cake\": invalid syntax")
 }
 
-// FormattedValue returns a string containing error text for formatting errors
-func (l *CellSuite) TestFormattedValueReturnsErrorAsValueForBadFormat(c *C) {
-	cell := Cell{Value: "Fudge Cake"}
-	cell.NumFmt = "#,##0 ;(#,##0)"
-	_, err := cell.FormattedValue()
-	c.Assert(err.Error(), Equals, "strconv.ParseFloat: parsing \"Fudge Cake\": invalid syntax")
-}
-
 // formattedValueChecker removes all the boilerplate for testing Cell.FormattedValue
 // after its change from returning one value (a string) to two values (string, error)
 // This allows all the old one-line asserts in the test to continue to be one
@@ -165,16 +264,22 @@ func (fvc *formattedValueChecker) Equals(cell Cell, expected string) {
 	}
 	fvc.c.Assert(val, Equals, expected)
 }
+func cellsFormattedValueEquals(t *testing.T, cell *Cell, expected string) {
+	val, err := cell.FormattedValue()
+	if err != nil {
+		t.Error(err)
+	}
+	if val != expected {
+		t.Errorf("Expected cell.FormattedValue() to be %v, got %v", expected, val)
+	}
+}
 
 // We can return a string representation of the formatted data
 func (l *CellSuite) TestFormattedValue(c *C) {
-	// XXX TODO, this test should probably be split down, and made
-	// in terms of SafeFormattedValue, as FormattedValue wraps
-	// that function now.
-	cell := Cell{Value: "37947.7500001"}
-	negativeCell := Cell{Value: "-37947.7500001"}
-	smallCell := Cell{Value: "0.007"}
-	earlyCell := Cell{Value: "2.1"}
+	cell := Cell{Value: "37947.7500001", cellType: CellTypeNumeric}
+	negativeCell := Cell{Value: "-37947.7500001", cellType: CellTypeNumeric}
+	smallCell := Cell{Value: "0.007", cellType: CellTypeNumeric}
+	earlyCell := Cell{Value: "2.1", cellType: CellTypeNumeric}
 
 	fvc := formattedValueChecker{c: c}
 
@@ -187,12 +292,12 @@ func (l *CellSuite) TestFormattedValue(c *C) {
 	// don't think FormattedValue() should be doing a numeric conversion on the value
 	// before returning the string.
 	cell.NumFmt = "0"
-	fvc.Equals(cell, "37947")
+	fvc.Equals(cell, "37948")
 
 	cell.NumFmt = "#,##0" // For the time being we're not doing
 	// this comma formatting, so it'll fall back to the related
 	// non-comma form.
-	fvc.Equals(cell, "37947")
+	fvc.Equals(cell, "37948")
 
 	cell.NumFmt = "#,##0.00;(#,##0.00)"
 	fvc.Equals(cell, "37947.75")
@@ -206,17 +311,17 @@ func (l *CellSuite) TestFormattedValue(c *C) {
 	fvc.Equals(cell, "37947.75")
 
 	cell.NumFmt = "#,##0 ;(#,##0)"
-	fvc.Equals(cell, "37947")
+	fvc.Equals(cell, "37948")
 	negativeCell.NumFmt = "#,##0 ;(#,##0)"
-	fvc.Equals(negativeCell, "(37947)")
+	fvc.Equals(negativeCell, "(37948)")
 
 	cell.NumFmt = "#,##0 ;[red](#,##0)"
-	fvc.Equals(cell, "37947")
+	fvc.Equals(cell, "37948")
 	negativeCell.NumFmt = "#,##0 ;[red](#,##0)"
-	fvc.Equals(negativeCell, "(37947)")
+	fvc.Equals(negativeCell, "(37948)")
 
 	negativeCell.NumFmt = "#,##0.00;(#,##0.00)"
-	fvc.Equals(negativeCell, "(-37947.75)")
+	fvc.Equals(negativeCell, "(37947.75)")
 
 	cell.NumFmt = "0%"
 	fvc.Equals(cell, "3794775%")
@@ -546,7 +651,7 @@ func (s *CellSuite) TestIsTimeFormat(c *C) {
 	c.Assert(isTimeFormat("a/p"), Equals, true)
 	c.Assert(isTimeFormat("ss"), Equals, true)
 	c.Assert(isTimeFormat("mm"), Equals, true)
-	c.Assert(isTimeFormat(":"), Equals, true)
+	c.Assert(isTimeFormat(":"), Equals, false)
 	c.Assert(isTimeFormat("z"), Equals, false)
 }
 

+ 1 - 0
col.go

@@ -11,6 +11,7 @@ type Col struct {
 	Collapsed    bool
 	OutlineLevel uint8
 	numFmt       string
+	parsedNumFmt *parsedNumberFormat
 	style        *Style
 }
 

+ 26 - 6
file.go

@@ -25,6 +25,8 @@ type File struct {
 	DefinedNames   []*xlsxDefinedName
 }
 
+const NoRowLimit int = -1
+
 // Create a new File
 func NewFile() *File {
 	return &File{
@@ -36,31 +38,49 @@ func NewFile() *File {
 
 // OpenFile() take the name of an XLSX file and returns a populated
 // xlsx.File struct for it.
-func OpenFile(filename string) (file *File, err error) {
-	var f *zip.ReadCloser
-	f, err = zip.OpenReader(filename)
+func OpenFile(fileName string) (file *File, err error) {
+	return OpenFileWithRowLimit(fileName, NoRowLimit)
+}
+
+// OpenFileWithRowLimit() will open the file, but will only read the specified number of rows.
+// If you save this file, it will be truncated to the number of rows specified.
+func OpenFileWithRowLimit(fileName string, rowLimit int) (file *File, err error) {
+	var z *zip.ReadCloser
+	z, err = zip.OpenReader(fileName)
 	if err != nil {
 		return nil, err
 	}
-	file, err = ReadZip(f)
+	file, err = ReadZipWithRowLimit(z, rowLimit)
 	return
 }
 
 // OpenBinary() take bytes of an XLSX file and returns a populated
 // xlsx.File struct for it.
 func OpenBinary(bs []byte) (*File, error) {
+	return OpenBinaryWithRowLimit(bs, NoRowLimit)
+}
+
+// OpenBinaryWithRowLimit() takes the bytes of an XLSX file and returns a populated
+// xlsx.File struct for it, passing rowLimit through to OpenReaderAtWithRowLimit().
+func OpenBinaryWithRowLimit(bs []byte, rowLimit int) (*File, error) {
 	r := bytes.NewReader(bs)
-	return OpenReaderAt(r, int64(r.Len()))
+	return OpenReaderAtWithRowLimit(r, int64(r.Len()), rowLimit)
 }
 
 // OpenReaderAt() take io.ReaderAt of an XLSX file and returns a populated
 // xlsx.File struct for it.
 func OpenReaderAt(r io.ReaderAt, size int64) (*File, error) {
+	return OpenReaderAtWithRowLimit(r, size, NoRowLimit)
+}
+
+// OpenReaderAtWithRowLimit() takes an io.ReaderAt of an XLSX file and returns a populated
+// xlsx.File struct for it, passing rowLimit through to ReadZipReaderWithRowLimit().
+func OpenReaderAtWithRowLimit(r io.ReaderAt, size int64, rowLimit int) (*File, error) {
 	file, err := zip.NewReader(r, size)
 	if err != nil {
 		return nil, err
 	}
-	return ReadZipReader(file)
+	return ReadZipReaderWithRowLimit(file, rowLimit)
 }
 
 // A convenient wrapper around File.ToSlice, FileToSlice will

+ 104 - 3
file_test.go

@@ -2,11 +2,49 @@ package xlsx
 
 import (
 	"encoding/xml"
+	"io"
+	"os"
 	"path/filepath"
 
 	. "gopkg.in/check.v1"
 )
 
+// ReaderAtCounter wraps a ReaderAt and counts the number of bytes that are read out of it
+type ReaderAtCounter struct {
+	readerAt  io.ReaderAt
+	bytesRead int
+}
+
+var _ io.ReaderAt = &ReaderAtCounter{}
+
+// NewReaderAtCounter creates a ReaderAtCounter by opening the file name, and provides the size which is needed for
+// opening as XLSX.
+func NewReaderAtCounter(name string) (*ReaderAtCounter, int64, error) {
+	f, err := os.Open(name)
+	if err != nil {
+		return nil, -1, err
+	}
+	fi, err := f.Stat()
+	if err != nil {
+		f.Close()
+		return nil, -1, err
+	}
+	readerAtCounter := &ReaderAtCounter{
+		readerAt: f,
+	}
+	return readerAtCounter, fi.Size(), nil
+}
+
+func (r *ReaderAtCounter) ReadAt(p []byte, off int64) (n int, err error) {
+	n, err = r.readerAt.ReadAt(p, off)
+	r.bytesRead += n
+	return n, err
+}
+
+func (r *ReaderAtCounter) GetBytesRead() int {
+	return r.bytesRead
+}
+
 type FileSuite struct{}
 
 var _ = Suite(&FileSuite{})
@@ -15,13 +53,76 @@ var _ = Suite(&FileSuite{})
 // struct.
 func (l *FileSuite) TestOpenFile(c *C) {
 	var xlsxFile *File
-	var error error
+	var err error
 
-	xlsxFile, error = OpenFile("./testdocs/testfile.xlsx")
-	c.Assert(error, IsNil)
+	xlsxFile, err = OpenFile("./testdocs/testfile.xlsx")
+	c.Assert(err, IsNil)
 	c.Assert(xlsxFile, NotNil)
 }
 
+func (l *FileSuite) TestPartialReadsWithFewSharedStringsOnlyPartiallyReads(c *C) {
+	// This test verifies that a large file is only partially read when using a small row limit.
+	// This file is 11,228,530 bytes, but only 14,020 bytes get read out when using a row limit of 10.
+	// I'm specifying a limit of 20,000 to prevent test flakiness if the bytes read fluctuates with small code changes.
+	rowLimit := 10
+	// It is possible that readLimit will need to be increased by a small amount in the future, but do not increase it
+	// to anywhere near a significant amount of 11 million. We're testing that this number is low, to ensure that partial
+	// reads are fast.
+	readLimit := 20 * 1000
+	reader, size, err := NewReaderAtCounter("testdocs/large_sheet_no_shared_strings_no_dimension_tag.xlsx")
+	if err != nil {
+		c.Fatal(err)
+	}
+	file, err := OpenReaderAtWithRowLimit(reader, size, rowLimit)
+	if reader.bytesRead > readLimit {
+		// If this test begins failing, do not increase readLimit dramatically. Instead investigate why the number of
+		// bytes read went up and fix this issue.
+		c.Errorf("Reading %v rows from a sheet with ~31,000 rows and few shared strings read %v bytes, must read less than %v bytes", rowLimit, reader.bytesRead, readLimit)
+	}
+	if len(file.Sheets[0].Rows) != rowLimit {
+		c.Errorf("Expected sheet to have %v rows, but found %v rows", rowLimit, len(file.Sheets[0].Rows))
+	}
+}
+
+func (l *FileSuite) TestPartialReadsWithLargeSharedStringsOnlyPartiallyReads(c *C) {
+	// This test verifies that a large file is only partially read when using a small row limit.
+	// This file is 7,055,632 bytes, but only 1,092,839 bytes get read out when using a row limit of 10.
+	// I'm specifying a limit of 1.2 MB to prevent test flakiness if the bytes read fluctuates with small code changes.
+	// The reason that this test has a much larger limit than TestPartialReadsWithFewSharedStringsOnlyPartiallyReads
+	// is that this file has a Shared Strings file that is a little over 1 MB.
+	rowLimit := 10
+	// It is possible that readLimit will need to be increased by a small amount in the future, but do not increase it
+	// to anywhere near a significant amount of 7 million. We're testing that this number is low, to ensure that partial
+	// reads are fast.
+	readLimit := int(1.2 * 1000 * 1000)
+	reader, size, err := NewReaderAtCounter("testdocs/large_sheet_large_sharedstrings_dimension_tag.xlsx")
+	if err != nil {
+		c.Fatal(err)
+	}
+	file, err := OpenReaderAtWithRowLimit(reader, size, rowLimit)
+	if reader.bytesRead > readLimit {
+		// If this test begins failing, do not increase readLimit dramatically. Instead investigate why the number of
+		// bytes read went up and fix this issue.
+		c.Errorf("Reading %v rows from a sheet with ~31,000 rows and a large shared strings read %v bytes, must read less than %v bytes", rowLimit, reader.bytesRead, readLimit)
+	}
+	// This is testing that the sheet was truncated, but it is also testing that the dimension tag was ignored.
+	// If the dimension tag is not correctly ignored, there will be 10 rows of the data, plus ~31k empty rows tacked on.
+	if len(file.Sheets[0].Rows) != rowLimit {
+		c.Errorf("Expected sheet to have %v rows, but found %v rows", rowLimit, len(file.Sheets[0].Rows))
+	}
+}
+
+func (l *FileSuite) TestPartialReadsWithFewerRowsThanRequested(c *C) {
+	rowLimit := 10
+	file, err := OpenFileWithRowLimit("testdocs/testfile.xlsx", rowLimit)
+	if err != nil {
+		c.Fatal(err)
+	}
+	if len(file.Sheets[0].Rows) != 2 {
+		c.Errorf("Expected sheet to have %v rows, but found %v rows", 2, len(file.Sheets[0].Rows))
+	}
+}
+
 func (l *FileSuite) TestOpenFileWithoutStyleAndSharedStrings(c *C) {
 	var xlsxFile *File
 	var error error

+ 561 - 0
format_code.go

@@ -0,0 +1,561 @@
+package xlsx
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"strconv"
+	"strings"
+)
+
+// parsedNumberFormat is the parsed form of a complete number format string. Do not edit these attributes once this
+// struct is created. This struct should only be created by parseFullNumberFormatString() from a number format string.
+// If the format for a cell needs to change, change the number format string and getNumberFormat() will re-parse it.
+type parsedNumberFormat struct {
+	numFmt                        string         // the original, unmodified format string
+	isTimeFormat                  bool           // true when the whole format string was detected as a date/time format
+	negativeFormatExpectsPositive bool           // true when negative values are made positive before negativeFormat is applied
+	positiveFormat                *formatOptions // section used for values greater than zero
+	negativeFormat                *formatOptions // section used for values less than zero
+	zeroFormat                    *formatOptions // section used for values equal to zero
+	textFormat                    *formatOptions // section used for non-numeric cells
+	parseEncounteredError         bool           // true when any section fell back to fallbackErrorFormat
+}
+
+type formatOptions struct { // formatOptions is the parsed form of one section of a number format string
+	isTimeFormat        bool   // always false when built by parseNumberFormatSection()
+	showPercent         bool   // when true the value is multiplied by 100 before it is formatted
+	fullFormatString    string // the section as it appeared in the format string
+	reducedFormatString string // what remains of the section once prefix and suffix literals are removed
+	prefix              string // literal text emitted before the formatted number
+	suffix              string // literal text emitted after the formatted number
+}
+
+// FormatValue renders cell.Value the way this parsed number format would display it.
+//
+// Non-numeric cells are formatted with the text section only. Numeric cells are formatted as a time when the
+// format is a time format; otherwise the positive, negative or zero section is chosen from the sign of the value.
+// When the value cannot be parsed as a float, or a text format is unsupported, the raw cell.Value is returned
+// together with an error. Numeric formats that are not recognised return the raw cell.Value with a nil error.
+func (fullFormat *parsedNumberFormat) FormatValue(cell *Cell) (string, error) {
+	if cell.cellType != CellTypeNumeric {
+		// Use the receiver's own text section, like every other section below, instead of reaching back
+		// through the cell's cached format, which may be unset or belong to a different format string.
+		textFormat := fullFormat.textFormat
+		// This switch statement is only for String formats
+		switch textFormat.reducedFormatString {
+		case builtInNumFmt[builtInNumFmtIndex_GENERAL]: // General is literally "general"
+			return cell.Value, nil
+		case builtInNumFmt[builtInNumFmtIndex_STRING]: // String is "@"
+			return textFormat.prefix + cell.Value + textFormat.suffix, nil
+		case "":
+			return textFormat.prefix + textFormat.suffix, nil
+		default:
+			return cell.Value, errors.New("invalid or unsupported format")
+		}
+	}
+	if fullFormat.isTimeFormat {
+		return fullFormat.parseTime(cell.Value, cell.date1904)
+	}
+	var numberFormat *formatOptions
+	floatVal, floatErr := strconv.ParseFloat(cell.Value, 64)
+	if floatErr != nil {
+		return cell.Value, floatErr
+	}
+	if floatVal > 0 {
+		numberFormat = fullFormat.positiveFormat
+	} else if floatVal < 0 {
+		// A dedicated negative section supplies its own sign (or parentheses), so it is given the magnitude.
+		if fullFormat.negativeFormatExpectsPositive {
+			floatVal = math.Abs(floatVal)
+		}
+		numberFormat = fullFormat.negativeFormat
+	} else {
+		numberFormat = fullFormat.zeroFormat
+	}
+
+	if numberFormat.showPercent {
+		floatVal = 100 * floatVal
+	}
+
+	// Only the most common format strings are supported here.
+	// Eventually this switch needs to be replaced with a more general solution.
+	// Some of these "supported" formats should have thousand separators, but don't get them since Go fmt
+	// doesn't have a way to request thousands separators.
+	// The only things that should be supported here are in the array formattingCharacters,
+	// everything else has been stripped out before.
+	// The formatting characters can have non-formatting characters mixed in with them and those should be maintained.
+	// However, at this time we fail to parse those formatting codes and they get replaced with "General"
+
+	// This switch statement is only for number formats
+	var formattedNum string
+	switch numberFormat.reducedFormatString {
+	case builtInNumFmt[builtInNumFmtIndex_GENERAL]: // General is literally "general"
+		// prefix, showPercent, and suffix cannot apply to the general format
+		// The logic for showing numbers when the format is "general" is much more complicated than the rest of these.
+		val, err := generalNumericScientific(cell.Value, true)
+		if err != nil {
+			// Best effort: show the raw value rather than failing the whole cell.
+			return cell.Value, nil
+		}
+		return val, nil
+	case builtInNumFmt[builtInNumFmtIndex_STRING]: // String is "@"
+		formattedNum = cell.Value
+	case builtInNumFmt[builtInNumFmtIndex_INT], "#,##0": // Int is "0"
+		// Previously this case would cast to int and print with %d, but that will not round the value correctly.
+		formattedNum = fmt.Sprintf("%.0f", floatVal)
+	case "0.0", "#,##0.0":
+		formattedNum = fmt.Sprintf("%.1f", floatVal)
+	case builtInNumFmt[builtInNumFmtIndex_FLOAT], "#,##0.00": // Float is "0.00"
+		formattedNum = fmt.Sprintf("%.2f", floatVal)
+	case "0.000", "#,##0.000":
+		formattedNum = fmt.Sprintf("%.3f", floatVal)
+	case "0.0000", "#,##0.0000":
+		formattedNum = fmt.Sprintf("%.4f", floatVal)
+	case "0.00e+00", "##0.0e+0":
+		formattedNum = fmt.Sprintf("%e", floatVal)
+	case "":
+		// Do nothing: the section consists of literals only, so just the prefix and suffix are emitted.
+	default:
+		return cell.Value, nil
+	}
+	return numberFormat.prefix + formattedNum + numberFormat.suffix, nil
+}
+
+func generalNumericScientific(value string, allowScientific bool) (string, error) {
+	if strings.TrimSpace(value) == "" { // blank input formats as the empty string, not as an error
+		return "", nil
+	}
+	f, err := strconv.ParseFloat(value, 64)
+	if err != nil {
+		return value, err
+	}
+	if allowScientific {
+		absF := math.Abs(f)
+		// When using General format, numbers whose magnitude is less than 1e-9 (0.000000001) or greater than or
+		// equal to 1e11 (100,000,000,000) should be shown in scientific notation.
+		// Magnitudes below the smallest nonzero float64 (i.e. zero) are excluded and are printed as plain zero.
+		if (absF >= math.SmallestNonzeroFloat64 && absF < minNonScientificNumber) || absF >= maxNonScientificNumber {
+			return strconv.FormatFloat(f, 'E', -1, 64), nil
+		}
+	}
+	// This format (fmt="f", prec=-1) will prevent padding with zeros and will never switch to scientific notation.
+	// However, it will show more than 11 characters for very precise numbers, and this cannot be changed.
+	// You could also use fmt="g", prec=11, which doesn't pad with zeros and allows the correct precision,
+	// but it will use scientific notation on numbers less than 1e-4. That value is hardcoded in Go and cannot be
+	// configured or disabled.
+	return strconv.FormatFloat(f, 'f', -1, 64), nil
+}
+
+// compareFormatString reports whether two format strings are equivalent. Format strings are a little strange to
+// compare because the empty string counts as general, and "general" itself is matched case insensitively.
+func compareFormatString(fmt1, fmt2 string) bool {
+	if fmt1 == fmt2 {
+		return true
+	}
+	if fmt1 == "" || strings.EqualFold(fmt1, "general") { // normalise every spelling of general
+		fmt1 = "general"
+	}
+	if fmt2 == "" || strings.EqualFold(fmt2, "general") {
+		fmt2 = "general"
+	}
+	return fmt1 == fmt2
+}
+
+func parseFullNumberFormatString(numFmt string) *parsedNumberFormat {
+	parsedNumFmt := &parsedNumberFormat{
+		numFmt: numFmt,
+	}
+	if isTimeFormat(numFmt) {
+		// Time formats cannot have multiple groups separated by semicolons, there is only one format.
+		// Strings are unaffected by the time format.
+		parsedNumFmt.isTimeFormat = true
+		parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
+		return parsedNumFmt
+	}
+
+	var fmtOptions []*formatOptions
+	formats, err := splitFormatOnSemicolon(numFmt)
+	if err == nil {
+		for _, formatSection := range formats {
+			parsedFormat, err := parseNumberFormatSection(formatSection)
+			if err != nil {
+				// If an invalid number section is found, fall back to general for that section only
+				parsedFormat = fallbackErrorFormat
+				parsedNumFmt.parseEncounteredError = true
+			}
+			fmtOptions = append(fmtOptions, parsedFormat)
+		}
+	} else {
+		fmtOptions = append(fmtOptions, fallbackErrorFormat)
+		parsedNumFmt.parseEncounteredError = true
+	}
+	if len(fmtOptions) > 4 { // more than four sections is invalid, so the whole format is treated as general
+		fmtOptions = []*formatOptions{fallbackErrorFormat}
+		parsedNumFmt.parseEncounteredError = true
+	}
+
+	if len(fmtOptions) == 1 {
+		// If there is only one option, it is used for all numbers, and for text too if it contains "@"
+		parsedNumFmt.positiveFormat = fmtOptions[0]
+		parsedNumFmt.negativeFormat = fmtOptions[0]
+		parsedNumFmt.zeroFormat = fmtOptions[0]
+		if strings.Contains(fmtOptions[0].fullFormatString, "@") {
+			parsedNumFmt.textFormat = fmtOptions[0]
+		} else {
+			parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
+		}
+	} else if len(fmtOptions) == 2 {
+		// If there are two formats, the first is used for positive and zeros, the second gets used as a negative format,
+		// and strings are not formatted.
+		// When negative numbers have their own format, they should become positive before having the format applied.
+		// The format will contain a negative sign if it is desired, but they may be colored red or wrapped in
+		// parenthesis instead.
+		parsedNumFmt.negativeFormatExpectsPositive = true
+		parsedNumFmt.positiveFormat = fmtOptions[0]
+		parsedNumFmt.negativeFormat = fmtOptions[1]
+		parsedNumFmt.zeroFormat = fmtOptions[0]
+		parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
+	} else if len(fmtOptions) == 3 {
+		// If there are three formats, the first is used for positive, the second gets used as a negative format,
+		// the third is for zero, and strings are not formatted.
+		parsedNumFmt.negativeFormatExpectsPositive = true
+		parsedNumFmt.positiveFormat = fmtOptions[0]
+		parsedNumFmt.negativeFormat = fmtOptions[1]
+		parsedNumFmt.zeroFormat = fmtOptions[2]
+		parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
+	} else {
+		// With four options, the first is positive, the second is negative, the third is zero, and the fourth is strings
+		// Negative numbers should still become positive before having the negative formatting applied.
+		parsedNumFmt.negativeFormatExpectsPositive = true
+		parsedNumFmt.positiveFormat = fmtOptions[0]
+		parsedNumFmt.negativeFormat = fmtOptions[1]
+		parsedNumFmt.zeroFormat = fmtOptions[2]
+		parsedNumFmt.textFormat = fmtOptions[3]
+	}
+	return parsedNumFmt
+}
+
+// splitFormatOnSemicolon will split the format string into its format sections.
+// Semicolons that follow a backslash or sit inside double quotes are literals and do not split the string.
+// It always returns at least one section, and returns an error only when a double quote is never closed.
+func splitFormatOnSemicolon(format string) ([]string, error) {
+	var formats []string
+	prevIndex := 0
+	for i := 0; i < len(format); i++ {
+		if format[i] == ';' {
+			formats = append(formats, format[prevIndex:i])
+			prevIndex = i + 1
+		} else if format[i] == '\\' { // the escaped character is a literal, step over it
+			i++
+		} else if format[i] == '"' {
+			endQuoteIndex := strings.Index(format[i+1:], "\"")
+			if endQuoteIndex == -1 {
+				// This is an invalid format string, the caller falls back to general
+				return nil, errors.New("invalid format string")
+			}
+			i += endQuoteIndex + 1 // jump to the closing quote; the loop increment then moves past it
+		}
+	}
+	return append(formats, format[prevIndex:]), nil
+}
+
+var fallbackErrorFormat = &formatOptions{ // shared general format substituted for any section that fails to parse
+	fullFormatString:    "general",
+	reducedFormatString: "general",
+}
+
+// parseNumberFormatSection takes in an individual format section and parses out most of the options.
+// Some options are parsed, removed from the string, and set as settings on formatOptions.
+// The remainder of the format string is put in the reducedFormatString attribute, and supported values for these
+// are handled in a switch in parsedNumberFormat.FormatValue().
+// Ideally more and more of the format string would be parsed out here into settings until there is no remainder string
+// at all.
+// Features that this supports:
+// - Time formats are not detected here: isTimeFormat is always false in the returned options.
+//   parseFullNumberFormatString() checks the whole format string with isTimeFormat() before splitting it into
+//   sections, and that detection can both catch formats that are not time formats and miss ones that are.
+// - Color formats are detected and removed.
+// - Currency annotations are handled properly.
+// - Literal strings wrapped in quotes are handled and put into prefix or suffix.
+// - Numbers that should be percent are detected and marked in the options.
+// - Conditionals are detected and removed, but they are not obeyed. The conditional groups will be used just like the
+//   positive;negative;zero;string format groups. Here is an example of a conditional format: "[Red][<=100];[Blue][>100]"
+// Decoding the actual number formatting portion is out of scope, that is placed into reducedFormatString and is used
+// when formatting the string. The string there will be reduced to only the things in the formattingCharacters array.
+// Everything not in that array has been parsed out and put into formatOptions.
+func parseNumberFormatSection(fullFormat string) (*formatOptions, error) {
+	reducedFormat := strings.TrimSpace(fullFormat)
+
+	// general is the only format that does not use the normal format symbols notations
+	if compareFormatString(reducedFormat, "general") {
+		return &formatOptions{
+			fullFormatString:    "general",
+			reducedFormatString: "general",
+		}, nil
+	}
+
+	prefix, reducedFormat, showPercent1, err := parseLiterals(reducedFormat)
+	if err != nil {
+		return nil, err
+	}
+
+	reducedFormat, suffixFormat := splitFormatAndSuffixFormat(reducedFormat)
+
+	suffix, remaining, showPercent2, err := parseLiterals(suffixFormat)
+	if err != nil {
+		return nil, err
+	}
+	if len(remaining) > 0 {
+		// This paradigm of codes consisting of literals, number formats, then more literals is not always correct, they can
+		// actually be intertwined. Though 99% of the time number formats will not do this.
+		// Excel uses this format string for Social Security Numbers: 000\-00\-0000
+		// and this for US phone numbers: [<=9999999]###\-####;\(###\)\ ###\-####
+		return nil, errors.New("invalid or unsupported format string")
+	}
+
+	return &formatOptions{
+		fullFormatString:    fullFormat,
+		isTimeFormat:        false,
+		reducedFormatString: reducedFormat,
+		prefix:              prefix,
+		suffix:              suffix,
+		showPercent:         showPercent1 || showPercent2,
+	}, nil
+}
+
+// formattingCharacters are the tokens that will be left in formatOptions.reducedFormatString.
+// It is important that these be looked for in order so that the multi-character tokens are matched before "0", "#", "?".
+// / (slash) is a fraction format if preceded by 0, #, or ?, otherwise it is not a formatting character
+// E- E+ e- e+ are scientific notation, but E, e, -, + are not formatting characters independently
+// \ (back slash) makes the next character a literal (not formatting)
+// " Anything in double quotes is not a formatting character
+// _ (underscore) skips the width of the next character, so the next character cannot be formatting
+var formattingCharacters = []string{"0/", "#/", "?/", "E-", "E+", "e-", "e+", "0", "#", "?", ".", ",", "@", "*"}
+
+// timeFormatCharacters are the tokens whose presence marks a format as a time format. The following are also time
+// format characters, but since this is only used for detecting, not decoding, they are redundant here: ee, gg, ggg,
+// rr, ss, mm, hh, yyyy, dd, ddd, dddd, mmm, mmmm, mmmmm, ss.0000, ss.000, ss.00, ss.0 ("m" is listed twice, harmlessly).
+// The .00 type format is tricky: it only counts after ss, s, [ss] or [s], and .00 alone is a valid number format.
+var timeFormatCharacters = []string{"m", "d", "yy", "h", "m", "AM/PM", "A/P", "am/pm", "a/p", "r", "g", "e", "b1", "b2", "[hh]", "[h]", "[mm]", "[m]",
+	"s.0000", "s.000", "s.00", "s.0", "s", "[ss].0000", "[ss].000", "[ss].00", "[ss].0", "[ss]", "[s].0000", "[s].000", "[s].00", "[s].0", "[s]"}
+
+func splitFormatAndSuffixFormat(format string) (string, string) {
+	var i int // i ends up as the length of the leading run of formatting characters
+	for ; i < len(format); i++ {
+		curReducedFormat := format[i:]
+		var found bool
+		for _, special := range formattingCharacters {
+			if strings.HasPrefix(curReducedFormat, special) {
+				// Skip ahead if the token was longer than one byte; the loop increment consumes the last byte.
+				i += len(special) - 1
+				found = true
+				break
+			}
+		}
+		if !found { // first byte that does not start a formatting token: the suffix begins here
+			break
+		}
+	}
+	suffixFormat := format[i:]
+	format = format[:i]
+	return format, suffixFormat
+}
+
+// parseLiterals consumes the run of literal (non-number-formatting) tokens at the start of format.
+// It returns, in order:
+//   - the literal text to emit (escaped characters, quoted strings, currency symbols, bare symbols and '%'),
+//   - the unconsumed rest of format, which starts at the first formatting character ("" if there is none),
+//   - whether a percent sign was seen, in which case the caller must scale the value by 100,
+//   - an error for an unterminated quote or bracket, a malformed currency annotation, or an unescaped
+//     character that is neither an allowed literal nor a formatting character.
+func parseLiterals(format string) (string, string, bool, error) {
+	var prefix string
+	showPercent := false
+	for i := 0; i < len(format); i++ {
+		curReducedFormat := format[i:]
+		switch curReducedFormat[0] {
+		case '\\':
+			// A backslash makes the next character a literal: add it to the prefix and skip over it.
+			// The character may be multi-byte (e.g. \€), so consume the whole character rather than one byte;
+			// taking a single byte would split it and the leftover bytes would be rejected below.
+			if len(curReducedFormat) > 1 {
+				width := leadingRuneLen(curReducedFormat[1:])
+				prefix += curReducedFormat[1 : 1+width]
+				i += width
+			}
+		case '_':
+			// An underscore reserves the width of the next character: skip that whole character, but don't
+			// add it to the prefix.
+			if len(curReducedFormat) > 1 {
+				i += leadingRuneLen(curReducedFormat[1:])
+			}
+		case '*':
+			// Asterisks are used to repeat the next character to fill the full cell width.
+			// There isn't really a cell size in this context, so the asterisk itself is ignored and the next
+			// character is processed normally.
+		case '"':
+			// If there is a quote skip to the next quote, and add the quoted characters to the prefix
+			endQuoteIndex := strings.Index(curReducedFormat[1:], "\"")
+			if endQuoteIndex == -1 {
+				return "", "", false, errors.New("invalid formatting code")
+			}
+			prefix = prefix + curReducedFormat[1:endQuoteIndex+1]
+			i += endQuoteIndex + 1
+		case '%':
+			showPercent = true
+			prefix += "%"
+		case '[':
+			// Brackets can be currency annotations (e.g. [$$-409])
+			// color formats (e.g. [color1] through [color56], as well as [red] etc.)
+			// conditionals (e.g. [>100], the valid conditionals are =, >, <, >=, <=, <>)
+			bracketIndex := strings.Index(curReducedFormat, "]")
+			if bracketIndex == -1 {
+				return "", "", false, errors.New("invalid formatting code")
+			}
+			// Currencies in Excel are annotated with this format: [$<Currency String>-<Language Info>]
+			// Currency String is something like $, ¥, €, or £
+			// Language Info is three hexadecimal characters
+			if len(curReducedFormat) > 2 && curReducedFormat[1] == '$' {
+				dashIndex := strings.Index(curReducedFormat, "-")
+				if dashIndex != -1 && dashIndex < bracketIndex {
+					// Get the currency symbol, and skip to the end of the currency format
+					prefix += curReducedFormat[2:dashIndex]
+				} else {
+					return "", "", false, errors.New("invalid formatting code")
+				}
+			}
+			// Colors and conditionals contribute nothing to the output; skip to the closing bracket.
+			i += bracketIndex
+		case '$', '-', '+', '/', '(', ')', ':', '!', '^', '&', '\'', '~', '{', '}', '<', '>', '=', ' ':
+			// These symbols are allowed to be used as literal without escaping
+			prefix += curReducedFormat[0:1]
+		default:
+			for _, special := range formattingCharacters {
+				if strings.HasPrefix(curReducedFormat, special) {
+					// This means we found the start of the actual number formatting portion, and should return.
+					return prefix, format[i:], showPercent, nil
+				}
+			}
+			// Symbols that don't have meaning and aren't in the exempt literal characters must be escaped.
+			return "", "", false, errors.New("invalid formatting code")
+		}
+	}
+	return prefix, "", showPercent, nil
+}
+
+// leadingRuneLen returns the number of bytes occupied by the first character of s, or 0 for an empty string.
+// Ranging over a string yields the byte index at which each character starts, so the second index produced is
+// the width of the first character; a byte that is not valid UTF-8 counts as one character of width 1.
+func leadingRuneLen(s string) int {
+	for i := range s {
+		if i > 0 {
+			return i
+		}
+	}
+	return len(s)
+}
+
+// parseTime converts the Excel serial date in value to a time and formats it according to the receiver's numFmt.
+func (fullFormat *parsedNumberFormat) parseTime(value string, date1904 bool) (string, error) {
+	f, err := strconv.ParseFloat(value, 64)
+	if err != nil {
+		return value, err
+	}
+	val := TimeFromExcelTime(f, date1904)
+	format := fullFormat.numFmt
+	// Replace Excel placeholders with Go time placeholders.
+	// For example, replace yyyy with 2006. These are in a specific order,
+	// due to the fact that m is used in month, minute, and am/pm. It would
+	// be easier to fix that with regular expressions, but if it's possible
+	// to keep this simple it would be easier to maintain.
+	// Full-length month and days (e.g. March, Tuesday) have letters in them that would be replaced
+	// by other characters (such as the 'h' in March, or the 'd' in Tuesday) below.
+	// First we convert them to arbitrary characters unused in Excel Date formats, and then at the end,
+	// turn them to what they should actually be.
+	// Based off: http://www.ozgrid.com/Excel/CustomFormats.htm
+	replacements := []struct{ xltime, gotime string }{
+		{"yyyy", "2006"},
+		{"yy", "06"},
+		{"mmmm", "%%%%"},
+		{"dddd", "&&&&"},
+		{"dd", "02"},
+		{"d", "2"},
+		{"mmm", "Jan"},
+		{"mmss", "0405"},
+		{"ss", "05"},
+		{"mm:", "04:"},
+		{":mm", ":04"},
+		{"mm", "01"},
+		{"am/pm", "pm"},
+		{"m/", "1/"},
+		{"%%%%", "January"},
+		{"&&&&", "Monday"},
+	}
+	// It is the presence of the "am/pm" indicator that determines
+	// if this is a 12 hour or 24 hours time format, not the
+	// number of 'h' characters.
+	if is12HourTime(format) {
+		format = strings.Replace(format, "hh", "03", 1)
+		format = strings.Replace(format, "h", "3", 1)
+	} else {
+		format = strings.Replace(format, "hh", "15", 1)
+		format = strings.Replace(format, "h", "15", 1)
+	}
+	for _, repl := range replacements { // only the first occurrence of each placeholder is replaced
+		format = strings.Replace(format, repl.xltime, repl.gotime, 1)
+	}
+	// If the hour is optional, strip it out, along with the
+	// possible dangling colon that would remain.
+	if val.Hour() < 1 {
+		format = strings.Replace(format, "]:", "]", 1)
+		format = strings.Replace(format, "[03]", "", 1)
+		format = strings.Replace(format, "[3]", "", 1)
+		format = strings.Replace(format, "[15]", "", 1)
+	} else {
+		format = strings.Replace(format, "[3]", "3", 1)
+		format = strings.Replace(format, "[15]", "15", 1)
+	}
+	return val.Format(format), nil
+}
+
+// isTimeFormat checks whether an Excel format string represents a time.Time.
+// It returns true only when the string contains at least one time formatting token and nothing that is invalid in
+// a time format. It can detect time format strings that cannot be correctly handled by parseTime().
+func isTimeFormat(format string) bool {
+	var foundTimeFormatCharacters bool
+	for i := 0; i < len(format); i++ {
+		curReducedFormat := format[i:]
+		switch curReducedFormat[0] {
+		case '\\', '_':
+			// A backslash makes the next character a literal and an underscore reserves the width of the next
+			// character; either way the next character is not a formatting character and is skipped.
+			// That character may be multi-byte (e.g. \€), so skip all of it: ranging over a string yields the
+			// byte index at which each character starts, so the second index is the width of the first one.
+			// Skipping a single byte would leave continuation bytes behind and wrongly reject the format.
+			rest := curReducedFormat[1:]
+			width := len(rest)
+			for idx := range rest {
+				if idx > 0 {
+					width = idx
+					break
+				}
+			}
+			i += width
+		case '*':
+			// Asterisks are used to repeat the next character to fill the full cell width.
+			// There isn't really a cell size in this context, so this will be ignored.
+		case '"':
+			// If there is a quote, skip to the closing quote; quoted text is a literal.
+			endQuoteIndex := strings.Index(curReducedFormat[1:], "\"")
+			if endQuoteIndex == -1 {
+				// This is not any type of valid format.
+				return false
+			}
+			i += endQuoteIndex + 1
+		case '$', '-', '+', '/', '(', ')', ':', '!', '^', '&', '\'', '~', '{', '}', '<', '>', '=', ' ':
+			// These symbols are allowed to be used as literal without escaping
+		case ',':
+			// This is not documented in the XLSX spec as far as I can tell, but Excel and Numbers will include
+			// commas in number formats without escaping them, so this should be supported.
+		default:
+			foundInThisLoop := false
+			for _, special := range timeFormatCharacters {
+				if strings.HasPrefix(curReducedFormat, special) {
+					foundTimeFormatCharacters = true
+					foundInThisLoop = true
+					// Consume the whole token; the loop increment accounts for its last byte.
+					i += len(special) - 1
+					break
+				}
+			}
+			if foundInThisLoop {
+				continue
+			}
+			if curReducedFormat[0] == '[' {
+				// For number formats, this code would happen above in a case '[': section.
+				// However, for time formats it must happen after looking for occurrences in timeFormatCharacters
+				// because there are a few time formats that can be wrapped in brackets.
+
+				// Brackets can be currency annotations (e.g. [$$-409])
+				// color formats (e.g. [color1] through [color56], as well as [red] etc.)
+				// conditionals (e.g. [>100], the valid conditionals are =, >, <, >=, <=, <>)
+				bracketIndex := strings.Index(curReducedFormat, "]")
+				if bracketIndex == -1 {
+					// This is not any type of valid format.
+					return false
+				}
+				i += bracketIndex
+				continue
+			}
+			// Symbols that don't have meaning, aren't in the exempt literal characters, and aren't escaped are invalid.
+			// The string could still be a valid number format string.
+			return false
+		}
+	}
+	// If the string doesn't have any time formatting characters, it could technically be a time format, but it
+	// would be a pretty weak time format. A valid time format with no time formatting symbols will also be a number
+	// format with no number formatting symbols, which is essentially a constant string that does not depend on the
+	// cell's value in any way. The downstream logic will do the right thing in that case if this returns false.
+	return foundTimeFormatCharacters
+}
+
+// is12HourTime reports whether an Excel time format string uses the 12-hour clock,
+// i.e. whether it contains one of the markers am/pm, AM/PM, a/p or A/P.
+func is12HourTime(format string) bool {
+	return strings.Contains(format, "am/pm") || strings.Contains(format, "AM/PM") || strings.Contains(format, "a/p") || strings.Contains(format, "A/P")
+}

+ 242 - 0
format_code_test.go

@@ -0,0 +1,242 @@
+package xlsx
+
+import (
+	"time"
+
+	. "gopkg.in/check.v1"
+)
+
+func (s *CellSuite) TestMoreFormattingFeatures(c *C) {
+
+	cell := Cell{}
+	cell.SetFloat(0)
+	date, err := cell.GetTime(false) // date1904 == false: serial 0 is expected to be 1899-12-30
+	c.Assert(err, Equals, nil)
+	c.Assert(date, Equals, time.Date(1899, 12, 30, 0, 0, 0, 0, time.UTC))
+	cell.SetFloat(39813.0)
+	date, err = cell.GetTime(true) // date1904 == true: the same API with the 1904 date system
+	c.Assert(err, Equals, nil)
+	c.Assert(date, Equals, time.Date(2013, 1, 1, 0, 0, 0, 0, time.UTC))
+	cell.Value = "d" // a non-numeric value must produce an error
+	_, err = cell.GetTime(false)
+	c.Assert(err, NotNil)
+}
+
+func (l *CellSuite) TestFormatStringSupport(c *C) {
+	testCases := []struct {
+		formatString         string
+		value                string
+		formattedValueOutput string
+		cellType             CellType
+		expectError          bool
+	}{
+		{
+			formatString:         `[red]0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `[blue]0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `[color50]0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `[$$-409]0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "$19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `[$¥-409]0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "¥19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `[$€-409]0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "€19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `[$£-409]0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "£19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `[$USD-409] 0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "USD 19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `0[$USD-409]`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "19USD",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `-[$USD-409]0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "-USD19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `\[0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "[19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `"["0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "[19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         "_[0",
+			value:                "18.989999999999998",
+			formattedValueOutput: "19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `"asdf"0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "asdf19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `"$"0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "$19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `$0`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "$19",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `%0`, // The percent sign can be anywhere in the format.
+			value:                "18.989999999999998",
+			formattedValueOutput: "%1899",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `$-+/()!^&'~{}<>=: 0 :=><}{~'&^)(/+-$`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "$-+/()!^&'~{}<>=: 19 :=><}{~'&^)(/+-$",
+			cellType:             CellTypeNumeric,
+		},
+		{
+			formatString:         `0;-0;"zero"`,
+			value:                "18.989999999999998",
+			formattedValueOutput: "19",
+			cellType:             CellTypeNumeric,
+		},
+		{ // 2 formats
+			formatString:         `0;(0)`,
+			value:                "0",
+			formattedValueOutput: "0",
+			cellType:             CellTypeNumeric,
+		},
+		{ // 2 formats
+			formatString:         `0;(0)`,
+			value:                "4.1",
+			formattedValueOutput: "4",
+			cellType:             CellTypeNumeric,
+		},
+		{ // 2 formats
+			formatString:         `0;(0)`,
+			value:                "-1",
+			formattedValueOutput: "(1)",
+			cellType:             CellTypeNumeric,
+		},
+		{ // 2 formats
+			formatString:         `0;(0)`,
+			value:                "asdf",
+			formattedValueOutput: "asdf",
+			cellType:             CellTypeNumeric,
+			expectError:          true,
+		},
+		{ // 2 formats
+			formatString:         `0;(0)`,
+			value:                "asdf",
+			formattedValueOutput: "asdf",
+			cellType:             CellTypeString,
+		},
+		{ // 3 formats
+			formatString:         `0;(0);"zero"`,
+			value:                "59.6",
+			formattedValueOutput: "60",
+			cellType:             CellTypeNumeric,
+		},
+		{ // 3 formats
+			formatString:         `0;(0);"zero"`,
+			value:                "-39",
+			formattedValueOutput: "(39)",
+			cellType:             CellTypeNumeric,
+		},
+		{ // 3 formats
+			formatString:         `0;(0);"zero"`,
+			value:                "0",
+			formattedValueOutput: "zero",
+			cellType:             CellTypeNumeric,
+		},
+		{ // 3 formats
+			formatString:         `0;(0);"zero"`,
+			value:                "asdf",
+			formattedValueOutput: "asdf",
+			cellType:             CellTypeNumeric,
+			expectError:          true,
+		},
+		{ // 3 formats
+			formatString:         `0;(0);"zero"`,
+			value:                "asdf",
+			formattedValueOutput: "asdf",
+			cellType:             CellTypeString,
+		},
+		{ // 4 formats, also note that the case of the format is maintained. Format codes should not be lower cased.
+			formatString:         `0;(0);"zero";"Behold: "@`,
+			value:                "asdf",
+			formattedValueOutput: "Behold: asdf",
+			cellType:             CellTypeString,
+		},
+		{ // 4 formats
+			formatString:         `0;(0);"zero";"Behold": @`,
+			value:                "asdf",
+			formattedValueOutput: "Behold: asdf",
+			cellType:             CellTypeString,
+		},
+		{ // 4 formats. This format contains an extra semicolon inside the quoted literal, which must not split the format.
+			formatString:         `0;(0);"zero";"Behold; "@`,
+			value:                "asdf",
+			formattedValueOutput: "Behold; asdf",
+			cellType:             CellTypeString,
+		},
+	}
+	for _, testCase := range testCases {
+		cell := &Cell{
+			cellType: testCase.cellType,
+			NumFmt:   testCase.formatString,
+			Value:    testCase.value,
+		}
+		val, err := cell.FormattedValue()
+		if err != nil != testCase.expectError { // fail when an error occurs unexpectedly, or is expected but missing
+			c.Fatal(err, testCase)
+		}
+		if val != testCase.formattedValueOutput {
+			c.Fatalf("Expected %v but got %v", testCase.formattedValueOutput, val)
+		}
+	}
+}

+ 84 - 21
lib.go

@@ -2,6 +2,7 @@ package xlsx
 
 import (
 	"archive/zip"
+	"bytes"
 	"encoding/xml"
 	"errors"
 	"fmt"
@@ -11,6 +12,10 @@ import (
 	"strings"
 )
 
+const (
+	sheetEnding = `</sheetData></worksheet>`
+)
+
 // XLSXReaderError is the standard error type for otherwise undefined
 // errors in the XLSX reading process.
 type XLSXReaderError struct {
@@ -25,7 +30,7 @@ func (e *XLSXReaderError) Error() string {
 
 // getRangeFromString is an internal helper function that converts
 // XLSX internal range syntax to a pair of integers.  For example,
-// the range string "1:3" yield the upper and lower intergers 1 and 3.
+// the range string "1:3" yield the upper and lower integers 1 and 3.
 func getRangeFromString(rangeString string) (lower int, upper int, error error) {
 	var parts []string
 	parts = strings.SplitN(rangeString, ":", 2)
@@ -46,9 +51,9 @@ func getRangeFromString(rangeString string) (lower int, upper int, error error)
 	return lower, upper, error
 }
 
-// lettersToNumeric is used to convert a character based column
+// ColLettersToIndex is used to convert a character based column
 // reference to a zero based numeric column identifier.
-func lettersToNumeric(letters string) int {
+func ColLettersToIndex(letters string) int {
 	sum, mul, n := 0, 1, 0
 	for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
 		c := letters[i]
@@ -134,9 +139,9 @@ func intToBase26(x int) (parts []int) {
 	return parts
 }
 
-// numericToLetters is used to convert a zero based, numeric column
+// ColIndexToLetters is used to convert a zero based, numeric column
 // indentifier into a character code.
-func numericToLetters(colRef int) string {
+func ColIndexToLetters(colRef int) string {
 	parts := intToBase26(colRef)
 	return formatColumnName(smooshBase26Slice(parts))
 }
@@ -172,14 +177,14 @@ func GetCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
 		return x, y, error
 	}
 	y -= 1 // Zero based
-	x = lettersToNumeric(letterPart)
+	x = ColLettersToIndex(letterPart)
 	return x, y, error
 }
 
 // GetCellIDStringFromCoords returns the Excel format cell name that
 // represents a pair of zero based cartesian coordinates.
 func GetCellIDStringFromCoords(x, y int) string {
-	letterPart := numericToLetters(x)
+	letterPart := ColIndexToLetters(x)
 	numericPart := y + 1
 	return fmt.Sprintf("%s%d", letterPart, numericPart)
 }
@@ -205,6 +210,7 @@ func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err erro
 // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet
 // that doesn't have a DimensionRef set.  The only case currently
 // known where this is true is with XLSX exported from Google Docs.
+// This is also true for XLSX files created through the streaming APIs.
 func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) {
 	// Note, this method could be very slow for large spreadsheets.
 	var x, y int
@@ -492,7 +498,7 @@ func fillCellDataFromInlineString(rawcell xlsxC, cell *Cell) {
 // rows from a XSLXWorksheet, populates them with Cells and resolves
 // the value references from the reference table and stores them in
 // the rows and columns.
-func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet) ([]*Row, []*Col, int, int) {
+func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet, rowLimit int) ([]*Row, []*Col, int, int) {
 	var rows []*Row
 	var cols []*Col
 	var row *Row
@@ -506,7 +512,7 @@ func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet) ([]*R
 		return nil, nil, 0, 0
 	}
 	reftable = file.referenceTable
-	if len(Worksheet.Dimension.Ref) > 0 && len(strings.Split(Worksheet.Dimension.Ref, ":")) == 2 {
+	if len(Worksheet.Dimension.Ref) > 0 && len(strings.Split(Worksheet.Dimension.Ref, ":")) == 2 && rowLimit == NoRowLimit {
 		minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
 	} else {
 		minCol, minRow, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet)
@@ -544,7 +550,7 @@ func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet) ([]*R
 				cols[i-1] = col
 				if file.styles != nil {
 					col.style = file.styles.getStyle(rawcol.Style)
-					col.numFmt = file.styles.getNumberFormat(rawcol.Style)
+					col.numFmt, col.parsedNumFmt = file.styles.getNumberFormat(rawcol.Style)
 				}
 			}
 		}
@@ -611,7 +617,7 @@ func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet) ([]*R
 				fillCellData(rawcell, reftable, sharedFormulas, cell)
 				if file.styles != nil {
 					cell.style = file.styles.getStyle(rawcell.S)
-					cell.NumFmt = file.styles.getNumberFormat(rawcell.S)
+					cell.NumFmt, cell.parsedNumFmt = file.styles.getNumberFormat(rawcell.S)
 				}
 				cell.date1904 = file.Date1904
 				// Cell is considered hidden if the row or the column of this cell is hidden
@@ -659,7 +665,7 @@ func readSheetViews(xSheetViews xlsxSheetViews) []SheetView {
 // into a Sheet struct.  This work can be done in parallel and so
 // readSheetsFromZipFile will spawn an instance of this function per
 // sheet and get the results back on the provided channel.
-func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) (errRes error) {
+func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string, rowLimit int) (errRes error) {
 	result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
 	defer func() {
 		if e := recover(); e != nil {
@@ -676,15 +682,15 @@ func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *F
 		}
 	}()
 
-	worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
-	if error != nil {
-		result.Error = error
+	worksheet, err := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap, rowLimit)
+	if err != nil {
+		result.Error = err
 		sc <- result
-		return error
+		return err
 	}
 	sheet := new(Sheet)
 	sheet.File = fi
-	sheet.Rows, sheet.Cols, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi, sheet)
+	sheet.Rows, sheet.Cols, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi, sheet, rowLimit)
 	sheet.Hidden = rsheet.State == sheetStateHidden || rsheet.State == sheetStateVeryHidden
 	sheet.SheetViews = readSheetViews(worksheet.SheetViews)
 
@@ -701,7 +707,7 @@ func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *F
 // readSheetsFromZipFile is an internal helper function that loops
 // over the Worksheets defined in the XSLXWorkbook and loads them into
 // Sheet objects stored in the Sheets slice of a xlsx.File struct.
-func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) (map[string]*Sheet, []*Sheet, error) {
+func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string, rowLimit int) (map[string]*Sheet, []*Sheet, error) {
 	var workbook *xlsxWorkbook
 	var err error
 	var rc io.ReadCloser
@@ -740,7 +746,7 @@ func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]strin
 		defer close(sheetChan)
 		err = nil
 		for i, rawsheet := range workbookSheets {
-			if err := readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap); err != nil {
+			if err := readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap, rowLimit); err != nil {
 				return
 			}
 		}
@@ -909,13 +915,28 @@ func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (WorkBookRels, err
 // xlsx.File struct populated with its contents.  In most cases
 // ReadZip is not used directly, but is called internally by OpenFile.
 func ReadZip(f *zip.ReadCloser) (*File, error) {
+	return ReadZipWithRowLimit(f, NoRowLimit)
+}
+
+// ReadZipWithRowLimit() takes a pointer to a zip.ReadCloser and returns a
+// xlsx.File struct populated with its contents, passing rowLimit through to
+// ReadZipReaderWithRowLimit. The zip.ReadCloser is closed before returning.
+// Pass NoRowLimit to read every row, which is what ReadZip does.
+func ReadZipWithRowLimit(f *zip.ReadCloser, rowLimit int) (*File, error) {
 	defer f.Close()
-	return ReadZipReader(&f.Reader)
+	return ReadZipReaderWithRowLimit(&f.Reader, rowLimit)
 }
 
 // ReadZipReader() can be used to read an XLSX in memory without
 // touching the filesystem.
 func ReadZipReader(r *zip.Reader) (*File, error) {
+	return ReadZipReaderWithRowLimit(r, NoRowLimit)
+}
+
+// ReadZipReaderWithRowLimit() can be used to read an XLSX in memory without
+// touching the filesystem.
+// rowLimit is the number of rows that should be read from the file. If rowLimit is -1, no limit is applied.
+// You can specify this with the constant NoRowLimit.
+func ReadZipReaderWithRowLimit(r *zip.Reader, rowLimit int) (*File, error) {
 	var err error
 	var file *File
 	var reftable *RefTable
@@ -986,7 +1007,7 @@ func ReadZipReader(r *zip.Reader) (*File, error) {
 
 		file.styles = style
 	}
-	sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap)
+	sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap, rowLimit)
 	if err != nil {
 		return nil, err
 	}
@@ -999,3 +1020,45 @@ func ReadZipReader(r *zip.Reader) (*File, error) {
 	file.Sheets = sheets
 	return file, nil
 }
+
+// truncateSheetXML will take in a reader to an XML sheet file and will return a reader that will read an equivalent
+// XML sheet file with only the number of rows specified. This greatly speeds up XML unmarshalling when only
+// a few rows need to be read from a large sheet.
+// When sheets are truncated, all formatting present after the sheetData tag will be lost, but all of this formatting
+// is related to printing and visibility, and is out of scope for most purposes of this library.
+func truncateSheetXML(r io.Reader, rowLimit int) (io.Reader, error) {
+	var rowCount int
+	var token xml.Token
+	var readErr error
+
+	output := new(bytes.Buffer)
+	r = io.TeeReader(r, output)
+	decoder := xml.NewDecoder(r)
+
+	for {
+		token, readErr = decoder.Token()
+		if readErr == io.EOF {
+			break
+		} else if readErr != nil {
+			return nil, readErr
+		}
+		end, ok := token.(xml.EndElement)
+		if ok && end.Name.Local == "row" {
+			rowCount++
+			if rowCount >= rowLimit {
+				break
+			}
+		}
+	}
+
+	offset := decoder.InputOffset()
+	output.Truncate(int(offset))
+
+	if readErr != io.EOF {
+		_, err := output.Write([]byte(sheetEnding))
+		if err != nil {
+			return nil, err
+		}
+	}
+	return output, nil
+}

+ 14 - 14
lib_test.go

@@ -141,7 +141,7 @@ func (l *LibSuite) TestLettersToNumeric(c *C) {
 		"BA": 52, "BZ": 77, "ZA": 26*26 + 0, "ZZ": 26*26 + 25,
 		"AAA": 26*26 + 26 + 0, "AMI": 1022}
 	for input, ans := range cases {
-		output := lettersToNumeric(input)
+		output := ColLettersToIndex(input)
 		c.Assert(output, Equals, ans)
 	}
 }
@@ -158,7 +158,7 @@ func (l *LibSuite) TestNumericToLetters(c *C) {
 		"ZZ":  26*26 + 25,
 		"AAA": 26*26 + 26 + 0, "AMI": 1022}
 	for ans, input := range cases {
-		output := numericToLetters(input)
+		output := ColIndexToLetters(input)
 		c.Assert(output, Equals, ans)
 	}
 
@@ -348,7 +348,7 @@ func (l *LibSuite) TestReadRowsFromSheet(c *C) {
 	file := new(File)
 	file.referenceTable = MakeSharedStringRefTable(sst)
 	sheet := new(Sheet)
-	rows, cols, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet)
+	rows, cols, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	c.Assert(maxRows, Equals, 2)
 	c.Assert(maxCols, Equals, 2)
 	row := rows[0]
@@ -426,7 +426,7 @@ func (l *LibSuite) TestReadRowsFromSheetWithMergeCells(c *C) {
 	file := new(File)
 	file.referenceTable = MakeSharedStringRefTable(sst)
 	sheet := new(Sheet)
-	rows, _, _, _ := readRowsFromSheet(worksheet, file, sheet)
+	rows, _, _, _ := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	row := rows[0] //
 	cell1 := row.Cells[0]
 	c.Assert(cell1.HMerge, Equals, 1)
@@ -503,7 +503,7 @@ func (l *LibSuite) TestReadRowsFromSheetBadR(c *C) {
 	sheet := new(Sheet)
 	// Discarding all return values; this test is a regression for
 	// a panic due to an "index out of range."
-	readRowsFromSheet(worksheet, file, sheet)
+	readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 }
 
 func (l *LibSuite) TestReadRowsFromSheetWithLeadingEmptyRows(c *C) {
@@ -549,7 +549,7 @@ func (l *LibSuite) TestReadRowsFromSheetWithLeadingEmptyRows(c *C) {
 	file := new(File)
 	file.referenceTable = MakeSharedStringRefTable(sst)
 	sheet := new(Sheet)
-	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet)
+	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	c.Assert(maxRows, Equals, 5)
 	c.Assert(maxCols, Equals, 1)
 
@@ -615,7 +615,7 @@ func (l *LibSuite) TestReadRowsFromSheetWithLeadingEmptyCols(c *C) {
 	file := new(File)
 	file.referenceTable = MakeSharedStringRefTable(sst)
 	sheet := new(Sheet)
-	rows, cols, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet)
+	rows, cols, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	c.Assert(maxRows, Equals, 2)
 	c.Assert(maxCols, Equals, 4)
 
@@ -754,7 +754,7 @@ func (l *LibSuite) TestReadRowsFromSheetWithEmptyCells(c *C) {
 	file := new(File)
 	file.referenceTable = MakeSharedStringRefTable(sst)
 	sheet := new(Sheet)
-	rows, cols, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet)
+	rows, cols, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	c.Assert(maxRows, Equals, 3)
 	c.Assert(maxCols, Equals, 3)
 
@@ -798,7 +798,7 @@ func (l *LibSuite) TestReadRowsFromSheetWithTrailingEmptyCells(c *C) {
 	file := new(File)
 	file.referenceTable = MakeSharedStringRefTable(sst)
 	sheet := new(Sheet)
-	rows, _, maxCol, maxRow := readRowsFromSheet(worksheet, file, sheet)
+	rows, _, maxCol, maxRow := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	c.Assert(maxCol, Equals, 4)
 	c.Assert(maxRow, Equals, 8)
 
@@ -908,7 +908,7 @@ func (l *LibSuite) TestReadRowsFromSheetWithMultipleSpans(c *C) {
 	file := new(File)
 	file.referenceTable = MakeSharedStringRefTable(sst)
 	sheet := new(Sheet)
-	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet)
+	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	c.Assert(maxRows, Equals, 2)
 	c.Assert(maxCols, Equals, 4)
 	row := rows[0]
@@ -983,7 +983,7 @@ func (l *LibSuite) TestReadRowsFromSheetWithMultipleTypes(c *C) {
 	file := new(File)
 	file.referenceTable = MakeSharedStringRefTable(sst)
 	sheet := new(Sheet)
-	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet)
+	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	c.Assert(maxRows, Equals, 1)
 	c.Assert(maxCols, Equals, 6)
 	row := rows[0]
@@ -1056,7 +1056,7 @@ func (l *LibSuite) TestReadRowsFromSheetWithHiddenColumn(c *C) {
 	file := new(File)
 	file.referenceTable = MakeSharedStringRefTable(sst)
 	sheet := new(Sheet)
-	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet)
+	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	c.Assert(maxRows, Equals, 1)
 	c.Assert(maxCols, Equals, 2)
 	row := rows[0]
@@ -1192,7 +1192,7 @@ func (l *LibSuite) TestSharedFormulas(c *C) {
 
 	file := new(File)
 	sheet := new(Sheet)
-	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet)
+	rows, _, maxCols, maxRows := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	c.Assert(maxCols, Equals, 3)
 	c.Assert(maxRows, Equals, 2)
 
@@ -1331,7 +1331,7 @@ func (l *LibSuite) TestRowNotOverwrittenWhenFollowedByEmptyRow(c *C) {
 	file.referenceTable = MakeSharedStringRefTable(sst)
 
 	sheet := new(Sheet)
-	rows, _, _, _ := readRowsFromSheet(worksheet, file, sheet)
+	rows, _, _, _ := readRowsFromSheet(worksheet, file, sheet, NoRowLimit)
 	cells := rows[3].Cells
 
 	c.Assert(cells, HasLen, 1)

+ 8 - 9
sheet.go

@@ -129,7 +129,7 @@ func (s *Sheet) handleMerged() {
 	for r, row := range s.Rows {
 		for c, cell := range row.Cells {
 			if cell.HMerge > 0 || cell.VMerge > 0 {
-				coord := fmt.Sprintf("%s%d", numericToLetters(c), r+1)
+				coord := GetCellIDStringFromCoords(c, r)
 				merged[coord] = cell
 			}
 		}
@@ -280,7 +280,7 @@ func (s *Sheet) makeXLSXSheet(refTable *RefTable, styles *xlsxStyleSheet) *xlsxW
 			style := cell.style
 			if style != nil {
 				XfId = handleStyleForXLSX(style, xNumFmt.NumFmtId, styles)
-			} else if len(cell.NumFmt) > 0 && s.Cols[c].numFmt != cell.NumFmt {
+			} else if len(cell.NumFmt) > 0 && !compareFormatString(s.Cols[c].numFmt, cell.NumFmt) {
 				XfId = handleNumFmtIdForXLSX(xNumFmt.NumFmtId, styles)
 			}
 
@@ -288,7 +288,7 @@ func (s *Sheet) makeXLSXSheet(refTable *RefTable, styles *xlsxStyleSheet) *xlsxW
 				maxCell = c
 			}
 			xC := xlsxC{}
-			xC.R = fmt.Sprintf("%s%d", numericToLetters(c), r+1)
+			xC.R = GetCellIDStringFromCoords(c, r)
 			switch cell.cellType {
 			case CellTypeString:
 				if len(cell.Value) > 0 {
@@ -325,10 +325,10 @@ func (s *Sheet) makeXLSXSheet(refTable *RefTable, styles *xlsxStyleSheet) *xlsxW
 			if cell.HMerge > 0 || cell.VMerge > 0 {
 				// r == rownum, c == colnum
 				mc := xlsxMergeCell{}
-				start := fmt.Sprintf("%s%d", numericToLetters(c), r+1)
-				endcol := c + cell.HMerge
-				endrow := r + cell.VMerge + 1
-				end := fmt.Sprintf("%s%d", numericToLetters(endcol), endrow)
+				start := GetCellIDStringFromCoords(c, r)
+				endCol := c + cell.HMerge
+				endRow := r + cell.VMerge
+				end := GetCellIDStringFromCoords(endCol, endRow)
 				mc.Ref = start + ":" + end
 				if worksheet.MergeCells == nil {
 					worksheet.MergeCells = &xlsxMergeCells{}
@@ -356,8 +356,7 @@ func (s *Sheet) makeXLSXSheet(refTable *RefTable, styles *xlsxStyleSheet) *xlsxW
 
 	worksheet.SheetData = xSheet
 	dimension := xlsxDimension{}
-	dimension.Ref = fmt.Sprintf("A1:%s%d",
-		numericToLetters(maxCell), maxRow+1)
+	dimension.Ref = "A1:" + GetCellIDStringFromCoords(maxCell, maxRow)
 	if dimension.Ref == "A1:A1" {
 		dimension.Ref = "A1"
 	}

+ 6 - 7
sheet_test.go

@@ -77,20 +77,19 @@ func (s *SheetSuite) TestMakeXLSXSheetWithNumFormats(c *C) {
 
 	c.Assert(styles.CellStyleXfs, IsNil)
 
-	c.Assert(styles.CellXfs.Count, Equals, 5)
+	c.Assert(styles.CellXfs.Count, Equals, 4)
 	c.Assert(styles.CellXfs.Xf[0].NumFmtId, Equals, 0)
-	c.Assert(styles.CellXfs.Xf[1].NumFmtId, Equals, 0)
-	c.Assert(styles.CellXfs.Xf[2].NumFmtId, Equals, 1)
-	c.Assert(styles.CellXfs.Xf[3].NumFmtId, Equals, 14)
-	c.Assert(styles.CellXfs.Xf[4].NumFmtId, Equals, 164)
+	c.Assert(styles.CellXfs.Xf[1].NumFmtId, Equals, 1)
+	c.Assert(styles.CellXfs.Xf[2].NumFmtId, Equals, 14)
+	c.Assert(styles.CellXfs.Xf[3].NumFmtId, Equals, 164)
 	c.Assert(styles.NumFmts.Count, Equals, 1)
 	c.Assert(styles.NumFmts.NumFmt[0].NumFmtId, Equals, 164)
 	c.Assert(styles.NumFmts.NumFmt[0].FormatCode, Equals, "hh:mm:ss")
 
 	// Finally we check that the cell points to the right CellXf /
 	// CellStyleXf.
-	c.Assert(worksheet.SheetData.Row[0].C[0].S, Equals, 1)
-	c.Assert(worksheet.SheetData.Row[0].C[1].S, Equals, 2)
+	c.Assert(worksheet.SheetData.Row[0].C[0].S, Equals, 0)
+	c.Assert(worksheet.SheetData.Row[0].C[1].S, Equals, 1)
 }
 
 // When we create the xlsxSheet we also populate the xlsxStyles struct

+ 217 - 0
stream_file.go

@@ -0,0 +1,217 @@
+package xlsx
+
+import (
+	"archive/zip"
+	"encoding/xml"
+	"errors"
+	"io"
+	"strconv"
+)
+
+// StreamFile writes the rows of an XLSX file sheet by sheet into a zip archive.
+//
+// xlsxFile is the File whose Sheets determine how many sheets exist and how many columns each has.
+// sheetXmlPrefix and sheetXmlSuffix hold, per sheet (indexed by sheet index minus one), the XML written
+// when a sheet is started and the XML written after endSheetDataTag when it is ended.
+// zipWriter is the archive in which one file per sheet is created.
+// currentSheet is the sheet rows are currently written to; it is nil until NextSheet has been called.
+// styleIds holds, per sheet, the style id of each column; NextSheet hands the matching slice to the sheet.
+// err is the error recorded by a failed call; while it is set, Write, WriteAll, NextSheet and Close
+// return it without doing any work.
+type StreamFile struct {
+	xlsxFile       *File
+	sheetXmlPrefix []string
+	sheetXmlSuffix []string
+	zipWriter      *zip.Writer
+	currentSheet   *streamSheet
+	styleIds       [][]int
+	err            error
+}
+
+// streamSheet is the state of the sheet that a StreamFile is currently writing.
+type streamSheet struct {
+	// index is the XLSX sheet index, which starts at 1
+	index int
+	// The number of rows that have been written to the sheet so far.
+	// NextSheet starts this at 1 (presumably to account for the header row - TODO confirm).
+	rowCount int
+	// The number of columns in the sheet
+	columnCount int
+	// The writer to write to this sheet's file in the XLSX Zip file.
+	// styleIds, below, holds the style id per column; columns whose id is 0 are written without a style attribute.
+	writer   io.Writer
+	styleIds []int
+}
+
+// Errors returned by StreamFile.
+// NoCurrentSheetError is returned when a row or sheet boundary is written while no sheet is selected.
+// WrongNumberOfRowsError is returned when a row does not have exactly as many cells as the sheet has
+// columns; despite its name it is about the number of cells, not rows.
+// AlreadyOnLastSheetError is returned by NextSheet when the current sheet is the last one.
+var (
+	NoCurrentSheetError     = errors.New("no Current Sheet")
+	WrongNumberOfRowsError  = errors.New("invalid number of cells passed to Write. All calls to Write on the same sheet must have the same number of cells")
+	AlreadyOnLastSheetError = errors.New("NextSheet() called, but already on last sheet")
+)
+
+// Write appends one row of cells to the current sheet and then flushes the zip writer.
+// The row must have exactly as many cells as the sheet has columns, otherwise an error is returned.
+// An error from writing the row is remembered, and every later call returns it immediately.
+// The result of the final flush is returned to the caller but is not remembered.
+// Currently the only supported data type is string data.
+func (sf *StreamFile) Write(cells []string) error {
+	if sf.err != nil {
+		return sf.err
+	}
+	if writeErr := sf.write(cells); writeErr != nil {
+		sf.err = writeErr
+		return writeErr
+	}
+	return sf.zipWriter.Flush()
+}
+
+// WriteAll appends every row in records to the current sheet, in order, and then flushes the zip writer.
+// It stops at the first row that fails; that error is remembered and returned, and rows written
+// before it stay written. Each row is subject to the same rules as a row passed to Write.
+func (sf *StreamFile) WriteAll(records [][]string) error {
+	if sf.err != nil {
+		return sf.err
+	}
+	for i := range records {
+		// sf.err is nil at this point, so storing a nil result leaves it unchanged.
+		if sf.err = sf.write(records[i]); sf.err != nil {
+			return sf.err
+		}
+	}
+	return sf.zipWriter.Flush()
+}
+
+func (sf *StreamFile) write(cells []string) error {
+	if sf.currentSheet == nil {
+		return NoCurrentSheetError
+	}
+	if len(cells) != sf.currentSheet.columnCount {
+		return WrongNumberOfRowsError
+	}
+	sf.currentSheet.rowCount++
+	if err := sf.currentSheet.write(`<row r="` + strconv.Itoa(sf.currentSheet.rowCount) + `">`); err != nil {
+		return err
+	}
+	for colIndex, cellData := range cells {
+		// documentation for the c.t (cell.Type) attribute:
+		// b (Boolean): Cell containing a boolean.
+		// d (Date): Cell contains a date in the ISO 8601 format.
+		// e (Error): Cell containing an error.
+		// inlineStr (Inline String): Cell containing an (inline) rich string, i.e., one not in the shared string table.
+		// If this cell type is used, then the cell value is in the is element rather than the v element in the cell (c element).
+		// n (Number): Cell containing a number.
+		// s (Shared String): Cell containing a shared string.
+		// str (String): Cell containing a formula string.
+		cellCoordinate := GetCellIDStringFromCoords(colIndex, sf.currentSheet.rowCount-1)
+		cellType := "inlineStr"
+		cellOpen := `<c r="` + cellCoordinate + `" t="` + cellType + `"`
+		// Add in the style id if the cell isn't using the default style
+		if colIndex < len(sf.currentSheet.styleIds) && sf.currentSheet.styleIds[colIndex] != 0 {
+			cellOpen += ` s="` + strconv.Itoa(sf.currentSheet.styleIds[colIndex]) + `"`
+		}
+		cellOpen += `><is><t>`
+		cellClose := `</t></is></c>`
+
+		if err := sf.currentSheet.write(cellOpen); err != nil {
+			return err
+		}
+		if err := xml.EscapeText(sf.currentSheet.writer, []byte(cellData)); err != nil {
+			return err
+		}
+		if err := sf.currentSheet.write(cellClose); err != nil {
+			return err
+		}
+	}
+	if err := sf.currentSheet.write(`</row>`); err != nil {
+		return err
+	}
+	return sf.zipWriter.Flush()
+}
+
+// Error reports the error currently recorded on the StreamFile, or nil if there is none.
+// Write, WriteAll, NextSheet and Close record the error they fail with. The result of the final
+// flush performed by Write and WriteAll is returned to their caller but is not recorded here.
+func (sf *StreamFile) Error() error {
+	return sf.err
+}
+
+// Flush flushes any buffered data to the underlying writer and records a failure of that flush
+// so that it can be retrieved with Error. If an error has already been recorded, Flush does
+// nothing, so that the earlier error is not overwritten.
+func (sf *StreamFile) Flush() {
+	// Flush only while the stream is healthy. The previous condition was inverted: it skipped the
+	// flush on a healthy stream and replaced a recorded error with the result of a new flush.
+	if sf.err == nil {
+		sf.err = sf.zipWriter.Flush()
+	}
+}
+
+// NextSheet will switch to the next sheet. Sheets are selected in the same order they were added.
+// Once you leave a sheet, you cannot return to it.
+func (sf *StreamFile) NextSheet() error {
+	if sf.err != nil {
+		return sf.err
+	}
+	var sheetIndex int
+	if sf.currentSheet != nil {
+		if sf.currentSheet.index >= len(sf.xlsxFile.Sheets) {
+			sf.err = AlreadyOnLastSheetError
+			return AlreadyOnLastSheetError
+		}
+		if err := sf.writeSheetEnd(); err != nil {
+			sf.currentSheet = nil
+			sf.err = err
+			return err
+		}
+		sheetIndex = sf.currentSheet.index
+	}
+	sheetIndex++
+	sf.currentSheet = &streamSheet{
+		index:       sheetIndex,
+		columnCount: len(sf.xlsxFile.Sheets[sheetIndex-1].Cols),
+		styleIds:    sf.styleIds[sheetIndex-1],
+		rowCount:    1,
+	}
+	sheetPath := sheetFilePathPrefix + strconv.Itoa(sf.currentSheet.index) + sheetFilePathSuffix
+	fileWriter, err := sf.zipWriter.Create(sheetPath)
+	if err != nil {
+		sf.err = err
+		return err
+	}
+	sf.currentSheet.writer = fileWriter
+
+	if err := sf.writeSheetStart(); err != nil {
+		sf.err = err
+		return err
+	}
+	return nil
+}
+
+// Close finishes the XLSX file and closes the zip writer.
+// Sheets that were never switched to still get a file of their own in the archive, because XLSX readers
+// may error if the sheets registered in the metadata are not present in the file.
+// If an error has been recorded earlier, Close returns it without doing anything.
+func (sf *StreamFile) Close() error {
+	if sf.err != nil {
+		return sf.err
+	}
+	if sf.currentSheet != nil {
+		// Step through the remaining sheets; NextSheet adds the file for each of them to the zip.
+		total := len(sf.xlsxFile.Sheets)
+		for sf.currentSheet.index < total {
+			if err := sf.NextSheet(); err != nil {
+				sf.err = err
+				return err
+			}
+		}
+		// NextSheet only ends the sheet it leaves, so the last sheet is ended here.
+		if err := sf.writeSheetEnd(); err != nil {
+			sf.err = err
+			return err
+		}
+	}
+	if err := sf.zipWriter.Close(); err != nil {
+		sf.err = err
+		return err
+	}
+	return nil
+}
+
+// writeSheetStart writes the stored XML prefix of the current sheet to that sheet's writer.
+// It returns NoCurrentSheetError when no sheet is selected.
+func (sf *StreamFile) writeSheetStart() error {
+	sheet := sf.currentSheet
+	if sheet == nil {
+		return NoCurrentSheetError
+	}
+	// sheet.index is one based, the prefix slice is zero based.
+	prefix := sf.sheetXmlPrefix[sheet.index-1]
+	return sheet.write(prefix)
+}
+
+// writeSheetEnd closes the sheetData element of the current sheet and then writes the stored
+// XML suffix of that sheet. It returns NoCurrentSheetError when no sheet is selected.
+func (sf *StreamFile) writeSheetEnd() error {
+	sheet := sf.currentSheet
+	if sheet == nil {
+		return NoCurrentSheetError
+	}
+	err := sheet.write(endSheetDataTag)
+	if err != nil {
+		return err
+	}
+	// sheet.index is one based, the suffix slice is zero based.
+	suffix := sf.sheetXmlSuffix[sheet.index-1]
+	return sheet.write(suffix)
+}
+
+// write sends data to the sheet's writer and reports only whether that failed.
+func (ss *streamSheet) write(data string) error {
+	if _, err := io.WriteString(ss.writer, data); err != nil {
+		return err
+	}
+	return nil
+}

+ 245 - 0
stream_file_builder.go

@@ -0,0 +1,245 @@
+// Authors: Ryan Hollis (ryanh@)
+
+// The purpose of StreamFileBuilder and StreamFile is to allow streamed writing of XLSX files.
+// Directions:
+// 1. Create a StreamFileBuilder with NewStreamFileBuilder() or NewStreamFileBuilderForPath().
+// 2. Add the sheets and their first row of data by calling AddSheet().
+// 3. Call Build() to get a StreamFile. Once built, all functions on the builder will return an error.
+// 4. Write to the StreamFile with Write(). Writes begin on the first sheet. New rows are always written and flushed
+// to the io. All rows written to the same sheet must have the same number of cells as the header provided when the sheet
+// was created or an error will be returned.
+// 5. Call NextSheet() to proceed to the next sheet. Once NextSheet() is called, the previous sheet can not be edited.
+// 6. Call Close() to finish.
+
+// Future work suggestions:
+// Currently the only supported cell type is string, since the main reason this library was written was to prevent
+// strings from being interpreted as numbers. It would be nice to have support for numbers and money so that the exported
+// files could better take advantage of XLSX's features.
+// All text is written with the same text style. Support for additional text styles could be added to highlight certain
+// data in the file.
+// The current default style uses fonts that are not on Macs by default so opening the XLSX files in Numbers causes a
+// pop up that says there are missing fonts. The font could be changed to something that is usually found on Mac and PC.
+
+package xlsx
+
+import (
+	"archive/zip"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// StreamFileBuilder collects the sheets of an XLSX file and their header rows before streaming starts.
+//
+// built is set once Build has been called or AddSheet has failed; after that the builder refuses further use.
+// xlsxFile is the File the sheets and headers are added to; Build marshals it into the parts of the XLSX file.
+// zipWriter is the zip archive wrapping the destination writer.
+// cellTypeToStyleIds maps each CellType passed to AddSheet to the style id assigned to it.
+// maxStyleId is the highest style id handed out so far; it starts at initMaxStyleId.
+// styleIds holds, per sheet, the style id of each column for which AddSheet was given a cell type entry
+// (0 where that entry was nil).
+type StreamFileBuilder struct {
+	built              bool
+	xlsxFile           *File
+	zipWriter          *zip.Writer
+	cellTypeToStyleIds map[CellType]int
+	maxStyleId         int
+	styleIds           [][]int
+}
+
+const (
+	// sheetFilePathPrefix and sheetFilePathSuffix surround the sheet index in the path of a sheet's file
+	// inside the zip, e.g. xl/worksheets/sheet1.xml.
+	// endSheetDataTag is the closing tag at which a sheet's XML is split so that rows can be inserted before it.
+	// dimensionTag is the format of the dimension element that is removed from a sheet's XML; %s is its ref.
+	sheetFilePathPrefix = "xl/worksheets/sheet"
+	sheetFilePathSuffix = ".xml"
+	endSheetDataTag     = "</sheetData>"
+	dimensionTag        = `<dimension ref="%s"></dimension>`
+	// This is the index of the max style that this library will insert into XLSX sheets by default.
+	// This allows us to predict what the style id of styles that we add will be.
+	// TestXlsxStyleBehavior tests that this behavior continues to be what we expect.
+	initMaxStyleId = 1
+)
+
+// BuiltStreamFileBuilderError is returned by AddSheet and Build once the builder has been marked as built,
+// which happens when Build is called and also when AddSheet fails.
+var BuiltStreamFileBuilderError = errors.New("StreamFileBuilder has already been built, functions may no longer be used")
+
+// NewStreamFileBuilder creates a StreamFileBuilder that will write to the provided io.Writer.
+// The XLSX data is written as a zip archive; the builder starts out with a new, empty File
+// and no styles assigned beyond initMaxStyleId.
+func NewStreamFileBuilder(writer io.Writer) *StreamFileBuilder {
+	builder := new(StreamFileBuilder)
+	builder.zipWriter = zip.NewWriter(writer)
+	builder.xlsxFile = NewFile()
+	builder.cellTypeToStyleIds = make(map[CellType]int)
+	builder.maxStyleId = initMaxStyleId
+	return builder
+}
+
+// NewStreamFileBuilderForPath returns a builder that writes the XLSX file to the given path.
+// The file is created if it does not exist and truncated if it does; a failure to do so is returned.
+// NOTE(review): the builder only receives the file as an io.Writer, so nothing visible here closes
+// the file handle - confirm who is responsible for closing it.
+func NewStreamFileBuilderForPath(path string) (*StreamFileBuilder, error) {
+	out, createErr := os.Create(path)
+	if createErr == nil {
+		return NewStreamFileBuilder(out), nil
+	}
+	return nil, createErr
+}
+
+// AddSheet will add sheets with the given name with the provided headers. The headers cannot be edited later, and all
+// rows written to the sheet must contain the same number of cells as the header. Sheet names must be unique, or an
+// error will be thrown.
+func (sb *StreamFileBuilder) AddSheet(name string, headers []string, cellTypes []*CellType) error {
+	if sb.built {
+		return BuiltStreamFileBuilderError
+	}
+	if len(cellTypes) > len(headers) {
+		return errors.New("cellTypes is longer than headers")
+	}
+	sheet, err := sb.xlsxFile.AddSheet(name)
+	if err != nil {
+		// Set built on error so that all subsequent calls to the builder will also fail.
+		sb.built = true
+		return err
+	}
+	sb.styleIds = append(sb.styleIds, []int{})
+	row := sheet.AddRow()
+	if count := row.WriteSlice(&headers, -1); count != len(headers) {
+		// Set built on error so that all subsequent calls to the builder will also fail.
+		sb.built = true
+		return errors.New("failed to write headers")
+	}
+	for i, cellType := range cellTypes {
+		var cellStyleIndex int
+		var ok bool
+		if cellType != nil {
+			// The cell type is one of the attributes of a Style.
+			// Since it is the only attribute of Style that we use, we can assume that cell types
+			// map one to one with Styles and their Style ID.
+			// If a new cell type is used, a new style gets created with an increased id, if an existing cell type is
+			// used, the pre-existing style will also be used.
+			cellStyleIndex, ok = sb.cellTypeToStyleIds[*cellType]
+			if !ok {
+				sb.maxStyleId++
+				cellStyleIndex = sb.maxStyleId
+				sb.cellTypeToStyleIds[*cellType] = sb.maxStyleId
+			}
+			sheet.Cols[i].SetType(*cellType)
+		}
+		sb.styleIds[len(sb.styleIds)-1] = append(sb.styleIds[len(sb.styleIds)-1], cellStyleIndex)
+	}
+	return nil
+}
+
+// Build starts streaming the XLSX file by writing all of its metadata files to the zip, and returns
+// the StreamFile through which the rows of the sheets are written. The returned StreamFile is already
+// positioned on the first sheet. Build can be called only once; afterwards the builder is unusable.
+func (sb *StreamFileBuilder) Build() (*StreamFile, error) {
+	if sb.built {
+		return nil, BuiltStreamFileBuilderError
+	}
+	sb.built = true
+	parts, err := sb.xlsxFile.MarshallParts()
+	if err != nil {
+		return nil, err
+	}
+	sheetCount := len(sb.xlsxFile.Sheets)
+	stream := &StreamFile{
+		zipWriter:      sb.zipWriter,
+		xlsxFile:       sb.xlsxFile,
+		sheetXmlPrefix: make([]string, sheetCount),
+		sheetXmlSuffix: make([]string, sheetCount),
+		styleIds:       sb.styleIds,
+	}
+	for partPath, contents := range parts {
+		if !strings.HasPrefix(partPath, sheetFilePathPrefix) {
+			// Anything that is not a sheet is metadata and is written right away.
+			partWriter, err := sb.zipWriter.Create(partPath)
+			if err != nil {
+				return nil, err
+			}
+			if _, err = partWriter.Write([]byte(contents)); err != nil {
+				return nil, err
+			}
+			continue
+		}
+		// Sheets are not written yet, since their rows are still to come. Only the XML around
+		// the rows is remembered, to be written when the sheet is started and ended.
+		if err := sb.processEmptySheetXML(stream, partPath, contents); err != nil {
+			return nil, err
+		}
+	}
+
+	if err := stream.NextSheet(); err != nil {
+		return nil, err
+	}
+	return stream, nil
+}
+
+// processEmptySheetXML takes the path and the XML of a sheet that has no streamed rows yet and stores
+// the beginning and the end of that XML on sf, so that both can be written at the right time.
+// It fails if the path does not identify a known sheet or if the XML does not have the expected shape.
+func (sb *StreamFileBuilder) processEmptySheetXML(sf *StreamFile, path, data string) error {
+	// The path tells which sheet the XML belongs to.
+	sheetIndex, err := getSheetIndex(sf, path)
+	if err != nil {
+		return err
+	}
+
+	// The Dimension tag is dropped because it will be wrong once more rows have been written.
+	// It is valid for a sheet to be missing a Dimension tag, but it is not valid for it to be wrong.
+	withoutDimension, err := removeDimensionTag(data, sf.xlsxFile.Sheets[sheetIndex])
+	if err != nil {
+		return err
+	}
+
+	// Rows are added just before the end of the SheetData tag, so the XML is split there.
+	before, after, err := splitSheetIntoPrefixAndSuffix(withoutDimension)
+	if err != nil {
+		return err
+	}
+	sf.sheetXmlPrefix[sheetIndex] = before
+	sf.sheetXmlSuffix[sheetIndex] = after
+	return nil
+}
+
+// getSheetIndex parses the path to the XLSX sheet data and returns the zero based index of the sheet.
+// The files that store the data for each sheet must have the format:
+// xl/worksheets/sheet123.xml
+// where 123 is the one based index of the sheet. This file path format is part of the XLSX file standard.
+// An error is returned, with index -1, if the path does not have this format or if the index does not
+// refer to a sheet that sf has room for.
+func getSheetIndex(sf *StreamFile, path string) (int, error) {
+	// Validate the shape of the path before cutting it apart. Slicing blindly would panic on a path
+	// shorter than prefix plus suffix and would silently accept a path with a different extension.
+	if !strings.HasPrefix(path, sheetFilePathPrefix) || !strings.HasSuffix(path, sheetFilePathSuffix) {
+		return -1, errors.New("Unexpected sheet file name from xlsx package")
+	}
+	indexString := strings.TrimSuffix(strings.TrimPrefix(path, sheetFilePathPrefix), sheetFilePathSuffix)
+	sheetXLSXIndex, err := strconv.Atoi(indexString)
+	if err != nil {
+		return -1, errors.New("Unexpected sheet file name from xlsx package")
+	}
+	if sheetXLSXIndex < 1 || len(sf.sheetXmlPrefix) < sheetXLSXIndex ||
+		len(sf.sheetXmlSuffix) < sheetXLSXIndex || len(sf.xlsxFile.Sheets) < sheetXLSXIndex {
+		return -1, errors.New("Unexpected sheet index")
+	}
+	// XLSX sheet indexes start at 1, slice indexes at 0.
+	sheetArrayIndex := sheetXLSXIndex - 1
+	return sheetArrayIndex, nil
+}
+
+// removeDimensionTag will return the passed in XLSX Spreadsheet XML with the dimension tag removed.
+// data is the XML data for the sheet
+// sheet is the Sheet struct that the XML was created from.
+// Can return an error if the XML's dimension tag does not match was is expected based on the provided Sheet
+func removeDimensionTag(data string, sheet *Sheet) (string, error) {
+	x := len(sheet.Cols) - 1
+	y := len(sheet.Rows) - 1
+	if x < 0 {
+		x = 0
+	}
+	if y < 0 {
+		y = 0
+	}
+	var dimensionRef string
+	if x == 0 && y == 0 {
+		dimensionRef = "A1"
+	} else {
+		endCoordinate := GetCellIDStringFromCoords(x, y)
+		dimensionRef = "A1:" + endCoordinate
+	}
+	dataParts := strings.Split(data, fmt.Sprintf(dimensionTag, dimensionRef))
+	if len(dataParts) != 2 {
+		return "", errors.New("unexpected Sheet XML: dimension tag not found")
+	}
+	return dataParts[0] + dataParts[1], nil
+}
+
+// splitSheetIntoPrefixAndSuffix cuts the provided sheet XML at the closing tag of its SheetData element,
+// so that more spreadsheet rows can be inserted in between. It returns the XML before and the XML
+// after that tag; the tag itself is part of neither. An error is returned unless the tag occurs
+// exactly once.
+func splitSheetIntoPrefixAndSuffix(data string) (string, string, error) {
+	pieces := strings.Split(data, endSheetDataTag)
+	if len(pieces) == 2 {
+		return pieces[0], pieces[1], nil
+	}
+	return "", "", errors.New("unexpected Sheet XML: SheetData close tag not found")
+}

+ 487 - 0
stream_test.go

@@ -0,0 +1,487 @@
+package xlsx
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"reflect"
+	"strings"
+
+	. "gopkg.in/check.v1"
+)
+
+const (
+	// TestsShouldMakeRealFiles makes the stream tests write real XLSX files to the file system
+	// instead of an in-memory buffer. Only set it to true for local debugging.
+	TestsShouldMakeRealFiles = false
+)
+
+// StreamSuite groups the gocheck tests for the stream writing API.
+type StreamSuite struct{}
+
+// Register StreamSuite itself (not SheetSuite) so that the tests defined on it are actually run.
+var _ = Suite(&StreamSuite{})
+
+// TestTestsShouldMakeRealFilesShouldBeFalse fails when the TestsShouldMakeRealFiles debugging
+// switch has been left enabled.
+func (s *StreamSuite) TestTestsShouldMakeRealFilesShouldBeFalse(t *C) {
+	if !TestsShouldMakeRealFiles {
+		return
+	}
+	t.Fatal("TestsShouldMakeRealFiles should only be true for local debugging. Don't forget to switch back before commiting.")
+}
+
+// TestXlsxStreamWrite writes each test case's workbook with the stream API and, for the cases that
+// are expected to succeed, reads the result back and compares sheet names and cell data.
+func (s *StreamSuite) TestXlsxStreamWrite(t *C) {
+	// When TestsShouldMakeRealFiles is set to true this test will make actual XLSX files in the file system.
+	// This is useful to ensure files open in Excel, Numbers, Google Docs, etc.
+	// In case of issues you can use "Open XML SDK 2.5" to diagnose issues in generated XLSX files:
+	// https://www.microsoft.com/en-us/download/details.aspx?id=30425
+	testCases := []struct {
+		testName      string
+		sheetNames    []string
+		workbookData  [][][]string
+		headerTypes   [][]*CellType
+		expectedError error
+	}{
+		{
+			testName: "One Sheet",
+			sheetNames: []string{
+				"Sheet1",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token", "Name", "Price", "SKU"},
+					{"123", "Taco", "300", "0000000123"},
+				},
+			},
+			headerTypes: [][]*CellType{
+				{nil, CellTypeString.Ptr(), nil, CellTypeString.Ptr()},
+			},
+		},
+		{
+			testName: "One Column",
+			sheetNames: []string{
+				"Sheet1",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token"},
+					{"123"},
+				},
+			},
+		},
+		{
+			testName: "Several Sheets, with different numbers of columns and rows",
+			sheetNames: []string{
+				"Sheet 1", "Sheet 2", "Sheet3",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token", "Name", "Price", "SKU"},
+					{"123", "Taco", "300", "0000000123"},
+				},
+				{
+					{"Token", "Name", "Price", "SKU", "Stock"},
+					{"456", "Salsa", "200", "0346", "1"},
+					{"789", "Burritos", "400", "754", "3"},
+				},
+				{
+					{"Token", "Name", "Price"},
+					{"9853", "Guacamole", "500"},
+					{"2357", "Margarita", "700"},
+				},
+			},
+		},
+		{
+			testName: "Two Sheets with same the name",
+			sheetNames: []string{
+				"Sheet 1", "Sheet 1",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token", "Name", "Price", "SKU"},
+					{"123", "Taco", "300", "0000000123"},
+				},
+				{
+					{"Token", "Name", "Price", "SKU", "Stock"},
+					{"456", "Salsa", "200", "0346", "1"},
+					{"789", "Burritos", "400", "754", "3"},
+				},
+			},
+			expectedError: fmt.Errorf("duplicate sheet name '%s'.", "Sheet 1"),
+		},
+		{
+			testName: "One Sheet Registered, tries to write to two",
+			sheetNames: []string{
+				"Sheet 1",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token", "Name", "Price", "SKU"},
+					{"123", "Taco", "300", "0000000123"},
+				},
+				{
+					{"Token", "Name", "Price", "SKU"},
+					{"456", "Salsa", "200", "0346"},
+				},
+			},
+			expectedError: AlreadyOnLastSheetError,
+		},
+		{
+			testName: "One Sheet, too many columns in row 1",
+			sheetNames: []string{
+				"Sheet 1",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token", "Name", "Price", "SKU"},
+					{"123", "Taco", "300", "0000000123", "asdf"},
+				},
+			},
+			expectedError: WrongNumberOfRowsError,
+		},
+		{
+			testName: "One Sheet, too few columns in row 1",
+			sheetNames: []string{
+				"Sheet 1",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token", "Name", "Price", "SKU"},
+					{"123", "Taco", "300"},
+				},
+			},
+			expectedError: WrongNumberOfRowsError,
+		},
+		{
+			testName: "Lots of Sheets, only writes rows to one, only writes headers to one, should not error and should still create a valid file",
+			sheetNames: []string{
+				"Sheet 1", "Sheet 2", "Sheet 3", "Sheet 4", "Sheet 5", "Sheet 6",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token", "Name", "Price", "SKU"},
+					{"123", "Taco", "300", "0000000123"},
+				},
+				{{}},
+				{{"Id", "Unit Cost"}},
+				{{}},
+				{{}},
+				{{}},
+			},
+		},
+		{
+			testName: "Two Sheets, only writes to one, should not error and should still create a valid file",
+			sheetNames: []string{
+				"Sheet 1", "Sheet 2",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token", "Name", "Price", "SKU"},
+					{"123", "Taco", "300", "0000000123"},
+				},
+				{{}},
+			},
+		},
+		{
+			testName: "Larger Sheet",
+			sheetNames: []string{
+				"Sheet 1",
+			},
+			workbookData: [][][]string{
+				{
+					{"Token", "Name", "Price", "SKU", "Token", "Name", "Price", "SKU", "Token", "Name", "Price", "SKU", "Token", "Name", "Price", "SKU", "Token", "Name", "Price", "SKU", "Token", "Name", "Price", "SKU"},
+					{"123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123"},
+					{"456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346"},
+					{"789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754"},
+					{"123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123"},
+					{"456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346"},
+					{"789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754"},
+					{"123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123"},
+					{"456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346"},
+					{"789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754"},
+					{"123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123"},
+					{"456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346"},
+					{"789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754"},
+					{"123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123"},
+					{"456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346"},
+					{"789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754"},
+					{"123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123"},
+					{"456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346"},
+					{"789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754"},
+					{"123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123"},
+					{"456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346"},
+					{"789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754"},
+					{"123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123", "123", "Taco", "300", "0000000123"},
+					{"456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346", "456", "Salsa", "200", "0346"},
+					{"789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754", "789", "Burritos", "400", "754"},
+				},
+			},
+		},
+		{
+			testName: "UTF-8 Characters. This XLSX File loads correctly with Excel, Numbers, and Google Docs. It also passes Microsoft's Office File Format Validator.",
+			sheetNames: []string{
+				"Sheet1",
+			},
+			workbookData: [][][]string{
+				{
+					// String courtesy of https://github.com/minimaxir/big-list-of-naughty-strings/
+					// Header row contains the tags that I am filtering on
+					{"Token", endSheetDataTag, "Price", fmt.Sprintf(dimensionTag, "A1:D1")},
+					// Japanese and emojis
+					{"123", "パーティーへ行かないか", "300", "🍕🐵 🙈 🙉 🙊"},
+					// XML encoder/parser test strings
+					{"123", `<?xml version="1.0" encoding="ISO-8859-1"?>`, "300", `<?xml version="1.0" encoding="ISO-8859-1"?><!DOCTYPE foo [ <!ELEMENT foo ANY ><!ENTITY xxe SYSTEM "file:///etc/passwd" >]><foo>&xxe;</foo>`},
+					// Upside down text and Right to Left Arabic text
+					{"123", `˙ɐnbᴉlɐ ɐuƃɐɯ ǝɹolop ʇǝ ǝɹoqɐl ʇn ʇunpᴉpᴉɔuᴉ ɹodɯǝʇ poɯsnᴉǝ op pǝs 'ʇᴉlǝ ƃuᴉɔsᴉdᴉpɐ ɹnʇǝʇɔǝsuoɔ 'ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥
+					00˙Ɩ$-`, "300", `ﷺ`},
+					{"123", "Taco", "300", "0000000123"},
+				},
+			},
+		},
+	}
+	for i, testCase := range testCases {
+		var filePath string
+		var buffer bytes.Buffer
+		if TestsShouldMakeRealFiles {
+			filePath = fmt.Sprintf("Workbook%d.xlsx", i)
+		}
+		err := writeStreamFile(filePath, &buffer, testCase.sheetNames, testCase.workbookData, testCase.headerTypes, TestsShouldMakeRealFiles)
+		// Errors match when they are the same value (including both nil) or carry the same message.
+		// Check for nil before calling Error() so that a missing or unexpected error fails the
+		// test instead of panicking with a nil dereference.
+		if err != testCase.expectedError &&
+			(err == nil || testCase.expectedError == nil || err.Error() != testCase.expectedError.Error()) {
+			t.Fatalf("Error differs from expected error. Error: %v, Expected Error: %v ", err, testCase.expectedError)
+		}
+		if testCase.expectedError != nil {
+			// Nothing to read back for a failing write; move on so the remaining cases still run.
+			continue
+		}
+		// read the file back with the xlsx package
+		var bufReader *bytes.Reader
+		var size int64
+		if !TestsShouldMakeRealFiles {
+			bufReader = bytes.NewReader(buffer.Bytes())
+			size = bufReader.Size()
+		}
+		actualSheetNames, actualWorkbookData := readXLSXFile(t, filePath, bufReader, size, TestsShouldMakeRealFiles)
+		// check if data was able to be read correctly
+		if !reflect.DeepEqual(actualSheetNames, testCase.sheetNames) {
+			t.Fatal("Expected sheet names to be equal")
+		}
+		if !reflect.DeepEqual(actualWorkbookData, testCase.workbookData) {
+			t.Fatal("Expected workbook data to be equal")
+		}
+	}
+}
+
+// The purpose of TestXlsxStyleBehavior is to ensure that initMaxStyleId has the correct starting value
+// and that the logic in AddSheet() that predicts Style IDs is correct.
+func (s *StreamSuite) TestXlsxStyleBehavior(t *C) {
+	file := NewFile()
+	sheet, err := file.AddSheet("Sheet 1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	row := sheet.AddRow()
+	rowData := []string{"testing", "1", "2", "3"}
+	if count := row.WriteSlice(&rowData, -1); count != len(rowData) {
+		t.Fatal("not enough cells written")
+	}
+	parts, err := file.MarshallParts()
+	// Fail on a marshalling error directly rather than reporting a misleading missing style sheet.
+	if err != nil {
+		t.Fatal(err)
+	}
+	styleSheet, ok := parts["xl/styles.xml"]
+	if !ok {
+		t.Fatal("no style sheet")
+	}
+	// Created an XLSX file with only the default style.
+	// We expect that the number of styles is one more than our max index constant.
+	// This means the library adds two styles by default.
+	if !strings.Contains(styleSheet, fmt.Sprintf(`<cellXfs count="%d">`, initMaxStyleId+1)) {
+		t.Fatal("Expected sheet to have two styles")
+	}
+
+	file = NewFile()
+	sheet, err = file.AddSheet("Sheet 1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	row = sheet.AddRow()
+	rowData = []string{"testing", "1", "2", "3", "4"}
+	if count := row.WriteSlice(&rowData, -1); count != len(rowData) {
+		t.Fatal("not enough cells written")
+	}
+	sheet.Cols[0].SetType(CellTypeString)
+	sheet.Cols[1].SetType(CellTypeString)
+	sheet.Cols[3].SetType(CellTypeNumeric)
+	sheet.Cols[4].SetType(CellTypeString)
+	parts, err = file.MarshallParts()
+	if err != nil {
+		t.Fatal(err)
+	}
+	styleSheet, ok = parts["xl/styles.xml"]
+	if !ok {
+		t.Fatal("no style sheet")
+	}
+	// Created an XLSX file with two distinct cell types, which should create two new styles.
+	// The same cell type was added three times, this should be coalesced into the same style rather than
+	// recreating the style. This XLSX stream library depends on this behavior when predicting the next style id.
+	if !strings.Contains(styleSheet, fmt.Sprintf(`<cellXfs count="%d">`, initMaxStyleId+1+2)) {
+		t.Fatal("Expected sheet to have four styles")
+	}
+}
+
+// writeStreamFile writes a workbook using this stream package. One sheet is registered per entry of
+// sheetNames, using the first row of the matching workbookData entry as its header; the remaining
+// rows are then streamed sheet by sheet. Output goes to filePath when shouldMakeRealFiles is true and
+// to fileBuffer otherwise. The first error encountered is returned.
+func writeStreamFile(filePath string, fileBuffer io.Writer, sheetNames []string, workbookData [][][]string, headerTypes [][]*CellType, shouldMakeRealFiles bool) error {
+	var builder *StreamFileBuilder
+	if shouldMakeRealFiles {
+		var err error
+		builder, err = NewStreamFileBuilderForPath(filePath)
+		if err != nil {
+			return err
+		}
+	} else {
+		builder = NewStreamFileBuilder(fileBuffer)
+	}
+	for sheetIdx, sheetName := range sheetNames {
+		header := workbookData[sheetIdx][0]
+		// Header types are optional; sheets without an entry are registered with nil.
+		var sheetHeaderTypes []*CellType
+		if sheetIdx < len(headerTypes) {
+			sheetHeaderTypes = headerTypes[sheetIdx]
+		}
+		if err := builder.AddSheet(sheetName, header, sheetHeaderTypes); err != nil {
+			return err
+		}
+	}
+	streamFile, err := builder.Build()
+	if err != nil {
+		return err
+	}
+	for sheetIdx, sheetData := range workbookData {
+		// The stream starts on the first sheet, so only advance for the following ones.
+		if sheetIdx > 0 {
+			if err := streamFile.NextSheet(); err != nil {
+				return err
+			}
+		}
+		for rowIdx, row := range sheetData {
+			// Row 0 is the header, which was already handed to AddSheet.
+			if rowIdx == 0 {
+				continue
+			}
+			if err := streamFile.Write(row); err != nil {
+				return err
+			}
+		}
+	}
+	return streamFile.Close()
+}
+
+// readXLSXFile reads a workbook back using the xlsx package and returns its sheet names together
+// with the formatted value of every cell, grouped by sheet and row. The workbook is opened from
+// filePath when shouldMakeRealFiles is true and from fileBuffer (of the given size) otherwise.
+// Any failure aborts the test through t.Fatal.
+func readXLSXFile(t *C, filePath string, fileBuffer io.ReaderAt, size int64, shouldMakeRealFiles bool) ([]string, [][][]string) {
+	var readFile *File
+	var err error
+	if shouldMakeRealFiles {
+		readFile, err = OpenFile(filePath)
+	} else {
+		readFile, err = OpenReaderAt(fileBuffer, size)
+	}
+	if err != nil {
+		t.Fatal(err)
+	}
+	var names []string
+	var workbook [][][]string
+	for _, sheet := range readFile.Sheets {
+		// Rows and cells start as empty, non-nil slices: reflect.DeepEqual treats a nil slice
+		// and an empty slice as different values.
+		rows := [][]string{}
+		for _, row := range sheet.Rows {
+			values := []string{}
+			for _, cell := range row.Cells {
+				value, err := cell.FormattedValue()
+				if err != nil {
+					t.Fatal(err)
+				}
+				values = append(values, value)
+			}
+			rows = append(rows, values)
+		}
+		names = append(names, sheet.Name)
+		workbook = append(workbook, rows)
+	}
+	return names, workbook
+}
+
+// TestAddSheetErrorsAfterBuild checks that AddSheet returns BuiltStreamFileBuilderError once
+// Build has been called on the builder.
+func (s *StreamSuite) TestAddSheetErrorsAfterBuild(t *C) {
+	builder := NewStreamFileBuilder(bytes.NewBuffer(nil))
+
+	if err := builder.AddSheet("Sheet1", []string{"Header"}, nil); err != nil {
+		t.Fatal(err)
+	}
+	if err := builder.AddSheet("Sheet2", []string{"Header2"}, nil); err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := builder.Build(); err != nil {
+		t.Fatal(err)
+	}
+	// The builder has been built, so registering another sheet must be rejected.
+	if err := builder.AddSheet("Sheet3", []string{"Header3"}, nil); err != BuiltStreamFileBuilderError {
+		t.Fatal(err)
+	}
+}
+
+// TestBuildErrorsAfterBuild checks that a second call to Build on the same builder returns
+// BuiltStreamFileBuilderError.
+func (s *StreamSuite) TestBuildErrorsAfterBuild(t *C) {
+	builder := NewStreamFileBuilder(bytes.NewBuffer(nil))
+
+	if err := builder.AddSheet("Sheet1", []string{"Header"}, nil); err != nil {
+		t.Fatal(err)
+	}
+	if err := builder.AddSheet("Sheet2", []string{"Header2"}, nil); err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := builder.Build(); err != nil {
+		t.Fatal(err)
+	}
+	// Building the same builder again must be rejected.
+	if _, err := builder.Build(); err != BuiltStreamFileBuilderError {
+		t.Fatal(err)
+	}
+}
+
+// TestCloseWithNothingWrittenToSheets builds a two sheet stream file, closes it without writing
+// any rows, and checks that reading it back yields the sheet names and the header rows only.
+func (s *StreamSuite) TestCloseWithNothingWrittenToSheets(t *C) {
+	sheetNames := []string{"Sheet1", "Sheet2"}
+	workbookData := [][][]string{
+		{{"Header1", "Header2"}},
+		{{"Header3", "Header4"}},
+	}
+
+	buffer := bytes.NewBuffer(nil)
+	builder := NewStreamFileBuilder(buffer)
+	if err := builder.AddSheet(sheetNames[0], workbookData[0][0], nil); err != nil {
+		t.Fatal(err)
+	}
+	if err := builder.AddSheet(sheetNames[1], workbookData[1][0], nil); err != nil {
+		t.Fatal(err)
+	}
+
+	stream, err := builder.Build()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err = stream.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Read the in-memory file back and compare it with what was registered.
+	bufReader := bytes.NewReader(buffer.Bytes())
+	actualSheetNames, actualWorkbookData := readXLSXFile(t, "", bufReader, bufReader.Size(), false)
+	if !reflect.DeepEqual(actualSheetNames, sheetNames) {
+		t.Fatal("Expected sheet names to be equal")
+	}
+	if !reflect.DeepEqual(actualWorkbookData, workbookData) {
+		t.Fatal("Expected workbook data to be equal")
+	}
+}

BIN
testdocs/large_sheet_large_sharedstrings_dimension_tag.xlsx


BIN
testdocs/large_sheet_no_shared_strings_no_dimension_tag.xlsx


+ 39 - 18
xmlStyle.go

@@ -12,7 +12,6 @@ import (
 	"encoding/xml"
 	"fmt"
 	"strconv"
-	"strings"
 	"sync"
 )
 
@@ -58,6 +57,19 @@ var builtInNumFmt = map[int]string{
 	49: "@",
 }
 
+// These are the color annotations from number format codes that contain color names.
+// Also possible are [color1] through [color56]
+var numFmtColorCodes = []string{
+	"[red]",
+	"[black]",
+	"[green]",
+	"[white]",
+	"[blue]",
+	"[magenta]",
+	"[yellow]",
+	"[cyan]",
+}
+
 var builtInNumFmtInv = make(map[string]int, 40)
 
 func init() {
@@ -91,9 +103,10 @@ type xlsxStyleSheet struct {
 
 	theme *theme
 
-	sync.RWMutex   // protects the following
-	styleCache     map[int]*Style
-	numFmtRefTable map[int]xlsxNumFmt
+	sync.RWMutex      // protects the following
+	styleCache        map[int]*Style
+	numFmtRefTable    map[int]xlsxNumFmt
+	parsedNumFmtTable map[string]*parsedNumberFormat
 }
 
 func newXlsxStyleSheet(t *theme) *xlsxStyleSheet {
@@ -220,22 +233,30 @@ func getBuiltinNumberFormat(numFmtId int) string {
 	return builtInNumFmt[numFmtId]
 }
 
-func (styles *xlsxStyleSheet) getNumberFormat(styleIndex int) string {
-	if styles.CellXfs.Xf == nil {
-		return ""
-	}
-	var numberFormat string = ""
-	if styleIndex > -1 && styleIndex <= styles.CellXfs.Count {
-		xf := styles.CellXfs.Xf[styleIndex]
-		if builtin := getBuiltinNumberFormat(xf.NumFmtId); builtin != "" {
-			return builtin
+func (styles *xlsxStyleSheet) getNumberFormat(styleIndex int) (string, *parsedNumberFormat) {
+	var numberFormat string = "general"
+	if styles.CellXfs.Xf != nil {
+		if styleIndex > -1 && styleIndex <= styles.CellXfs.Count {
+			xf := styles.CellXfs.Xf[styleIndex]
+			if builtin := getBuiltinNumberFormat(xf.NumFmtId); builtin != "" {
+				numberFormat = builtin
+			} else {
+				if styles.numFmtRefTable != nil {
+					numFmt := styles.numFmtRefTable[xf.NumFmtId]
+					numberFormat = numFmt.FormatCode
+				}
+			}
 		}
-		if styles.numFmtRefTable != nil {
-			numFmt := styles.numFmtRefTable[xf.NumFmtId]
-			numberFormat = numFmt.FormatCode
+	}
+	parsedFmt, ok := styles.parsedNumFmtTable[numberFormat]
+	if !ok {
+		if styles.parsedNumFmtTable == nil {
+			styles.parsedNumFmtTable = map[string]*parsedNumberFormat{}
 		}
+		parsedFmt = parseFullNumberFormatString(numberFormat)
+		styles.parsedNumFmtTable[numberFormat] = parsedFmt
 	}
-	return strings.ToLower(numberFormat)
+	return numberFormat, parsedFmt
 }
 
 func (styles *xlsxStyleSheet) addFont(xFont xlsxFont) (index int) {
@@ -313,7 +334,7 @@ func (styles *xlsxStyleSheet) addCellXf(xCellXf xlsxXf) (index int) {
 
 // newNumFmt generate a xlsxNumFmt according the format code. When the FormatCode is built in, it will return a xlsxNumFmt with the NumFmtId defined in ECMA document, otherwise it will generate a new NumFmtId greater than 164.
 func (styles *xlsxStyleSheet) newNumFmt(formatCode string) xlsxNumFmt {
-	if formatCode == "" {
+	if compareFormatString(formatCode, "general") {
 		return xlsxNumFmt{NumFmtId: 0, FormatCode: "general"}
 	}
 	// built in NumFmts in xmlStyle.go, traverse from the const.

+ 21 - 11
xmlWorkbook.go

@@ -177,27 +177,37 @@ func worksheetFileForSheet(sheet xlsxSheet, worksheets map[string]*zip.File, she
 }
 
 // getWorksheetFromSheet() is an internal helper function to open a
-// sheetN.xml file, refered to by an xlsx.xlsxSheet struct, from the XLSX
+// sheetN.xml file, referred to by an xlsx.xlsxSheet struct, from the XLSX
 // file and unmarshal it an xlsx.xlsxWorksheet struct
-func getWorksheetFromSheet(sheet xlsxSheet, worksheets map[string]*zip.File, sheetXMLMap map[string]string) (*xlsxWorksheet, error) {
-	var rc io.ReadCloser
+func getWorksheetFromSheet(sheet xlsxSheet, worksheets map[string]*zip.File, sheetXMLMap map[string]string, rowLimit int) (*xlsxWorksheet, error) {
+	var r io.Reader
 	var decoder *xml.Decoder
 	var worksheet *xlsxWorksheet
-	var error error
+	var err error
 	worksheet = new(xlsxWorksheet)
 
 	f := worksheetFileForSheet(sheet, worksheets, sheetXMLMap)
 	if f == nil {
 		return nil, fmt.Errorf("Unable to find sheet '%s'", sheet)
 	}
-	rc, error = f.Open()
-	if error != nil {
-		return nil, error
+	if rc, err := f.Open(); err != nil {
+		return nil, err
+	} else {
+		defer rc.Close()
+		r = rc
 	}
-	decoder = xml.NewDecoder(rc)
-	error = decoder.Decode(worksheet)
-	if error != nil {
-		return nil, error
+
+	if rowLimit != NoRowLimit {
+		r, err = truncateSheetXML(r, rowLimit)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	decoder = xml.NewDecoder(r)
+	err = decoder.Decode(worksheet)
+	if err != nil {
+		return nil, err
 	}
 	return worksheet, nil
 }