|
|
@@ -2,6 +2,7 @@ package xlsx
|
|
|
|
|
|
import (
|
|
|
"archive/zip"
|
|
|
+ "bytes"
|
|
|
"encoding/xml"
|
|
|
"errors"
|
|
|
"fmt"
|
|
|
@@ -11,6 +12,10 @@ import (
|
|
|
"strings"
|
|
|
)
|
|
|
|
|
|
// sheetEnding is the closing markup appended to a worksheet's XML after it
// has been cut short, so that the shortened document is still well-formed.
const sheetEnding = `</sheetData></worksheet>`
|
|
|
+
|
|
|
// XLSXReaderError is the standard error type for otherwise undefined
|
|
|
// errors in the XLSX reading process.
|
|
|
type XLSXReaderError struct {
|
|
|
@@ -205,6 +210,7 @@ func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err erro
|
|
|
// calculateMaxMinFromWorksheet works out the dimensions of a spreadsheet
|
|
|
// that doesn't have a DimensionRef set. The only case currently
|
|
|
// known where this is true is with XLSX exported from Google Docs.
|
|
|
+// This is also true for XLSX files created through the streaming APIs.
|
|
|
func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) {
|
|
|
// Note, this method could be very slow for large spreadsheets.
|
|
|
var x, y int
|
|
|
@@ -492,7 +498,7 @@ func fillCellDataFromInlineString(rawcell xlsxC, cell *Cell) {
|
|
|
// rows from an XLSXWorksheet, populates them with Cells and resolves
|
|
|
// the value references from the reference table and stores them in
|
|
|
// the rows and columns.
|
|
|
-func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet) ([]*Row, []*Col, int, int) {
|
|
|
+func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet, rowLimit int) ([]*Row, []*Col, int, int) {
|
|
|
var rows []*Row
|
|
|
var cols []*Col
|
|
|
var row *Row
|
|
|
@@ -506,7 +512,7 @@ func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet) ([]*R
|
|
|
return nil, nil, 0, 0
|
|
|
}
|
|
|
reftable = file.referenceTable
|
|
|
- if len(Worksheet.Dimension.Ref) > 0 && len(strings.Split(Worksheet.Dimension.Ref, ":")) == 2 {
|
|
|
+ if len(Worksheet.Dimension.Ref) > 0 && len(strings.Split(Worksheet.Dimension.Ref, ":")) == 2 && rowLimit == NoRowLimit {
|
|
|
minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
|
|
|
} else {
|
|
|
minCol, minRow, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet)
|
|
|
@@ -659,7 +665,7 @@ func readSheetViews(xSheetViews xlsxSheetViews) []SheetView {
|
|
|
// into a Sheet struct. This work can be done in parallel and so
|
|
|
// readSheetsFromZipFile will spawn an instance of this function per
|
|
|
// sheet and get the results back on the provided channel.
|
|
|
-func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string) (errRes error) {
|
|
|
+func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string, rowLimit int) (errRes error) {
|
|
|
result := &indexedSheet{Index: index, Sheet: nil, Error: nil}
|
|
|
defer func() {
|
|
|
if e := recover(); e != nil {
|
|
|
@@ -676,15 +682,15 @@ func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *F
|
|
|
}
|
|
|
}()
|
|
|
|
|
|
- worksheet, error := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap)
|
|
|
- if error != nil {
|
|
|
- result.Error = error
|
|
|
+ worksheet, err := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap, rowLimit)
|
|
|
+ if err != nil {
|
|
|
+ result.Error = err
|
|
|
sc <- result
|
|
|
- return error
|
|
|
+ return err
|
|
|
}
|
|
|
sheet := new(Sheet)
|
|
|
sheet.File = fi
|
|
|
- sheet.Rows, sheet.Cols, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi, sheet)
|
|
|
+ sheet.Rows, sheet.Cols, sheet.MaxCol, sheet.MaxRow = readRowsFromSheet(worksheet, fi, sheet, rowLimit)
|
|
|
sheet.Hidden = rsheet.State == sheetStateHidden || rsheet.State == sheetStateVeryHidden
|
|
|
sheet.SheetViews = readSheetViews(worksheet.SheetViews)
|
|
|
|
|
|
@@ -701,7 +707,7 @@ func readSheetFromFile(sc chan *indexedSheet, index int, rsheet xlsxSheet, fi *F
|
|
|
// readSheetsFromZipFile is an internal helper function that loops
|
|
|
// over the Worksheets defined in the XLSXWorkbook and loads them into
|
|
|
// Sheet objects stored in the Sheets slice of a xlsx.File struct.
|
|
|
-func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string) (map[string]*Sheet, []*Sheet, error) {
|
|
|
+func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string, rowLimit int) (map[string]*Sheet, []*Sheet, error) {
|
|
|
var workbook *xlsxWorkbook
|
|
|
var err error
|
|
|
var rc io.ReadCloser
|
|
|
@@ -740,7 +746,7 @@ func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]strin
|
|
|
defer close(sheetChan)
|
|
|
err = nil
|
|
|
for i, rawsheet := range workbookSheets {
|
|
|
- if err := readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap); err != nil {
|
|
|
+ if err := readSheetFromFile(sheetChan, i, rawsheet, file, sheetXMLMap, rowLimit); err != nil {
|
|
|
return
|
|
|
}
|
|
|
}
|
|
|
@@ -909,13 +915,28 @@ func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (WorkBookRels, err
|
|
|
// xlsx.File struct populated with its contents. In most cases
|
|
|
// ReadZip is not used directly, but is called internally by OpenFile.
|
|
|
func ReadZip(f *zip.ReadCloser) (*File, error) {
|
|
|
+ return ReadZipWithRowLimit(f, NoRowLimit)
|
|
|
+}
|
|
|
+
|
|
|
+// ReadZipWithRowLimit() takes a pointer to a zip.ReadCloser and returns a
|
|
|
+// xlsx.File struct populated with its contents. In most cases
|
|
|
+// ReadZip is not used directly, but is called internally by OpenFile.
|
|
|
+func ReadZipWithRowLimit(f *zip.ReadCloser, rowLimit int) (*File, error) {
|
|
|
defer f.Close()
|
|
|
- return ReadZipReader(&f.Reader)
|
|
|
+ return ReadZipReaderWithRowLimit(&f.Reader, rowLimit)
|
|
|
}
|
|
|
|
|
|
// ReadZipReader() can be used to read an XLSX in memory without
|
|
|
// touching the filesystem.
|
|
|
func ReadZipReader(r *zip.Reader) (*File, error) {
|
|
|
+ return ReadZipReaderWithRowLimit(r, NoRowLimit)
|
|
|
+}
|
|
|
+
|
|
|
+// ReadZipReaderWithRowLimit() can be used to read an XLSX in memory without
|
|
|
+// touching the filesystem.
|
|
|
+// rowLimit is the number of rows that should be read from the file. If rowLimit is -1, no limit is applied.
|
|
|
+// You can specify this with the constant NoRowLimit.
|
|
|
+func ReadZipReaderWithRowLimit(r *zip.Reader, rowLimit int) (*File, error) {
|
|
|
var err error
|
|
|
var file *File
|
|
|
var reftable *RefTable
|
|
|
@@ -986,7 +1007,7 @@ func ReadZipReader(r *zip.Reader) (*File, error) {
|
|
|
|
|
|
file.styles = style
|
|
|
}
|
|
|
- sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap)
|
|
|
+ sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap, rowLimit)
|
|
|
if err != nil {
|
|
|
return nil, err
|
|
|
}
|
|
|
@@ -999,3 +1020,45 @@ func ReadZipReader(r *zip.Reader) (*File, error) {
|
|
|
file.Sheets = sheets
|
|
|
return file, nil
|
|
|
}
|
|
|
+
|
|
|
+// truncateSheetXML will take in a reader to an XML sheet file and will return a reader that will read an equivalent
|
|
|
+// XML sheet file with only the number of rows specified. This greatly speeds up XML unmarshalling when only
|
|
|
+// a few rows need to be read from a large sheet.
|
|
|
+// When sheets are truncated, all formatting present after the sheetData tag will be lost, but all of this formatting
|
|
|
+// is related to printing and visibility, and is out of scope for most purposes of this library.
|
|
|
+func truncateSheetXML(r io.Reader, rowLimit int) (io.Reader, error) {
|
|
|
+ var rowCount int
|
|
|
+ var token xml.Token
|
|
|
+ var readErr error
|
|
|
+
|
|
|
+ output := new(bytes.Buffer)
|
|
|
+ r = io.TeeReader(r, output)
|
|
|
+ decoder := xml.NewDecoder(r)
|
|
|
+
|
|
|
+ for {
|
|
|
+ token, readErr = decoder.Token()
|
|
|
+ if readErr == io.EOF {
|
|
|
+ break
|
|
|
+ } else if readErr != nil {
|
|
|
+ return nil, readErr
|
|
|
+ }
|
|
|
+ end, ok := token.(xml.EndElement)
|
|
|
+ if ok && end.Name.Local == "row" {
|
|
|
+ rowCount++
|
|
|
+ if rowCount >= rowLimit {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ offset := decoder.InputOffset()
|
|
|
+ output.Truncate(int(offset))
|
|
|
+
|
|
|
+ if readErr != io.EOF {
|
|
|
+ _, err := output.Write([]byte(sheetEnding))
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return output, nil
|
|
|
+}
|