|
|
@@ -2,12 +2,49 @@ package xlsx
|
|
|
|
|
|
import (
|
|
|
"encoding/xml"
|
|
|
+ "io"
|
|
|
+ "os"
|
|
|
"path/filepath"
|
|
|
- "time"
|
|
|
|
|
|
. "gopkg.in/check.v1"
|
|
|
)
|
|
|
|
|
|
+// ReaderAtCounter wraps a ReaderAt and counts the number of bytes that are read out of it
|
|
|
+type ReaderAtCounter struct {
|
|
|
+ readerAt io.ReaderAt
|
|
|
+ bytesRead int
|
|
|
+}
|
|
|
+
|
|
|
+var _ io.ReaderAt = &ReaderAtCounter{}
|
|
|
+
|
|
|
+// NewReaderAtCounter creates a ReaderAtCounter by opening the file name, and provides the size which is needed for
|
|
|
+// opening as XLSX.
|
|
|
+func NewReaderAtCounter(name string) (*ReaderAtCounter, int64, error) {
|
|
|
+ f, err := os.Open(name)
|
|
|
+ if err != nil {
|
|
|
+ return nil, -1, err
|
|
|
+ }
|
|
|
+ fi, err := f.Stat()
|
|
|
+ if err != nil {
|
|
|
+ f.Close()
|
|
|
+ return nil, -1, err
|
|
|
+ }
|
|
|
+ readerAtCounter := &ReaderAtCounter{
|
|
|
+ readerAt: f,
|
|
|
+ }
|
|
|
+ return readerAtCounter, fi.Size(), nil
|
|
|
+}
|
|
|
+
|
|
|
+func (r *ReaderAtCounter) ReadAt(p []byte, off int64) (n int, err error) {
|
|
|
+ n, err = r.readerAt.ReadAt(p, off)
|
|
|
+ r.bytesRead += n
|
|
|
+ return n, err
|
|
|
+}
|
|
|
+
|
|
|
+func (r *ReaderAtCounter) GetBytesRead() int {
|
|
|
+ return r.bytesRead
|
|
|
+}
|
|
|
+
|
|
|
type FileSuite struct{}
|
|
|
|
|
|
var _ = Suite(&FileSuite{})
|
|
|
@@ -16,41 +53,57 @@ var _ = Suite(&FileSuite{})
|
|
|
// struct.
|
|
|
func (l *FileSuite) TestOpenFile(c *C) {
|
|
|
var xlsxFile *File
|
|
|
- var error error
|
|
|
+ var err error
|
|
|
|
|
|
- xlsxFile, error = OpenFile("./testdocs/testfile.xlsx")
|
|
|
- c.Assert(error, IsNil)
|
|
|
+ xlsxFile, err = OpenFile("./testdocs/testfile.xlsx")
|
|
|
+ c.Assert(err, IsNil)
|
|
|
c.Assert(xlsxFile, NotNil)
|
|
|
}
|
|
|
|
|
|
-func (l *FileSuite) TestPartialReadsWithFewSharedStrings(c *C) {
|
|
|
+func (l *FileSuite) TestPartialReadsWithFewSharedStringsOnlyPartiallyReads(c *C) {
|
|
|
+ // This test verifies that a large file is only partially read when using a small row limit.
|
|
|
+ // This file is 11,228,530 bytes, but only 14,020 bytes get read out when using a row limit of 10.
|
|
|
+ // I'm specifying a limit of 20,000 to prevent test flakiness if the bytes read fluctuates with small code changes.
|
|
|
rowLimit := 10
|
|
|
- start := time.Now()
|
|
|
- file, err := OpenFileWithRowLimit("testdocs/large_sheet_no_shared_strings_no_dimension_tag.xlsx", rowLimit)
|
|
|
+ // It is possible that readLimit will need to be increased by a small amount in the future, but do not increase it
|
|
|
+ // to anywhere near a significant amount of 11 million. We're testing that this number is low, to ensure that partial
|
|
|
+ // reads are fast.
|
|
|
+ readLimit := 20 * 1000
|
|
|
+ reader, size, err := NewReaderAtCounter("testdocs/large_sheet_no_shared_strings_no_dimension_tag.xlsx")
|
|
|
if err != nil {
|
|
|
c.Fatal(err)
|
|
|
}
|
|
|
- timeSpent := time.Since(start)
|
|
|
- timeLimit := 100 * time.Millisecond
|
|
|
- if timeSpent > timeLimit {
|
|
|
- c.Errorf("Reading %v rows from a sheet with ~31,000 rows and few shared strings took %v, must take less than %v", rowLimit, timeSpent, timeLimit)
|
|
|
+ file, err := OpenReaderAtWithRowLimit(reader, size, rowLimit)
|
|
|
+ if reader.bytesRead > readLimit {
|
|
|
+ // If this test begins failing, do not increase readLimit dramatically. Instead investigate why the number of
|
|
|
+ // bytes read went up and fix this issue.
|
|
|
+ c.Errorf("Reading %v rows from a sheet with ~31,000 rows and few shared strings read %v bytes, must read less than %v bytes", rowLimit, reader.bytesRead, readLimit)
|
|
|
}
|
|
|
if len(file.Sheets[0].Rows) != rowLimit {
|
|
|
c.Errorf("Expected sheet to have %v rows, but found %v rows", rowLimit, len(file.Sheets[0].Rows))
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-func (l *FileSuite) TestPartialReadsWithSharedStrings(c *C) {
|
|
|
+func (l *FileSuite) TestPartialReadsWithLargeSharedStringsOnlyPartiallyReads(c *C) {
|
|
|
+ // This test verifies that a large file is only partially read when using a small row limit.
|
|
|
+ // This file is 7,055,632 bytes, but only 1,092,839 bytes get read out when using a row limit of 10.
|
|
|
+ // I'm specifying a limit of 1.2 MB to prevent test flakiness if the bytes read fluctuates with small code changes.
|
|
|
+ // The reason that this test has a much larger limit than TestPartialReadsWithFewSharedStringsOnlyPartiallyReads
|
|
|
+ // is that this file has a Shared Strings file that is a little over 1 MB.
|
|
|
rowLimit := 10
|
|
|
- start := time.Now()
|
|
|
- file, err := OpenFileWithRowLimit("testdocs/large_sheet_large_sharedstrings_dimension_tag.xlsx", rowLimit)
|
|
|
+ // It is possible that readLimit will need to be increased by a small amount in the future, but do not increase it
|
|
|
+ // to anywhere near a significant amount of 7 million. We're testing that this number is low, to ensure that partial
|
|
|
+ // reads are fast.
|
|
|
+ readLimit := int(1.2 * 1000 * 1000)
|
|
|
+ reader, size, err := NewReaderAtCounter("testdocs/large_sheet_large_sharedstrings_dimension_tag.xlsx")
|
|
|
if err != nil {
|
|
|
c.Fatal(err)
|
|
|
}
|
|
|
- timeSpent := time.Since(start)
|
|
|
- timeLimit := time.Second
|
|
|
- if timeSpent > timeLimit {
|
|
|
- c.Errorf("Reading %v rows from a sheet with ~31,000 rows and a large shared strings took %v, must take less than %v", rowLimit, timeSpent, timeLimit)
|
|
|
+ file, err := OpenReaderAtWithRowLimit(reader, size, rowLimit)
|
|
|
+ if reader.bytesRead > readLimit {
|
|
|
+ // If this test begins failing, do not increase readLimit dramatically. Instead investigate why the number of
|
|
|
+ // bytes read went up and fix this issue.
|
|
|
+ c.Errorf("Reading %v rows from a sheet with ~31,000 rows and a large shared strings read %v bytes, must read less than %v bytes", rowLimit, reader.bytesRead, readLimit)
|
|
|
}
|
|
|
// This is testing that the sheet was truncated, but it is also testing that the dimension tag was ignored.
|
|
|
// If the dimension tag is not correctly ignored, there will be 10 rows of the data, plus ~31k empty rows tacked on.
|