Przeglądaj źródła

Merge pull request #4 from ryho/ryanh/improve_partial_read_tests

Improve partial read tests
Ryan Hollis 8 lat temu
rodzic
commit
202e87909d
1 zmienionych plików z 71 dodań i 18 usunięć
  1. 71 18
      file_test.go

+ 71 - 18
file_test.go

@@ -2,12 +2,49 @@ package xlsx
 
 import (
 	"encoding/xml"
+	"io"
+	"os"
 	"path/filepath"
-	"time"
 
 	. "gopkg.in/check.v1"
 )
 
+// ReaderAtCounter wraps a ReaderAt and counts the number of bytes that are read out of it
+type ReaderAtCounter struct {
+	readerAt  io.ReaderAt
+	bytesRead int
+}
+
+var _ io.ReaderAt = &ReaderAtCounter{}
+
+// NewReaderAtCounter creates a ReaderAtCounter by opening the file name, and provides the size which is needed for
+// opening as XLSX.
+func NewReaderAtCounter(name string) (*ReaderAtCounter, int64, error) {
+	f, err := os.Open(name)
+	if err != nil {
+		return nil, -1, err
+	}
+	fi, err := f.Stat()
+	if err != nil {
+		f.Close()
+		return nil, -1, err
+	}
+	readerAtCounter := &ReaderAtCounter{
+		readerAt: f,
+	}
+	return readerAtCounter, fi.Size(), nil
+}
+
+func (r *ReaderAtCounter) ReadAt(p []byte, off int64) (n int, err error) {
+	n, err = r.readerAt.ReadAt(p, off)
+	r.bytesRead += n
+	return n, err
+}
+
+func (r *ReaderAtCounter) GetBytesRead() int {
+	return r.bytesRead
+}
+
 type FileSuite struct{}
 
 var _ = Suite(&FileSuite{})
@@ -16,41 +53,57 @@ var _ = Suite(&FileSuite{})
 // struct.
 func (l *FileSuite) TestOpenFile(c *C) {
 	var xlsxFile *File
-	var error error
+	var err error
 
-	xlsxFile, error = OpenFile("./testdocs/testfile.xlsx")
-	c.Assert(error, IsNil)
+	xlsxFile, err = OpenFile("./testdocs/testfile.xlsx")
+	c.Assert(err, IsNil)
 	c.Assert(xlsxFile, NotNil)
 }
 
-func (l *FileSuite) TestPartialReadsWithFewSharedStrings(c *C) {
+func (l *FileSuite) TestPartialReadsWithFewSharedStringsOnlyPartiallyReads(c *C) {
+	// This test verifies that a large file is only partially read when using a small row limit.
+	// This file is 11,228,530 bytes, but only 14,020 bytes get read out when using a row limit of 10.
+	// I'm specifying a limit of 20,000 to prevent test flakiness if the bytes read fluctuates with small code changes.
 	rowLimit := 10
-	start := time.Now()
-	file, err := OpenFileWithRowLimit("testdocs/large_sheet_no_shared_strings_no_dimension_tag.xlsx", rowLimit)
+	// It is possible that readLimit will need to be increased by a small amount in the future, but do not increase it
+	// to anywhere near a significant amount of 11 million. We're testing that this number is low, to ensure that partial
+	// reads are fast.
+	readLimit := 20 * 1000
+	reader, size, err := NewReaderAtCounter("testdocs/large_sheet_no_shared_strings_no_dimension_tag.xlsx")
 	if err != nil {
 		c.Fatal(err)
 	}
-	timeSpent := time.Since(start)
-	timeLimit := 100 * time.Millisecond
-	if timeSpent > timeLimit {
-		c.Errorf("Reading %v rows from a sheet with ~31,000 rows and few shared strings took %v, must take less than %v", rowLimit, timeSpent, timeLimit)
+	file, err := OpenReaderAtWithRowLimit(reader, size, rowLimit)
+	if reader.bytesRead > readLimit {
+		// If this test begins failing, do not increase readLimit dramatically. Instead investigate why the number of
+		// bytes read went up and fix this issue.
+		c.Errorf("Reading %v rows from a sheet with ~31,000 rows and few shared strings read %v bytes, must read less than %v bytes", rowLimit, reader.bytesRead, readLimit)
 	}
 	if len(file.Sheets[0].Rows) != rowLimit {
 		c.Errorf("Expected sheet to have %v rows, but found %v rows", rowLimit, len(file.Sheets[0].Rows))
 	}
 }
 
-func (l *FileSuite) TestPartialReadsWithSharedStrings(c *C) {
+func (l *FileSuite) TestPartialReadsWithLargeSharedStringsOnlyPartiallyReads(c *C) {
+	// This test verifies that a large file is only partially read when using a small row limit.
+	// This file is 7,055,632 bytes, but only 1,092,839 bytes get read out when using a row limit of 10.
+	// I'm specifying a limit of 1.2 MB to prevent test flakiness if the bytes read fluctuates with small code changes.
+	// The reason that this test has a much larger limit than TestPartialReadsWithFewSharedStringsOnlyPartiallyReads
+	// is that this file has a Shared Strings file that is a little over 1 MB.
 	rowLimit := 10
-	start := time.Now()
-	file, err := OpenFileWithRowLimit("testdocs/large_sheet_large_sharedstrings_dimension_tag.xlsx", rowLimit)
+	// It is possible that readLimit will need to be increased by a small amount in the future, but do not increase it
+	// to anywhere near a significant amount of 7 million. We're testing that this number is low, to ensure that partial
+	// reads are fast.
+	readLimit := int(1.2 * 1000 * 1000)
+	reader, size, err := NewReaderAtCounter("testdocs/large_sheet_large_sharedstrings_dimension_tag.xlsx")
 	if err != nil {
 		c.Fatal(err)
 	}
-	timeSpent := time.Since(start)
-	timeLimit := time.Second
-	if timeSpent > timeLimit {
-		c.Errorf("Reading %v rows from a sheet with ~31,000 rows and a large shared strings took %v, must take less than %v", rowLimit, timeSpent, timeLimit)
+	file, err := OpenReaderAtWithRowLimit(reader, size, rowLimit)
+	if reader.bytesRead > readLimit {
+		// If this test begins failing, do not increase readLimit dramatically. Instead investigate why the number of
+		// bytes read went up and fix this issue.
+		c.Errorf("Reading %v rows from a sheet with ~31,000 rows and a large shared strings read %v bytes, must read less than %v bytes", rowLimit, reader.bytesRead, readLimit)
 	}
 	// This is testing that the sheet was truncated, but it is also testing that the dimension tag was ignored.
 	// If the dimension tag is not correctly ignored, there will be 10 rows of the data, plus ~31k empty rows tacked on.