Jelajahi Sumber

Resolve #146, make the GetRow function read data as streaming. Ref: #382, #515

xuri 6 tahun lalu
induk
melakukan
7965e1231b
3 mengubah file dengan 146 tambahan dan 62 penghapusan
  1. 95 27
      rows.go
  2. 17 6
      rows_test.go
  3. 34 29
      sheet.go

+ 95 - 27
rows.go

@@ -10,6 +10,7 @@
 package excelize
 
 import (
+	"bytes"
 	"encoding/xml"
 	"errors"
 	"fmt"
@@ -49,16 +50,19 @@ func (f *File) GetRows(sheet string) ([][]string, error) {
 
 // Rows defines an iterator to a sheet
 type Rows struct {
-	err    error
-	f      *File
-	rows   []xlsxRow
-	curRow int
+	err      error
+	f        *File
+	rows     []xlsxRow
+	sheet    string
+	curRow   int
+	totalRow int
+	decoder  *xml.Decoder
 }
 
 // Next will return true if find the next row element.
 func (rows *Rows) Next() bool {
 	rows.curRow++
-	return rows.curRow <= len(rows.rows)
+	return rows.curRow <= rows.totalRow
 }
 
 // Error will return the error when the find next row element
@@ -68,19 +72,57 @@ func (rows *Rows) Error() error {
 
 // Columns return the current row's column values
 func (rows *Rows) Columns() ([]string, error) {
-	curRow := rows.rows[rows.curRow-1]
-
-	columns := make([]string, len(curRow.C))
+	var (
+		err          error
+		inElement    string
+		row, cellCol int
+		columns      []string
+	)
 	d := rows.f.sharedStringsReader()
-	for _, colCell := range curRow.C {
-		col, _, err := CellNameToCoordinates(colCell.R)
-		if err != nil {
-			return columns, err
+	for {
+		token, _ := rows.decoder.Token()
+		if token == nil {
+			break
+		}
+		switch startElement := token.(type) {
+		case xml.StartElement:
+			inElement = startElement.Name.Local
+			if inElement == "row" {
+				for _, attr := range startElement.Attr {
+					if attr.Name.Local == "r" {
+						row, err = strconv.Atoi(attr.Value)
+						if err != nil {
+							return columns, err
+						}
+						if row > rows.curRow {
+							return columns, err
+						}
+					}
+				}
+			}
+			if inElement == "c" {
+				colCell := xlsxC{}
+				_ = rows.decoder.DecodeElement(&colCell, &startElement)
+				cellCol, _, err = CellNameToCoordinates(colCell.R)
+				if err != nil {
+					return columns, err
+				}
+				blank := cellCol - len(columns)
+				for i := 1; i < blank; i++ {
+					columns = append(columns, "")
+				}
+				val, _ := colCell.getValueFrom(rows.f, d)
+				columns = append(columns, val)
+			}
+		case xml.EndElement:
+			inElement = startElement.Name.Local
+			if inElement == "row" {
+				return columns, err
+			}
+		default:
 		}
-		val, _ := colCell.getValueFrom(rows.f, d)
-		columns[col-1] = val
 	}
-	return columns, nil
+	return columns, err
 }
 
 // ErrSheetNotExist defines an error of sheet is not exist
@@ -89,7 +131,7 @@ type ErrSheetNotExist struct {
 }
 
 func (err ErrSheetNotExist) Error() string {
-	return fmt.Sprintf("Sheet %s is not exist", string(err.SheetName))
+	return fmt.Sprintf("sheet %s is not exist", string(err.SheetName))
 }
 
 // Rows return a rows iterator. For example:
@@ -104,22 +146,48 @@ func (err ErrSheetNotExist) Error() string {
 //    }
 //
 func (f *File) Rows(sheet string) (*Rows, error) {
-	xlsx, err := f.workSheetReader(sheet)
-	if err != nil {
-		return nil, err
-	}
 	name, ok := f.sheetMap[trimSheetName(sheet)]
 	if !ok {
 		return nil, ErrSheetNotExist{sheet}
 	}
-	if xlsx != nil {
-		data := f.readXML(name)
-		f.saveFileList(name, replaceWorkSheetsRelationshipsNameSpaceBytes(namespaceStrictToTransitional(data)))
+	if f.Sheet[name] != nil {
+		// flush data
+		output, _ := xml.Marshal(f.Sheet[name])
+		f.saveFileList(name, replaceWorkSheetsRelationshipsNameSpaceBytes(output))
+	}
+	var (
+		err       error
+		inElement string
+		row       int
+		rows      Rows
+	)
+	decoder := xml.NewDecoder(bytes.NewReader(f.readXML(name)))
+	for {
+		token, _ := decoder.Token()
+		if token == nil {
+			break
+		}
+		switch startElement := token.(type) {
+		case xml.StartElement:
+			inElement = startElement.Name.Local
+			if inElement == "row" {
+				for _, attr := range startElement.Attr {
+					if attr.Name.Local == "r" {
+						row, err = strconv.Atoi(attr.Value)
+						if err != nil {
+							return &rows, err
+						}
+					}
+				}
+				rows.totalRow = row
+			}
+		default:
+		}
 	}
-	return &Rows{
-		f:    f,
-		rows: xlsx.SheetData.Row,
-	}, nil
+	rows.f = f
+	rows.sheet = name
+	rows.decoder = xml.NewDecoder(bytes.NewReader(f.readXML(name)))
+	return &rows, nil
 }
 
 // SetRowHeight provides a function to set the height of a single row. For

+ 17 - 6
rows_test.go

@@ -47,10 +47,10 @@ func TestRowsIterator(t *testing.T) {
 		sheet2         = "Sheet2"
 		expectedNumRow = 11
 	)
-	xlsx, err := OpenFile(filepath.Join("test", "Book1.xlsx"))
+	f, err := OpenFile(filepath.Join("test", "Book1.xlsx"))
 	require.NoError(t, err)
 
-	rows, err := xlsx.Rows(sheet2)
+	rows, err := f.Rows(sheet2)
 	require.NoError(t, err)
 	var rowCount int
 	for rows.Next() {
@@ -59,9 +59,20 @@ func TestRowsIterator(t *testing.T) {
 	}
 	assert.Equal(t, expectedNumRow, rowCount)
 
-	rows = &Rows{f: xlsx, rows: []xlsxRow{{C: []xlsxC{{R: "A"}}}}, curRow: 1}
-	_, err = rows.Columns()
-	assert.EqualError(t, err, `cannot convert cell "A" to coordinates: invalid cell name "A"`)
+	// Valued cell sparse distribution test
+	f = NewFile()
+	cells := []string{"C1", "E1", "A3", "B3", "C3", "D3", "E3"}
+	for _, cell := range cells {
+		f.SetCellValue("Sheet1", cell, 1)
+	}
+	rows, err = f.Rows("Sheet1")
+	require.NoError(t, err)
+	rowCount = 0
+	for rows.Next() {
+		rowCount++
+		require.True(t, rowCount <= 3, "rowCount is greater than expected")
+	}
+	assert.Equal(t, 3, rowCount)
 }
 
 func TestRowsError(t *testing.T) {
@@ -697,7 +708,7 @@ func TestDuplicateRowInvalidRownum(t *testing.T) {
 
 func TestErrSheetNotExistError(t *testing.T) {
 	err := ErrSheetNotExist{SheetName: "Sheet1"}
-	assert.EqualValues(t, err.Error(), "Sheet Sheet1 is not exist")
+	assert.EqualValues(t, err.Error(), "sheet Sheet1 is not exist")
 }
 
 func BenchmarkRows(b *testing.B) {

+ 34 - 29
sheet.go

@@ -699,15 +699,12 @@ func (f *File) SearchSheet(sheet, value string, reg ...bool) ([]string, error) {
 	for _, r := range reg {
 		regSearch = r
 	}
-	xlsx, err := f.workSheetReader(sheet)
-	if err != nil {
-		return result, err
-	}
 	name, ok := f.sheetMap[trimSheetName(sheet)]
 	if !ok {
-		return result, nil
+		return result, ErrSheetNotExist{sheet}
 	}
-	if xlsx != nil {
+	if f.Sheet[name] != nil {
+		// flush data
 		output, _ := xml.Marshal(f.Sheet[name])
 		f.saveFileList(name, replaceWorkSheetsRelationshipsNameSpaceBytes(output))
 	}
@@ -718,9 +715,10 @@ func (f *File) SearchSheet(sheet, value string, reg ...bool) ([]string, error) {
 // cell value, and regular expression.
 func (f *File) searchSheet(name, value string, regSearch bool) ([]string, error) {
 	var (
-		inElement string
-		result    []string
-		r         xlsxRow
+		err                 error
+		cellName, inElement string
+		result              []string
+		cellCol, row        int
 	)
 	d := f.sharedStringsReader()
 	decoder := xml.NewDecoder(bytes.NewReader(f.readXML(name)))
@@ -733,31 +731,38 @@ func (f *File) searchSheet(name, value string, regSearch bool) ([]string, error)
 		case xml.StartElement:
 			inElement = startElement.Name.Local
 			if inElement == "row" {
-				r = xlsxRow{}
-				_ = decoder.DecodeElement(&r, &startElement)
-				for _, colCell := range r.C {
-					val, _ := colCell.getValueFrom(f, d)
-					if regSearch {
-						regex := regexp.MustCompile(value)
-						if !regex.MatchString(val) {
-							continue
-						}
-					} else {
-						if val != value {
-							continue
+				for _, attr := range startElement.Attr {
+					if attr.Name.Local == "r" {
+						row, err = strconv.Atoi(attr.Value)
+						if err != nil {
+							return result, err
 						}
 					}
-
-					cellCol, _, err := CellNameToCoordinates(colCell.R)
-					if err != nil {
-						return result, err
+				}
+			}
+			if inElement == "c" {
+				colCell := xlsxC{}
+				_ = decoder.DecodeElement(&colCell, &startElement)
+				val, _ := colCell.getValueFrom(f, d)
+				if regSearch {
+					regex := regexp.MustCompile(value)
+					if !regex.MatchString(val) {
+						continue
 					}
-					cellName, err := CoordinatesToCellName(cellCol, r.R)
-					if err != nil {
-						return result, err
+				} else {
+					if val != value {
+						continue
 					}
-					result = append(result, cellName)
 				}
+				cellCol, _, err = CellNameToCoordinates(colCell.R)
+				if err != nil {
+					return result, err
+				}
+				cellName, err = CoordinatesToCellName(cellCol, row)
+				if err != nil {
+					return result, err
+				}
+				result = append(result, cellName)
 			}
 		default:
 		}