Преглед изворни кода

Use dimensions in sheet definitions instead of looping through rows/columns to find the dimensions. This should restrict the main loop to only the range of cells that are actually in use, and drop the loop finding the maximum extent. This should speed things up quite a bit.

Geoffrey J. Teale пре 12 година
родитељ
комит
c502147d60
4 измењених фајлова са 68 додато и 40 уклоњено
  1. 38 28
      lib.go
  2. 23 0
      lib_test.go
  3. 4 12
      worksheet.go
  4. 3 0
      worksheet_test.go

+ 38 - 28
lib.go

@@ -186,6 +186,28 @@ func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
 	return x, y, error
 }
 
+// getMaxMinFromDimensionRef returns the zero-based cartesian minimum
+// and maximum coordinates from the dimension reference embedded in an
+// XLSX worksheet.  For example, the dimension reference "A1:B2"
+// returns "0,0", "1,1".
+func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
+	var parts []string
+	parts = strings.Split(ref, ":")
+	minx, miny, err = getCoordsFromCellIDString(parts[0])
+	if err != nil {
+		return -1, -1, -1, -1, err
+	}
+	if len(parts) == 1 {
+		maxx, maxy = minx, miny
+		return
+	}
+	maxx, maxy, err = getCoordsFromCellIDString(parts[1])
+	if err != nil {
+		return -1, -1, -1, -1, err
+	}
+	return
+}
+
 // makeRowFromSpan will, when given a span expressed as a string,
 // return an empty Row large enough to encompass that span and
 // populate it with empty cells.  All rows start from cell 1 -
@@ -267,52 +289,40 @@ func getValueFromCellData(rawcell xlsxC, reftable []string) string {
 func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
 	var rows []*Row
 	var row *Row
-	var maxCol int
-	var maxRow int
+	var minCol, maxCol, minRow, maxRow, colCount, rowCount int
 	var reftable []string
+	var err error
 
+	if len(Worksheet.SheetData.Row) == 0 {
+		return nil, 0, 0
+	}
 	reftable = file.referenceTable
-	maxCol = 0
-	maxRow = 0
-	for _, rawrow := range Worksheet.SheetData.Row {
-		for _, rawcell := range rawrow.C {
-			x, y, error := getCoordsFromCellIDString(rawcell.R)
-			if error != nil {
-				panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
-			}
-			if x > maxCol {
-				maxCol = x
-			}
-			if y > maxRow {
-				maxRow = y
-			}
-		}
+	minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
+	if err != nil {
+		panic(err.Error())
 	}
-	maxCol += 1
-	maxRow += 1
-	rows = make([]*Row, maxRow)
-	for _, rawrow := range Worksheet.SheetData.Row {
+	rowCount = (maxRow - minRow) + 1
+	colCount = (maxCol - minCol) + 1
+	rows = make([]*Row, rowCount)
+	for rowIndex := 0; rowIndex < rowCount; rowIndex++ {
+		rawrow := Worksheet.SheetData.Row[rowIndex]
 		// range is not empty
 		if len(rawrow.Spans) != 0 {
 			row = makeRowFromSpan(rawrow.Spans)
 		} else {
 			row = makeRowFromRaw(rawrow)
 		}
-		rowno := 0
 		for _, rawcell := range rawrow.C {
-			x, y, _ := getCoordsFromCellIDString(rawcell.R)
-			if y != 0 && rowno == 0 {
-				rowno = y
-			}
+			x, _, _ := getCoordsFromCellIDString(rawcell.R)
 			if x < len(row.Cells) {
 				row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
 				row.Cells[x].styleIndex = rawcell.S
 				row.Cells[x].styles = file.styles
 			}
 		}
-		rows[rowno] = row
+		rows[rowIndex] = row
 	}
-	return rows, maxCol, maxRow
+	return rows, colCount, rowCount
 }
 
 type indexedSheet struct {

+ 23 - 0
lib_test.go

@@ -285,6 +285,29 @@ func TestGetCoordsFromCellIDString(t *testing.T) {
 	}
 }
 
+func TestGetMaxMinFromDimensionRef(t *testing.T) {
+	var dimensionRef string = "A1:B2"
+	var minx, miny, maxx, maxy int
+	var err error
+	minx, miny, maxx, maxy, err = getMaxMinFromDimensionRef(dimensionRef)
+	if err != nil {
+		t.Error(err)
+	}
+	if minx != 0 {
+		t.Error("Expected minx == 0, but got ", strconv.Itoa(minx))
+	}
+	if miny != 0 {
+		t.Error("Expected miny == 0, but got ", strconv.Itoa(miny))
+	}
+	if maxx != 1 {
+		t.Error("Expected maxx == 0, but got ", strconv.Itoa(maxx))
+	}
+	if maxy != 1 {
+		t.Error("Expected maxy == 0, but got ", strconv.Itoa(maxy))
+	}
+
+}
+
 func TestGetRangeFromString(t *testing.T) {
 	var rangeString string
 	var lower, upper int

+ 4 - 12
worksheet.go

@@ -5,26 +5,18 @@ package xlsx
 // currently I have not checked it for completeness - it does as much
 // as I need.
 type xlsxWorksheet struct {
-	Cols          xlsxCols          `xml:"cols"`
+	Dimension     xlsxDimension          `xml:"dimension"`
 	SheetData     xlsxSheetData     `xml:"sheetData"`
 }
 
-// xlsxCols directly maps the cols element in the namespace
+// xlsxDimension directly maps the dimension element in the namespace
 // http://schemas.openxmlformats.org/spreadsheetml/2006/main -
 // currently I have not checked it for completeness - it does as much
 // as I need.
-type xlsxCols struct {
-	Col []xlsxCol `xml:"col"`
+type xlsxDimension struct {
+	Ref string `xml:"ref,attr"`
 }
 
-// xlsxCol directly maps the col element in the namespace
-// http://schemas.openxmlformats.org/spreadsheetml/2006/main -
-// currently I have not checked it for completeness - it does as much
-// as I need.
-type xlsxCol struct {
-	Max       int     `xml:"max,attr"`
-	Min       int     `xml:"min,attr"`
-}
 
 // xlsxSheetData directly maps the sheetData element in the namespace
 // http://schemas.openxmlformats.org/spreadsheetml/2006/main -

+ 3 - 0
worksheet_test.go

@@ -60,6 +60,9 @@ func TestUnmarshallWorksheet(t *testing.T) {
 		t.Error(error.Error())
 		return
 	}
+	if worksheet.Dimension.Ref != "A1:B2" {
+		t.Error(fmt.Sprintf("Expected worksheet.Dimension.Ref == 'A1:B2', got %s", worksheet.Dimension.Ref))
+	}
 	if len(worksheet.SheetData.Row) == 0 {
 		t.Error(fmt.Sprintf("Expected len(worksheet.SheetData.Row) == '2', got %d", worksheet.SheetData.Row))
 	}