Преглед на файлове

Use dimensions in sheet definitions instead of looping through rows/columns to find the dimensions. This should restrict the main loop to only the range of cells that are actually in use, and drop the loop finding the maximum extent. This should speed things up quite a bit.

Geoffrey J. Teale преди 12 години
родител
ревизия
c502147d60
променени са 4 файла, в които са добавени 68 реда и са изтрити 40 реда
  1. 38 28
      lib.go
  2. 23 0
      lib_test.go
  3. 4 12
      worksheet.go
  4. 3 0
      worksheet_test.go

+ 38 - 28
lib.go

@@ -186,6 +186,28 @@ func getCoordsFromCellIDString(cellIDString string) (x, y int, error error) {
 	return x, y, error
 	return x, y, error
 }
 }
 
 
+// getMaxMinFromDimensionRef returns the zero-based cartesian minimum
+// and maximum coordinates (minx, miny, maxx, maxy) described by the
+// dimension reference of an XLSX worksheet.  For example, "A1:B2"
+// yields 0, 0, 1, 1; a parse failure yields -1 for all four plus err.
+func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) {
+	var parts []string
+	parts = strings.Split(ref, ":") // "A1:B2" -> ["A1", "B2"]; a ref without ":" stays a single part
+	minx, miny, err = getCoordsFromCellIDString(parts[0])
+	if err != nil {
+		return -1, -1, -1, -1, err
+	}
+	if len(parts) == 1 { // single-cell reference: the maximum coincides with the minimum
+		maxx, maxy = minx, miny
+		return
+	}
+	maxx, maxy, err = getCoordsFromCellIDString(parts[1]) // parts beyond the second are not examined
+	if err != nil {
+		return -1, -1, -1, -1, err
+	}
+	return
+}
+
 // makeRowFromSpan will, when given a span expressed as a string,
 // makeRowFromSpan will, when given a span expressed as a string,
 // return an empty Row large enough to encompass that span and
 // return an empty Row large enough to encompass that span and
 // populate it with empty cells.  All rows start from cell 1 -
 // populate it with empty cells.  All rows start from cell 1 -
@@ -267,52 +289,40 @@ func getValueFromCellData(rawcell xlsxC, reftable []string) string {
 func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
 func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int) {
 	var rows []*Row
 	var rows []*Row
 	var row *Row
 	var row *Row
-	var maxCol int
-	var maxRow int
+	var minCol, maxCol, minRow, maxRow, colCount, rowCount int // bounds come from the dimension ref; the counts are derived from them
 	var reftable []string
 	var reftable []string
+	var err error
 
 
+	if len(Worksheet.SheetData.Row) == 0 { // no rows in the sheet data: return an empty result without parsing the dimension
+		return nil, 0, 0
+	}
 	reftable = file.referenceTable
 	reftable = file.referenceTable
-	maxCol = 0
-	maxRow = 0
-	for _, rawrow := range Worksheet.SheetData.Row {
-		for _, rawcell := range rawrow.C {
-			x, y, error := getCoordsFromCellIDString(rawcell.R)
-			if error != nil {
-				panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R))
-			}
-			if x > maxCol {
-				maxCol = x
-			}
-			if y > maxRow {
-				maxRow = y
-			}
-		}
+	minCol, minRow, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref)
+	if err != nil {
+		panic(err.Error()) // an unparsable dimension ref aborts the read
 	}
 	}
-	maxCol += 1
-	maxRow += 1
-	rows = make([]*Row, maxRow)
-	for _, rawrow := range Worksheet.SheetData.Row {
+	rowCount = (maxRow - minRow) + 1 // bounds are inclusive, hence the +1
+	colCount = (maxCol - minCol) + 1
+	rows = make([]*Row, rowCount)
+	for rowIndex := 0; rowIndex < rowCount; rowIndex++ {
+		rawrow := Worksheet.SheetData.Row[rowIndex] // NOTE(review): rowCount comes from the dimension ref, not len(SheetData.Row); fewer row elements than the ref spans would index out of range - confirm
 		// range is not empty
 		// range is not empty
 		if len(rawrow.Spans) != 0 {
 		if len(rawrow.Spans) != 0 {
 			row = makeRowFromSpan(rawrow.Spans)
 			row = makeRowFromSpan(rawrow.Spans)
 		} else {
 		} else {
 			row = makeRowFromRaw(rawrow)
 			row = makeRowFromRaw(rawrow)
 		}
 		}
-		rowno := 0
 		for _, rawcell := range rawrow.C {
 		for _, rawcell := range rawrow.C {
-			x, y, _ := getCoordsFromCellIDString(rawcell.R)
-			if y != 0 && rowno == 0 {
-				rowno = y
-			}
+			x, _, _ := getCoordsFromCellIDString(rawcell.R) // only the column index is used; the row index and any parse error are discarded
 			if x < len(row.Cells) {
 			if x < len(row.Cells) {
 				row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
 				row.Cells[x].Value = getValueFromCellData(rawcell, reftable)
 				row.Cells[x].styleIndex = rawcell.S
 				row.Cells[x].styleIndex = rawcell.S
 				row.Cells[x].styles = file.styles
 				row.Cells[x].styles = file.styles
 			}
 			}
 		}
 		}
-		rows[rowno] = row
+		rows[rowIndex] = row // rows are stored by position in SheetData.Row, not by the row number in the cell reference
 	}
 	}
-	return rows, maxCol, maxRow
+	return rows, colCount, rowCount
 }
 }
 
 
 type indexedSheet struct {
 type indexedSheet struct {

+ 23 - 0
lib_test.go

@@ -285,6 +285,29 @@ func TestGetCoordsFromCellIDString(t *testing.T) {
 	}
 	}
 }
 }
 
 
+// TestGetMaxMinFromDimensionRef checks that "A1:B2" yields the zero-based bounds (0,0)-(1,1).
+func TestGetMaxMinFromDimensionRef(t *testing.T) {
+	var dimensionRef string = "A1:B2"
+	var minx, miny, maxx, maxy int
+	var err error
+	minx, miny, maxx, maxy, err = getMaxMinFromDimensionRef(dimensionRef)
+	if err != nil {
+		t.Error(err)
+	}
+	if minx != 0 {
+		t.Error("Expected minx == 0, but got ", strconv.Itoa(minx))
+	}
+	if miny != 0 {
+		t.Error("Expected miny == 0, but got ", strconv.Itoa(miny))
+	}
+	if maxx != 1 { // "B" is the second column, index 1
+		t.Error("Expected maxx == 1, but got ", strconv.Itoa(maxx))
+	}
+	if maxy != 1 { // row "2" is the second row, index 1
+		t.Error("Expected maxy == 1, but got ", strconv.Itoa(maxy))
+	}
+}
+
 func TestGetRangeFromString(t *testing.T) {
 func TestGetRangeFromString(t *testing.T) {
 	var rangeString string
 	var rangeString string
 	var lower, upper int
 	var lower, upper int

+ 4 - 12
worksheet.go

@@ -5,26 +5,18 @@ package xlsx
 // currently I have not checked it for completeness - it does as much
 // currently I have not checked it for completeness - it does as much
 // as I need.
 // as I need.
 type xlsxWorksheet struct {
 type xlsxWorksheet struct {
-	Cols          xlsxCols          `xml:"cols"`
+	Dimension     xlsxDimension          `xml:"dimension"` // the sheet's dimension element; readRowsFromSheet takes its bounds from Dimension.Ref
 	SheetData     xlsxSheetData     `xml:"sheetData"`
 	SheetData     xlsxSheetData     `xml:"sheetData"`
 }
 }
 
 
-// xlsxCols directly maps the cols element in the namespace
+// xlsxDimension directly maps the dimension element in the namespace
 // http://schemas.openxmlformats.org/spreadsheetml/2006/main -
 // http://schemas.openxmlformats.org/spreadsheetml/2006/main -
 // currently I have not checked it for completeness - it does as much
 // currently I have not checked it for completeness - it does as much
 // as I need.
 // as I need.
-type xlsxCols struct {
-	Col []xlsxCol `xml:"col"`
+type xlsxDimension struct {
+	Ref string `xml:"ref,attr"` // the ref attribute, a cell range such as "A1:B2"; parsed by getMaxMinFromDimensionRef
 }
 }
 
 
-// xlsxCol directly maps the col element in the namespace
-// http://schemas.openxmlformats.org/spreadsheetml/2006/main -
-// currently I have not checked it for completeness - it does as much
-// as I need.
-type xlsxCol struct {
-	Max       int     `xml:"max,attr"`
-	Min       int     `xml:"min,attr"`
-}
 
 
 // xlsxSheetData directly maps the sheetData element in the namespace
 // xlsxSheetData directly maps the sheetData element in the namespace
 // http://schemas.openxmlformats.org/spreadsheetml/2006/main -
 // http://schemas.openxmlformats.org/spreadsheetml/2006/main -

+ 3 - 0
worksheet_test.go

@@ -60,6 +60,9 @@ func TestUnmarshallWorksheet(t *testing.T) {
 		t.Error(error.Error())
 		t.Error(error.Error())
 		return
 		return
 	}
 	}
+	if worksheet.Dimension.Ref != "A1:B2" {
+		t.Error(fmt.Sprintf("Expected worksheet.Dimension.Ref == 'A1:B2', got %s", worksheet.Dimension.Ref))
+	}
 	if len(worksheet.SheetData.Row) == 0 {
 	if len(worksheet.SheetData.Row) == 0 {
 		t.Error(fmt.Sprintf("Expected len(worksheet.SheetData.Row) == '2', got %d", worksheet.SheetData.Row))
 		t.Error(fmt.Sprintf("Expected len(worksheet.SheetData.Row) == '2', got %d", worksheet.SheetData.Row))
 	}
 	}