Просмотр исходного кода

Extract XLSXWorksheet from XLSX file from a reference in XLSXSheet

Geoffrey J. Teale 14 лет назад
Родитель
Сommit
40bcbbcb5f
4 измененных файлов с 265 добавлено и 18 удалено
  1. 104 16
      lib.go
  2. 100 2
      lib_test.go
  3. 27 0
      workbook.go
  4. 34 0
      workbook_test.go

+ 104 - 16
lib.go

@@ -19,23 +19,58 @@ func (e *XLSXReaderError) String() string {
 	return e.Error
 	return e.Error
 }
 }
 
 
+
+// Cell is a high level structure intended to provide user access to
+// the contents of a Cell within an xlsx.Row.
+type Cell struct {
+	data string
+}
+
+// Row is a high level structure intended to provide user access to a
+// row within a xlsx.Sheet.  An xlsx.Row contains a slice of xlsx.Cell.
+type Row struct {
+	Cells []*Cell
+}
+
 // Sheet is a high level structure intended to provide user access to
 // Sheet is a high level structure intended to provide user access to
 // the contents of a particular sheet within an XLSX file.
 // the contents of a particular sheet within an XLSX file.
 type Sheet struct {
 type Sheet struct {
-
+	Rows []*Row
 }
 }
 
 
 // File is a high level structure providing a slice of Sheet structs
 // File is a high level structure providing a slice of Sheet structs
 // to the user.
 // to the user.
 type File struct {
 type File struct {
+	worksheets map[string] *zip.File
+	referenceTable []string
 	Sheets []*Sheet
 	Sheets []*Sheet
 }
 }
 
 
 
 
+// readRowsFromSheet is an internal helper function that extracts the
+// rows from an XLSXWorksheet and populates them with Cells holding the
+// raw value data of each cell.  The reference table (reftable) is
+// accepted but not yet used to resolve value references.
+func readRowsFromSheet(worksheet *XLSXWorksheet, reftable []string) []*Row {
+	var rows []*Row
+	rows = make([]*Row, len(worksheet.SheetData.Row))
+	for i, rawrow := range worksheet.SheetData.Row {
+		row := new(Row)
+		row.Cells = make([]*Cell, len(rawrow.C))
+		for j, rawcell := range rawrow.C {
+			cell := new(Cell)
+			cell.data = rawcell.V.Data
+			row.Cells[j] = cell
+		}
+		rows[i] = row
+	}
+	return rows
+}
+
+
 // readSheetsFromZipFile is an internal helper function that loops
 // readSheetsFromZipFile is an internal helper function that loops
 // over the Worksheets defined in the XLSXWorkbook and loads them into
 // over the Worksheets defined in the XLSXWorkbook and loads them into
 // Sheet objects stored in the Sheets slice of a xlsx.File struct.
 // Sheet objects stored in the Sheets slice of a xlsx.File struct.
-func readSheetsFromZipFile(f *zip.File) ([]*Sheet, os.Error) {
+func readSheetsFromZipFile(f *zip.File, file *File) ([]*Sheet, os.Error) {
 	var workbook *XLSXWorkbook
 	var workbook *XLSXWorkbook
 	var error os.Error
 	var error os.Error
 	var rc io.ReadCloser
 	var rc io.ReadCloser
@@ -49,39 +84,92 @@ func readSheetsFromZipFile(f *zip.File) ([]*Sheet, os.Error) {
 		return nil, error
 		return nil, error
 	}
 	}
 	sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
 	sheets := make([]*Sheet, len(workbook.Sheets.Sheet))
-	for i, _ := range workbook.Sheets.Sheet {
+	for i, rawsheet := range workbook.Sheets.Sheet {
+		worksheet, error := getWorksheetFromSheet(rawsheet, file.worksheets)
+		if error != nil {
+			return nil, error
+		}
 		sheet := new(Sheet)
 		sheet := new(Sheet)
+		sheet.Rows = readRowsFromSheet(worksheet, file.referenceTable)
 		sheets[i] = sheet
 		sheets[i] = sheet
 	}
 	}
 	return sheets, nil
 	return sheets, nil
 }
 }
 
 
+
+// readSharedStringsFromZipFile() is an internal helper function to
+// extract a reference table from the sharedStrings.xml file within
+// the XLSX zip file.
+func readSharedStringsFromZipFile(f *zip.File) ([]string, os.Error) {
+	var sst *XLSXSST
+	var error os.Error
+	var rc io.ReadCloser
+	var reftable []string
+	rc, error = f.Open()
+	if error != nil {
+		return nil, error
+	}
+	sst = new(XLSXSST)
+	error = xml.Unmarshal(rc, sst)
+	if error != nil {
+		return nil, error
+	}
+	reftable = MakeSharedStringRefTable(sst)
+	return reftable, nil
+}
+
 // OpenFile() takes the name of an XLSX file and returns a populated
 // OpenFile() takes the name of an XLSX file and returns a populated
 // xlsx.File struct for it.
 // xlsx.File struct for it.
 func OpenFile(filename string) (x *File, e os.Error) {
 func OpenFile(filename string) (x *File, e os.Error) {
 	var f *zip.ReadCloser
 	var f *zip.ReadCloser
 	var error os.Error
 	var error os.Error
-	var xlsxFile *File
+	var file *File
 	var v *zip.File
 	var v *zip.File
+	var workbook *zip.File
+	var sharedStrings *zip.File
+	var reftable []string
+	var worksheets map[string]*zip.File
 	f, error = zip.OpenReader(filename)
 	f, error = zip.OpenReader(filename)
 	if error != nil {
 	if error != nil {
 		return nil, error
 		return nil, error
 	}
 	}
-	xlsxFile = new(File)
+	file = new(File)
+	worksheets = make(map[string]*zip.File, len(f.File))
 	for _, v = range f.File {
 	for _, v = range f.File {
-		if v.Name == "xl/workbook.xml" {
-			sheets, error := readSheetsFromZipFile(v)
-			if error != nil {
-				return nil, error
+		switch v.Name {
+		case "xl/sharedStrings.xml":
+			sharedStrings = v
+		case "xl/workbook.xml":
+			workbook = v
+		default:
+			if len(v.Name) > 12 {
+				if v.Name[0:13] == "xl/worksheets" {
+					worksheets[v.Name[14:len(v.Name)-4]]= v
+				}
 			}
 			}
-			if sheets == nil {
-				error := new(XLSXReaderError)
-				error.Error = "No sheets found in XLSX File"
-				return nil, error
-			}
-			xlsxFile.Sheets = sheets
 		}
 		}
 	}
 	}
+	file.worksheets = worksheets
+	reftable, error = readSharedStringsFromZipFile(sharedStrings)
+	if error != nil {
+		return nil, error
+	}
+	if reftable == nil {
+		error := new(XLSXReaderError)
+		error.Error = "No valid sharedStrings.xml found in XLSX file"
+		return nil, error
+	}
+	file.referenceTable = reftable
+	sheets, error := readSheetsFromZipFile(workbook, file)
+	if error != nil {
+		return nil, error
+	}
+	if sheets == nil {
+		error := new(XLSXReaderError)
+				error.Error = "No sheets found in XLSX File"
+		return nil, error
+	}
+	file.Sheets = sheets
 	f.Close()
 	f.Close()
-	return xlsxFile, nil
+	return file, nil
 }
 }

+ 100 - 2
lib_test.go

@@ -2,8 +2,10 @@ package xlsx
 
 
 
 
 import (
 import (
+	"bytes"
 	"os"
 	"os"
 	"testing"
 	"testing"
+	"xml"
 )
 )
 
 
 
 
@@ -31,6 +33,7 @@ func TestCreateSheet(t *testing.T) {
 	var xlsxFile *File
 	var xlsxFile *File
 	var error os.Error
 	var error os.Error
 	var sheet *Sheet
 	var sheet *Sheet
+	var row *Row
 	xlsxFile, error = OpenFile("testfile.xlsx")
 	xlsxFile, error = OpenFile("testfile.xlsx")
 	if error != nil {
 	if error != nil {
 		t.Error(error.String())
 		t.Error(error.String())
@@ -45,8 +48,103 @@ func TestCreateSheet(t *testing.T) {
 		return
 		return
 	}
 	}
 	sheet = xlsxFile.Sheets[0]
 	sheet = xlsxFile.Sheets[0]
-	if len(sheet.Cells) == 0 {
-		t.Error("Expected len(sheet.Cells) == 4")
+	if len(sheet.Rows) != 2 {
+		t.Error("Expected len(sheet.Rows) == 2")
+		return
+	}
+	row = sheet.Rows[0]
+	if len(row.Cells) != 2 {
+		t.Error("Expected len(row.Cells) == 2")
+		return
 	}
 	}
 }
 }
 
 
+// Test that we can correctly extract a reference table from the
+// sharedStrings.xml file embedded in the XLSX file and return a
+// reference table of string values from it.
+func TestReadSharedStringsFromZipFile(t *testing.T) {
+	var xlsxFile *File
+	var error os.Error
+	xlsxFile, error = OpenFile("testfile.xlsx")
+	if error != nil {
+		t.Error(error.String())
+		return
+	}
+	if xlsxFile.referenceTable == nil {
+		t.Error("expected non nil xlsxFile.referenceTable")
+		return
+	}
+}
+
+
+func TestReadRowsFromSheet(t *testing.T) {
+	var sharedstringsXML = bytes.NewBufferString(`
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="4" uniqueCount="4">
+  <si>
+    <t>Foo</t>
+  </si>
+  <si>
+    <t>Bar</t>
+  </si>
+  <si>
+    <t xml:space="preserve">Baz </t>
+  </si>
+  <si>
+    <t>Quuk</t>
+  </si>
+</sst>`)
+	var sheetxml = bytes.NewBufferString(`
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" 
+           xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
+  <dimension ref="A1:B2"/>
+  <sheetViews>
+    <sheetView tabSelected="1" workbookViewId="0">
+      <selection activeCell="C2" sqref="C2"/>
+    </sheetView>
+  </sheetViews>
+  <sheetFormatPr baseColWidth="10" defaultRowHeight="15"/>
+  <sheetData>
+    <row r="1" spans="1:2">
+      <c r="A1" t="s">
+        <v>0</v>
+      </c>
+      <c r="B1" t="s">
+        <v>1</v>
+      </c>
+    </row>
+    <row r="2" spans="1:2">
+      <c r="A2" t="s">
+        <v>2</v>
+      </c>
+      <c r="B2" t="s">
+        <v>3</v>
+      </c>
+    </row>
+  </sheetData>
+  <pageMargins left="0.7" right="0.7" 
+               top="0.78740157499999996" 
+               bottom="0.78740157499999996" 
+               header="0.3" 
+               footer="0.3"/>
+</worksheet>`)
+	worksheet := new(XLSXWorksheet)
+	error := xml.Unmarshal(sheetxml, worksheet)
+	if error != nil {
+		t.Error(error.String())
+		return
+	}
+	sst := new(XLSXSST)
+	error = xml.Unmarshal(sharedstringsXML, sst)
+	if error != nil {
+		t.Error(error.String())
+		return
+	}
+	reftable := MakeSharedStringRefTable(sst)
+	rows := readRowsFromSheet(worksheet, reftable)
+	if len(rows) != 2 {
+		t.Error("Expected len(rows) == 2")
+	}
+	
+}

+ 27 - 0
workbook.go

@@ -1,5 +1,12 @@
 package xlsx
 package xlsx
 
 
+import (
+	"archive/zip"
+	"fmt"
+	"io"
+	"os"
+	"xml"
+)
 
 
 // XLSXWorkbook directly maps the workbook element from the namespace
 // XLSXWorkbook directly maps the workbook element from the namespace
 // http://schemas.openxmlformats.org/spreadsheetml/2006/main -
 // http://schemas.openxmlformats.org/spreadsheetml/2006/main -
@@ -97,3 +104,23 @@ type XLSXDefinedName struct {
 type XLSXCalcPr struct {
 type XLSXCalcPr struct {
 	CalcId string "attr"
 	CalcId string "attr"
 }
 }
+
+
+// getWorksheetFromSheet() is an internal helper function to open a sheetN.xml file, referred to by an xlsx.XLSXSheet struct, from the XLSX file and unmarshal it into an xlsx.XLSXWorksheet struct.
+func getWorksheetFromSheet(sheet XLSXSheet, worksheets map[string]*zip.File) (*XLSXWorksheet, os.Error) {
+	var rc io.ReadCloser
+	var worksheet *XLSXWorksheet
+	var error os.Error
+	worksheet = new(XLSXWorksheet)
+	sheetName := fmt.Sprintf("sheet%s", sheet.SheetId)
+	f := worksheets[sheetName]
+	rc, error = f.Open()
+	if error != nil {
+		return nil, error
+	}
+	error = xml.Unmarshal(rc, worksheet)
+	if error != nil {
+		return nil, error
+	}
+	return worksheet, nil 
+}

+ 34 - 0
workbook_test.go

@@ -81,3 +81,37 @@ func TestUnmarshallWorkbookXML(t *testing.T) {
 		t.Error("workbook.CalcPr.CalcId != '125725'")
 		t.Error("workbook.CalcPr.CalcId != '125725'")
 	}
 	}
 }
 }
+
+
+// // Test we can correctly create an xlsx.XLSXWorksheet from a reference
+// // in an xlsx.XLSXSheet using getWorksheetFromSheet()
+// func TestGetWorksheetFromSheet(t *testing.T) {
+
+// 	var xlsxFile *File
+// 	var error os.Error
+// 	xlsxFile, error = OpenFile("testfile.xlsx")
+// 	if error != nil {
+// 		t.Error(error.String())
+// 		return
+// 	}
+// 	var workbook *XLSXWorkbook
+// 	workbook = new(XLSXWorkbook)
+// 	error = xml.Unmarshal(buf, workbook)
+// 	if error != nil {
+// 		t.Error(error.String())
+// 		return
+// 	}
+// 	if len(workbook.Sheets.Sheet) == 0 {
+// 		t.Error("Expected len(workbook.Sheets.Sheet) == 0")
+// 	}
+// 	sheet := workbook.Sheets.Sheet[0]
+// 	worksheet, error := getWorksheetFromSheet(sheet, file)
+// 	if error != nil {
+// 		t.Error(error.String())
+// 		return
+// 	}
+// 	if worksheet == nil {
+// 		t.Error("getWorksheetFromSheet return nil worksheet without reporting an error")
+// 		return
+// 	}
+// }