Browse Source

Turn shared string RefTable into a struct instead of straight slice.

Geoffrey J. Teale 11 years ago
parent
commit
8fc25a0e82
7 changed files with 116 additions and 65 deletions
  1. 1 1
      file.go
  2. 6 6
      lib.go
  3. 39 21
      sharedstrings.go
  4. 16 8
      sharedstrings_test.go
  5. 16 14
      sheet.go
  6. 37 14
      sheet_test.go
  7. 1 1
      worksheet.go

+ 1 - 1
file.go

@@ -8,7 +8,7 @@ import (
 // to the user.
 // to the user.
 type File struct {
 type File struct {
 	worksheets     map[string]*zip.File
 	worksheets     map[string]*zip.File
-	referenceTable []string
+	referenceTable *RefTable
 	styles         *xlsxStyles
 	styles         *xlsxStyles
 	Sheets         []*Sheet          // sheet access by index
 	Sheets         []*Sheet          // sheet access by index
 	Sheet          map[string]*Sheet // sheet access by name
 	Sheet          map[string]*Sheet // sheet access by name

+ 6 - 6
lib.go

@@ -264,7 +264,7 @@ func makeRowFromRaw(rawrow xlsxRow) *Row {
 // getValueFromCellData attempts to extract a valid value, usable in
 // getValueFromCellData attempts to extract a valid value, usable in
 // CSV form from the raw cell value.  Note - this is not actually
 // CSV form from the raw cell value.  Note - this is not actually
 // general enough - we should support retaining tabs and newlines.
 // general enough - we should support retaining tabs and newlines.
-func getValueFromCellData(rawcell xlsxC, reftable []string) string {
+func getValueFromCellData(rawcell xlsxC, reftable *RefTable) string {
 	var value string = ""
 	var value string = ""
 	var data string = rawcell.V
 	var data string = rawcell.V
 	if len(data) > 0 {
 	if len(data) > 0 {
@@ -274,7 +274,7 @@ func getValueFromCellData(rawcell xlsxC, reftable []string) string {
 			if error != nil {
 			if error != nil {
 				panic(error)
 				panic(error)
 			}
 			}
-			value = reftable[ref]
+			value = reftable.ResolveSharedString(ref)
 		} else {
 		} else {
 			value = vval
 			value = vval
 		}
 		}
@@ -289,7 +289,7 @@ func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File) ([]*Row, int, int)
 	var rows []*Row
 	var rows []*Row
 	var row *Row
 	var row *Row
 	var minCol, maxCol, minRow, maxRow, colCount, rowCount int
 	var minCol, maxCol, minRow, maxRow, colCount, rowCount int
-	var reftable []string
+	var reftable *RefTable
 	var err error
 	var err error
 	var insertRowIndex, insertColIndex int
 	var insertRowIndex, insertColIndex int
 
 
@@ -408,12 +408,12 @@ func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]strin
 // readSharedStringsFromZipFile() is an internal helper function to
 // readSharedStringsFromZipFile() is an internal helper function to
 // extract a reference table from the sharedStrings.xml file within
 // extract a reference table from the sharedStrings.xml file within
 // the XLSX zip file.
 // the XLSX zip file.
-func readSharedStringsFromZipFile(f *zip.File) ([]string, error) {
+func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
 	var sst *xlsxSST
 	var sst *xlsxSST
 	var error error
 	var error error
 	var rc io.ReadCloser
 	var rc io.ReadCloser
 	var decoder *xml.Decoder
 	var decoder *xml.Decoder
-	var reftable []string
+	var reftable *RefTable
 	rc, error = f.Open()
 	rc, error = f.Open()
 	if error != nil {
 	if error != nil {
 		return nil, error
 		return nil, error
@@ -494,7 +494,7 @@ func ReadZipReader(r *zip.Reader) (*File, error) {
 	var err error
 	var err error
 	var file *File
 	var file *File
 	var names []string
 	var names []string
-	var reftable []string
+	var reftable *RefTable
 	var sharedStrings *zip.File
 	var sharedStrings *zip.File
 	var sheetMap map[string]*Sheet
 	var sheetMap map[string]*Sheet
 	var sheetXMLMap map[string]string
 	var sheetXMLMap map[string]string

+ 39 - 21
sharedstrings.go

@@ -26,35 +26,34 @@ type xlsxR struct {
 	T string `xml:"t"`
 	T string `xml:"t"`
 }
 }
 
 
-// // xlsxT directly maps the t element from the namespace
-// // http://schemas.openxmlformats.org/spreadsheetml/2006/main -
-// // currently I have not checked this for completeness - it does as
-// // much as I need.
-// type xlsxT struct {
-// 	Data string `xml:"chardata"`
-// }
 
 
-
-type RefTable []string
+type RefTable struct {
+	indexedStrings []string
+	knownStrings map[string]int
+}
 
 
 // NewSharedStringRefTable() creates a new, empty RefTable.
 // NewSharedStringRefTable() creates a new, empty RefTable.
-func NewSharedStringRefTable() RefTable {
-	return RefTable{}
+func NewSharedStringRefTable() *RefTable {
+	rt := RefTable{}
+	rt.knownStrings = make(map[string]int)
+	return &rt
 }
 }
 
 
 // MakeSharedStringRefTable() takes an xlsxSST struct and converts
 // MakeSharedStringRefTable() takes an xlsxSST struct and converts
 // its contents to a slice of strings used to refer to string values
 // its contents to a slice of strings used to refer to string values
 // by numeric index - this is the model used within XLSX worksheet (a
 // by numeric index - this is the model used within XLSX worksheet (a
 // numeric reference is stored to a shared cell value).
 // numeric reference is stored to a shared cell value).
-func MakeSharedStringRefTable(source *xlsxSST) RefTable {
-	reftable := make(RefTable, len(source.SI))
-	for i, si := range source.SI {
+func MakeSharedStringRefTable(source *xlsxSST) *RefTable {
+	reftable := NewSharedStringRefTable()
+	for _, si := range source.SI {
 		if len(si.R) > 0 {
 		if len(si.R) > 0 {
+			newString := ""
 			for j := 0; j < len(si.R); j++ {
 			for j := 0; j < len(si.R); j++ {
-				reftable[i] = reftable[i] + si.R[j].T
+				newString = newString + si.R[j].T
 			}
 			}
+			reftable.AddString(newString)
 		} else {
 		} else {
-			reftable[i] = si.T
+			reftable.AddString(si.T)
 		}
 		}
 	}
 	}
 	return reftable
 	return reftable
@@ -62,11 +61,11 @@ func MakeSharedStringRefTable(source *xlsxSST) RefTable {
 
 
 // makeXlsxSST() takes a RefTable and returns an
 // makeXlsxSST() takes a RefTable and returns an
 // equivalent xlsxSST representation.
 // equivalent xlsxSST representation.
-func (rt RefTable) makeXlsxSST() xlsxSST {
+func (rt *RefTable) makeXLSXSST() xlsxSST {
 	sst := xlsxSST{}
 	sst := xlsxSST{}
-	sst.Count = len(rt)
+	sst.Count = len(rt.indexedStrings)
 	sst.UniqueCount = sst.Count
 	sst.UniqueCount = sst.Count
-	for _, ref := range rt {
+	for _, ref := range rt.indexedStrings {
 		si := xlsxSI{}
 		si := xlsxSI{}
 		si.T = ref
 		si.T = ref
 		sst.SI = append(sst.SI, si)
 		sst.SI = append(sst.SI, si)
@@ -78,6 +77,25 @@ func (rt RefTable) makeXlsxSST() xlsxSST {
 // a provided reference table (just a slice of strings in the correct
 // a provided reference table (just a slice of strings in the correct
 // order).  This function only exists to provide clarity or purpose
 // order).  This function only exists to provide clarity or purpose
 // via its name.
 // via its name.
-func (rt RefTable) ResolveSharedString(index int) string {
-	return rt[index]
+func (rt *RefTable) ResolveSharedString(index int) string {
+	return rt.indexedStrings[index]
+}
+
+
+// AddString adds a string to the reference table and returns its
+// numeric index.  If the string already exists then it simply returns
+// the existing index.
+func (rt *RefTable) AddString(str string) int {
+	index, ok := rt.knownStrings[str]
+	if ok {
+		return index
+	}
+	rt.indexedStrings = append(rt.indexedStrings, str)
+	index = len(rt.indexedStrings) - 1
+	rt.knownStrings[str] = index
+	return index
+}
+
+func (rt *RefTable) Length() int {
+	return len(rt.indexedStrings)
 }
 }

+ 16 - 8
sharedstrings_test.go

@@ -33,10 +33,18 @@ func (s *SharedStringsSuite) SetUpTest(c *C) {
         </sst>`)
         </sst>`)
 }
 }
 
 
+// We can add a new string to the RefTable
+func (s *SharedStringsSuite) TestRefTableAddString(c *C) {
+	refTable := NewSharedStringRefTable()
+	index := refTable.AddString("Foo")
+	c.Assert(index, Equals, 0)
+	c.Assert(refTable.ResolveSharedString(0), Equals, "Foo")
+}
+
 func (s *SharedStringsSuite) TestCreateNewSharedStringRefTable(c *C) {
 func (s *SharedStringsSuite) TestCreateNewSharedStringRefTable(c *C) {
 	refTable := NewSharedStringRefTable()
 	refTable := NewSharedStringRefTable()
-	refTable = append(refTable, "Foo")
-	refTable = append(refTable, "Bar")
+	refTable.AddString("Foo")
+	refTable.AddString("Bar")
 	c.Assert(refTable.ResolveSharedString(0), Equals, "Foo")
 	c.Assert(refTable.ResolveSharedString(0), Equals, "Foo")
 	c.Assert(refTable.ResolveSharedString(1), Equals, "Bar")
 	c.Assert(refTable.ResolveSharedString(1), Equals, "Bar")
 }
 }
@@ -48,9 +56,9 @@ func (s *SharedStringsSuite) TestMakeSharedStringRefTable(c *C) {
 	err := xml.NewDecoder(s.SharedStringsXML).Decode(sst)
 	err := xml.NewDecoder(s.SharedStringsXML).Decode(sst)
 	c.Assert(err, IsNil)
 	c.Assert(err, IsNil)
 	reftable := MakeSharedStringRefTable(sst)
 	reftable := MakeSharedStringRefTable(sst)
-	c.Assert(len(reftable), Equals, 4)
-	c.Assert(reftable[0], Equals, "Foo")
-	c.Assert(reftable[1], Equals, "Bar")
+	c.Assert(reftable.Length(), Equals, 4)
+	c.Assert(reftable.ResolveSharedString(0), Equals, "Foo")
+	c.Assert(reftable.ResolveSharedString(1), Equals, "Bar")
 }
 }
 
 
 // Test we can correctly resolve a numeric reference in the reference
 // Test we can correctly resolve a numeric reference in the reference
@@ -79,9 +87,9 @@ func (s *SharedStringsSuite) TestUnmarshallSharedStrings(c *C) {
 // Test we can correctly create the xlsx.xlsxSST struct from a RefTable
 // Test we can correctly create the xlsx.xlsxSST struct from a RefTable
 func (s *SharedStringsSuite) TestMakeXlsxSST(c *C) {
 func (s *SharedStringsSuite) TestMakeXlsxSST(c *C) {
 	refTable := NewSharedStringRefTable()
 	refTable := NewSharedStringRefTable()
-	refTable = append(refTable, "Foo")
-	refTable = append(refTable, "Bar")
-	sst := refTable.makeXlsxSST()
+	refTable.AddString("Foo")
+	refTable.AddString("Bar")
+	sst := refTable.makeXLSXSST()
 	c.Assert(sst, NotNil)
 	c.Assert(sst, NotNil)
 	c.Assert(sst.Count, Equals, 2)
 	c.Assert(sst.Count, Equals, 2)
 	c.Assert(sst.UniqueCount, Equals, 2)
 	c.Assert(sst.UniqueCount, Equals, 2)

+ 16 - 14
sheet.go

@@ -1,9 +1,10 @@
 package xlsx
 package xlsx
 
 
 import (
 import (
-	"bytes"
-	"encoding/xml"
+	// "bytes"
+	// "encoding/xml"
 	"fmt"
 	"fmt"
+	"strconv"
 )
 )
 
 
 // Sheet is a high level structure intended to provide user access to
 // Sheet is a high level structure intended to provide user access to
@@ -25,7 +26,7 @@ func (s *Sheet) AddRow() *Row {
 }
 }
 
 
 // Dump sheet to its XML representation
 // Dump sheet to its XML representation
-func (s *Sheet) makeXLSXSheet() ([]byte, error) {
+func (s *Sheet) makeXLSXSheet(refTable *RefTable) *xlsxWorksheet {
 	worksheet := &xlsxWorksheet{}
 	worksheet := &xlsxWorksheet{}
 	xSheet := xlsxSheetData{}
 	xSheet := xlsxSheetData{}
 	maxRow := 0
 	maxRow := 0
@@ -42,7 +43,7 @@ func (s *Sheet) makeXLSXSheet() ([]byte, error) {
 			}
 			}
 			xC := xlsxC{}
 			xC := xlsxC{}
 			xC.R = fmt.Sprintf("%s%d", numericToLetters(c), r + 1)
 			xC.R = fmt.Sprintf("%s%d", numericToLetters(c), r + 1)
-			xC.V = cell.Value
+			xC.V = strconv.Itoa(refTable.AddString(cell.Value))
 			xC.T = "s" // Hardcode string type, for now.
 			xC.T = "s" // Hardcode string type, for now.
 			xRow.C = append(xRow.C, xC)
 			xRow.C = append(xRow.C, xC)
 		}
 		}
@@ -53,15 +54,16 @@ func (s *Sheet) makeXLSXSheet() ([]byte, error) {
 	dimension.Ref = fmt.Sprintf("A1:%s%d",
 	dimension.Ref = fmt.Sprintf("A1:%s%d",
 		numericToLetters(maxCell), maxRow + 1)
 		numericToLetters(maxCell), maxRow + 1)
 	worksheet.Dimension = dimension
 	worksheet.Dimension = dimension
-	output := bytes.NewBufferString(xml.Header)
-	body, err := xml.MarshalIndent(worksheet, "  ", "  ")
-	if err != nil {
-		return nil, err
-	}
-	_, err = output.Write(body)
-	if err != nil {
-		return nil, err
-	}
-	return output.Bytes(), nil
+	return worksheet
+	// output := bytes.NewBufferString(xml.Header)
+	// body, err := xml.MarshalIndent(worksheet, "  ", "  ")
+	// if err != nil {
+	// 	return nil, err
+	// }
+	// _, err = output.Write(body)
+	// if err != nil {
+	// 	return nil, err
+	// }
+	// return output.Bytes(), nil
 
 
 }
 }

+ 37 - 14
sheet_test.go

@@ -24,19 +24,42 @@ func (s *SheetSuite) TestMakeXLSXSheetFromRows(c *C) {
 	row := sheet.AddRow()
 	row := sheet.AddRow()
 	cell := row.AddCell()
 	cell := row.AddCell()
 	cell.Value = "A cell!"
 	cell.Value = "A cell!"
-	xSheet, err := sheet.makeXLSXSheet()
-	c.Assert(err, IsNil)
-	expectedXLSXSheet := `<?xml version="1.0" encoding="UTF-8"?>
-  <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
-    <dimension ref="A1:A1"></dimension>
-    <sheetData>
-      <row r="1">
-        <c r="A1" t="s">
-          <v>A cell!</v>
-        </c>
-      </row>
-    </sheetData>
-  </worksheet>`
-	c.Assert(string(xSheet), Equals, expectedXLSXSheet)
+	refTable := NewSharedStringRefTable()
+	// refTable.AddStringsFromSheet(sheet)
+	xSheet:= sheet.makeXLSXSheet(refTable)
+	// xSST := refTable.makeXLSXSST()
+	c.Assert(xSheet.Dimension.Ref, Equals, "A1:A1")
+	c.Assert(xSheet.SheetData.Row, HasLen, 1)
+	xRow := xSheet.SheetData.Row[0]
+	c.Assert(xRow.R, Equals, 1)
+	c.Assert(xRow.Spans, Equals, "")
+	c.Assert(xRow.C, HasLen, 1)
+	xC := xRow.C[0]
+	c.Assert(xC.R, Equals, "A1")
+	c.Assert(xC.S, Equals, 0)
+	c.Assert(xC.T, Equals, "s") // Shared string type
+	c.Assert(xC.V, Equals, "0") // reference to shared string
 }
 }
 
 
+
+// 	expectedXLSXSheet := `<?xml version="1.0" encoding="UTF-8"?>
+//   <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
+//     <dimension ref="A1:A1"></dimension>
+//     <sheetData>
+//       <row r="1">
+//         <c r="A1" t="s">
+//           <v>0</v>
+//         </c>
+//       </row>
+//     </sheetData>
+//   </worksheet>`
+// 	expectedXLSXSST := `<?xml version="1.0" encoding="UTF-8"?>
+//   <sst uniqueCount="1">
+//     <si>
+//       <t>A cell!</t>
+//     </si>
+//   </sst>`
+// 	c.Assert(string(xSheet), Equals, expectedXLSXSheet)
+// 	c.Assert(string(xSST), Equals, expectedXLSXSST)
+// }
+

+ 1 - 1
worksheet.go

@@ -49,7 +49,7 @@ type xlsxC struct {
 	R string `xml:"r,attr"`  // Cell ID, e.g. A1
 	R string `xml:"r,attr"`  // Cell ID, e.g. A1
 	S int    `xml:"s,attr,omitempty"`  // Style reference.
 	S int    `xml:"s,attr,omitempty"`  // Style reference.
 	T string `xml:"t,attr"`  // Type.
 	T string `xml:"t,attr"`  // Type.
-	V string `xml:"v"`       // Value
+	V string  `xml:"v"`       // Value
 }
 }
 
 
 // get cell
 // get cell