stream_file_builder.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. // Authors: Ryan Hollis (ryanh@)
  2. // The purpose of StreamFileBuilder and StreamFile is to allow streamed writing of XLSX files.
  3. // Directions:
  4. // 1. Create a StreamFileBuilder with NewStreamFileBuilder() or NewStreamFileBuilderForPath().
  5. // 2. Add the sheets and their first row of data by calling AddSheet().
  6. // 3. Call Build() to get a StreamFile. Once built, all functions on the builder will return an error.
  7. // 4. Write to the StreamFile with Write(). Writes begin on the first sheet. New rows are always written and flushed
  8. // to the io. All rows written to the same sheet must have the same number of cells as the header provided when the sheet
  9. // was created or an error will be returned.
  10. // 5. Call NextSheet() to proceed to the next sheet. Once NextSheet() is called, the previous sheet can not be edited.
  11. // 6. Call Close() to finish.
  12. // Directions for using custom styles and different data types:
  13. // 1. Create a StreamFileBuilder with NewStreamFileBuilder() or NewStreamFileBuilderForPath().
  14. // 2. Use MakeStyle() to create the styles you want yo use in your document. Keep a list of these styles.
  15. // 3. Add all the styles you created by using AddStreamStyle() or AddStreamStyleList().
  16. // 4. Add the sheets and their column styles of data by calling AddSheetS().
  17. // 5. Call Build() to get a StreamFile. Once built, all functions on the builder will return an error.
  18. // 6. Write to the StreamFile with WriteS(). Writes begin on the first sheet. New rows are always written and flushed
  19. // to the io. All rows written to the same sheet must have the same number of cells as the number of column styles
  20. // provided when adding the sheet with AddSheetS() or an error will be returned.
  21. // 5. Call NextSheet() to proceed to the next sheet. Once NextSheet() is called, the previous sheet can not be edited.
  22. // 6. Call Close() to finish.
  23. // Future work suggestions:
  24. // The current default style uses fonts that are not on Macs by default so opening the XLSX files in Numbers causes a
  25. // pop up that says there are missing fonts. The font could be changed to something that is usually found on Mac and PC.
  26. // Extend support for Formulas and Shared Strings.
  27. package xlsx
  28. import (
  29. "archive/zip"
  30. "errors"
  31. "io"
  32. "os"
  33. "strconv"
  34. "strings"
  35. )
  36. type cellStreamStyle map[int]StreamStyle
  37. type defaultCellType map[int]*CellType
  38. type StreamFileBuilder struct {
  39. built bool
  40. firstSheetAdded bool
  41. customStylesAdded bool
  42. xlsxFile *File
  43. zipWriter *zip.Writer
  44. cellTypeToStyleIds map[CellType]int
  45. maxStyleId int
  46. styleIds [][]int
  47. customStreamStyles map[StreamStyle]struct{}
  48. styleIdMap map[StreamStyle]int
  49. streamingCellMetadatas map[int]*StreamingCellMetadata
  50. sheetStreamStyles map[int]cellStreamStyle
  51. sheetDefaultCellType map[int]defaultCellType
  52. defaultColumnStreamingCellMetadataAdded bool
  53. }
  54. const (
  55. sheetFilePathPrefix = "xl/worksheets/sheet"
  56. sheetFilePathSuffix = ".xml"
  57. endSheetDataTag = "</sheetData>"
  58. dimensionTag = `<dimension ref="%s"></dimension>`
  59. // This is the index of the max style that this library will insert into XLSX sheets by default.
  60. // This allows us to predict what the style id of styles that we add will be.
  61. // TestXlsxStyleBehavior tests that this behavior continues to be what we expect.
  62. initMaxStyleId = 1
  63. )
  64. var BuiltStreamFileBuilderError = errors.New("StreamFileBuilder has already been built, functions may no longer be used")
  65. // NewStreamFileBuilder creates an StreamFileBuilder that will write to the the provided io.writer
  66. func NewStreamFileBuilder(writer io.Writer) *StreamFileBuilder {
  67. return &StreamFileBuilder{
  68. zipWriter: zip.NewWriter(writer),
  69. xlsxFile: NewFile(),
  70. cellTypeToStyleIds: make(map[CellType]int),
  71. maxStyleId: initMaxStyleId,
  72. customStreamStyles: make(map[StreamStyle]struct{}),
  73. styleIdMap: make(map[StreamStyle]int),
  74. streamingCellMetadatas: make(map[int]*StreamingCellMetadata),
  75. sheetStreamStyles: make(map[int]cellStreamStyle),
  76. sheetDefaultCellType: make(map[int]defaultCellType),
  77. }
  78. }
  79. // NewStreamFileBuilderForPath takes the name of an XLSX file and returns a builder for it.
  80. // The file will be created if it does not exist, or truncated if it does.
  81. func NewStreamFileBuilderForPath(path string) (*StreamFileBuilder, error) {
  82. file, err := os.Create(path)
  83. if err != nil {
  84. return nil, err
  85. }
  86. return NewStreamFileBuilder(file), nil
  87. }
  88. // AddSheet will add sheets with the given name with the provided headers. The headers cannot be edited later, and all
  89. // rows written to the sheet must contain the same number of cells as the header. Sheet names must be unique, or an
  90. // error will be thrown.
  91. func (sb *StreamFileBuilder) AddSheet(name string, cellTypes []*CellType) error {
  92. if sb.built {
  93. return BuiltStreamFileBuilderError
  94. }
  95. sheet, err := sb.xlsxFile.AddSheet(name)
  96. if err != nil {
  97. // Set built on error so that all subsequent calls to the builder will also fail.
  98. sb.built = true
  99. return err
  100. }
  101. sb.styleIds = append(sb.styleIds, []int{})
  102. for i, cellType := range cellTypes {
  103. var cellStyleIndex int
  104. var ok bool
  105. if cellType != nil {
  106. // The cell type is one of the attributes of a Style.
  107. // Since it is the only attribute of Style that we use, we can assume that cell types
  108. // map one to one with Styles and their Style ID.
  109. // If a new cell type is used, a new style gets created with an increased id, if an existing cell type is
  110. // used, the pre-existing style will also be used.
  111. cellStyleIndex, ok = sb.cellTypeToStyleIds[*cellType]
  112. if !ok {
  113. sb.maxStyleId++
  114. cellStyleIndex = sb.maxStyleId
  115. sb.cellTypeToStyleIds[*cellType] = sb.maxStyleId
  116. }
  117. sheet.SetType(i+1, i+1, *cellType)
  118. }
  119. sb.styleIds[len(sb.styleIds)-1] = append(sb.styleIds[len(sb.styleIds)-1], cellStyleIndex)
  120. }
  121. return nil
  122. }
  123. func (sb *StreamFileBuilder) AddSheetWithDefaultColumnMetadata(name string, columnsDefaultStreamingCellMetadata []*StreamingCellMetadata) error {
  124. if sb.built {
  125. return BuiltStreamFileBuilderError
  126. }
  127. _, err := sb.xlsxFile.AddSheet(name)
  128. if err != nil {
  129. // Set built on error so that all subsequent calls to the builder will also fail.
  130. sb.built = true
  131. return err
  132. }
  133. sb.styleIds = append(sb.styleIds, []int{})
  134. sheetIndex := len(sb.xlsxFile.Sheets) - 1
  135. cSS := make(cellStreamStyle)
  136. dCT := make(defaultCellType)
  137. for i, streamingCellMetadata := range columnsDefaultStreamingCellMetadata {
  138. var cellStyleIndex int
  139. var ok bool
  140. if streamingCellMetadata != nil {
  141. // Exact same logic as `AddSheet` to ensure compatibility as much as possible
  142. // with the `AddSheet` + `StreamFile.Write` code path
  143. cellStyleIndex, ok = sb.cellTypeToStyleIds[streamingCellMetadata.cellType]
  144. if !ok {
  145. sb.maxStyleId++
  146. cellStyleIndex = sb.maxStyleId
  147. sb.cellTypeToStyleIds[streamingCellMetadata.cellType] = sb.maxStyleId
  148. }
  149. // Add streamStyle and set default cell metadata on col
  150. sb.customStreamStyles[streamingCellMetadata.streamStyle] = struct{}{}
  151. sb.streamingCellMetadatas[i+1] = streamingCellMetadata
  152. cSS[i] = streamingCellMetadata.streamStyle
  153. dCT[i] = streamingCellMetadata.cellType.Ptr()
  154. }
  155. sb.styleIds[len(sb.styleIds)-1] = append(sb.styleIds[len(sb.styleIds)-1], cellStyleIndex)
  156. }
  157. // Add fall back streamStyle
  158. sb.customStreamStyles[StreamStyleDefaultString] = struct{}{}
  159. // Toggle to true to ensure `styleIdMap` is constructed from `customStreamStyles` on `Build`
  160. sb.customStylesAdded = true
  161. // Hack to ensure the `dimension` tag on each `worksheet` xml is stripped. Otherwise only the first
  162. // row of each worksheet will be read back rather than all rows
  163. sb.defaultColumnStreamingCellMetadataAdded = true
  164. sb.sheetStreamStyles[sheetIndex] = cSS
  165. sb.sheetDefaultCellType[sheetIndex] = dCT
  166. return nil
  167. }
  168. // AddSheetS will add a sheet with the given name and column styles. The number of column styles given
  169. // is the number of columns that will be created, and thus the number of cells each row has to have.
  170. // columnStyles[0] becomes the style of the first column, columnStyles[1] the style of the second column etc.
  171. // All the styles in columnStyles have to have been added or an error will be returned.
  172. // Sheet names must be unique, or an error will be returned.
  173. func (sb *StreamFileBuilder) AddSheetS(name string, columnStyles []StreamStyle) error {
  174. if sb.built {
  175. return BuiltStreamFileBuilderError
  176. }
  177. sheet, err := sb.xlsxFile.AddSheet(name)
  178. if err != nil {
  179. // Set built on error so that all subsequent calls to the builder will also fail.
  180. sb.built = true
  181. return err
  182. }
  183. // To make sure no new styles can be added after adding a sheet
  184. sb.firstSheetAdded = true
  185. // Check if all styles that will be used for columns have been created
  186. for _, colStyle := range columnStyles {
  187. if _, ok := sb.customStreamStyles[colStyle]; !ok {
  188. return errors.New("trying to make use of a style that has not been added")
  189. }
  190. }
  191. // Is needed for stream file to work but is not needed for streaming with styles
  192. sb.styleIds = append(sb.styleIds, []int{})
  193. if sheet.Cols == nil {
  194. panic("trying to use uninitialised ColStore")
  195. }
  196. cSS := make(map[int]StreamStyle)
  197. // Set default column styles based on the cel styles in the first row
  198. // Set the default column width to 11. This makes enough places for the
  199. // default date style cells to display the dates correctly
  200. for i, colStyle := range columnStyles {
  201. colNum := i + 1
  202. cSS[colNum] = colStyle
  203. sheet.SetColWidth(colNum, colNum, 11)
  204. }
  205. sheetIndex := len(sb.xlsxFile.Sheets) - 1
  206. sb.sheetStreamStyles[sheetIndex] = cSS
  207. return nil
  208. }
  209. // AddValidation will add a validation to a sheet.
  210. func (sb *StreamFileBuilder) AddValidation(sheetIndex int, validation *xlsxDataValidation) {
  211. sheet := sb.xlsxFile.Sheets[sheetIndex]
  212. sheet.AddDataValidation(validation)
  213. }
  214. // Build begins streaming the XLSX file to the io, by writing all the XLSX metadata. It creates a StreamFile struct
  215. // that can be used to write the rows to the sheets.
  216. func (sb *StreamFileBuilder) Build() (*StreamFile, error) {
  217. if sb.built {
  218. return nil, BuiltStreamFileBuilderError
  219. }
  220. sb.built = true
  221. parts, err := sb.xlsxFile.MarshallParts()
  222. if err != nil {
  223. return nil, err
  224. }
  225. if sb.customStylesAdded {
  226. parts["xl/styles.xml"], err = sb.marshalStyles()
  227. if err != nil {
  228. return nil, err
  229. }
  230. }
  231. es := &StreamFile{
  232. zipWriter: sb.zipWriter,
  233. xlsxFile: sb.xlsxFile,
  234. sheetXmlPrefix: make([]string, len(sb.xlsxFile.Sheets)),
  235. sheetXmlSuffix: make([]string, len(sb.xlsxFile.Sheets)),
  236. styleIds: sb.styleIds,
  237. styleIdMap: sb.styleIdMap,
  238. streamingCellMetadatas: sb.streamingCellMetadatas,
  239. sheetStreamStyles: sb.sheetStreamStyles,
  240. sheetDefaultCellType: sb.sheetDefaultCellType,
  241. }
  242. for path, data := range parts {
  243. // If the part is a sheet, don't write it yet. We only want to write the XLSX metadata files, since at this
  244. // point the sheets are still empty. The sheet files will be written later as their rows come in.
  245. if strings.HasPrefix(path, sheetFilePathPrefix) {
  246. // sb.default ColumnStreamingCellMetadataAdded is a hack because neither the `AddSheet` nor `AddSheetS` codepaths
  247. // actually encode a valid worksheet dimension. `AddSheet` encodes an empty one: "" and `AddSheetS` encodes
  248. // an effectively empty one: "A1". `AddSheetWithDefaultColumnMetadata` uses logic from both paths which results
  249. // in an effectively invalid dimension being encoded which, upon read, results in only reading in the header of
  250. // a given worksheet and non of the rows that follow
  251. if err := sb.processEmptySheetXML(es, path, data, !sb.customStylesAdded || sb.defaultColumnStreamingCellMetadataAdded); err != nil {
  252. return nil, err
  253. }
  254. continue
  255. }
  256. metadataFile, err := sb.zipWriter.Create(path)
  257. if err != nil {
  258. return nil, err
  259. }
  260. _, err = metadataFile.Write([]byte(data))
  261. if err != nil {
  262. return nil, err
  263. }
  264. }
  265. if err := es.NextSheet(); err != nil {
  266. return nil, err
  267. }
  268. return es, nil
  269. }
  270. func (sb *StreamFileBuilder) marshalStyles() (string, error) {
  271. for streamStyle := range sb.customStreamStyles {
  272. XfId := handleStyleForXLSX(streamStyle.style, streamStyle.xNumFmtId, sb.xlsxFile.styles)
  273. sb.styleIdMap[streamStyle] = XfId
  274. }
  275. styleSheetXMLString, err := sb.xlsxFile.styles.Marshal()
  276. if err != nil {
  277. return "", err
  278. }
  279. return styleSheetXMLString, nil
  280. }
  281. // AddStreamStyle adds a new style to the style sheet.
  282. // Only Styles that have been added through this function will be usable.
  283. // This function cannot be used after AddSheetS or Build has been called, and if it is
  284. // called after AddSheetS or Buildit will return an error.
  285. func (sb *StreamFileBuilder) AddStreamStyle(streamStyle StreamStyle) error {
  286. if sb.firstSheetAdded {
  287. return errors.New("at least one sheet has been added, cannot add new styles anymore")
  288. }
  289. if sb.built {
  290. return errors.New("file has been build, cannot add new styles anymore")
  291. }
  292. sb.customStreamStyles[streamStyle] = struct{}{}
  293. sb.customStylesAdded = true
  294. return nil
  295. }
  296. // AddStreamStyleList adds a list of new styles to the style sheet.
  297. // Only Styles that have been added through either this function or AddStreamStyle will be usable.
  298. // This function cannot be used after AddSheetS and Build has been called, and if it is
  299. // called after AddSheetS and Build it will return an error.
  300. func (sb *StreamFileBuilder) AddStreamStyleList(streamStyles []StreamStyle) error {
  301. for _, streamStyle := range streamStyles {
  302. err := sb.AddStreamStyle(streamStyle)
  303. if err != nil {
  304. return err
  305. }
  306. }
  307. return nil
  308. }
  309. // processEmptySheetXML will take in the path and XML data of an empty sheet, and will save the beginning and end of the
  310. // XML file so that these can be written at the right time.
  311. func (sb *StreamFileBuilder) processEmptySheetXML(sf *StreamFile, path, data string, removeDimensionTagFlag bool) error {
  312. // Get the sheet index from the path
  313. sheetIndex, err := getSheetIndex(sf, path)
  314. if err != nil {
  315. return err
  316. }
  317. // Remove the Dimension tag. Since more rows are going to be written to the sheet, it will be wrong.
  318. // It is valid to for a sheet to be missing a Dimension tag, but it is not valid for it to be wrong.
  319. if removeDimensionTagFlag {
  320. data = removeDimensionTag(data)
  321. }
  322. // Split the sheet at the end of its SheetData tag so that more rows can be added inside.
  323. prefix, suffix, err := splitSheetIntoPrefixAndSuffix(data)
  324. if err != nil {
  325. return err
  326. }
  327. sf.sheetXmlPrefix[sheetIndex] = prefix
  328. sf.sheetXmlSuffix[sheetIndex] = suffix
  329. return nil
  330. }
  331. // getSheetIndex parses the path to the XLSX sheet data and returns the index
  332. // The files that store the data for each sheet must have the format:
  333. // xl/worksheets/sheet123.xml
  334. // where 123 is the index of the sheet. This file path format is part of the XLSX file standard.
  335. func getSheetIndex(sf *StreamFile, path string) (int, error) {
  336. indexString := path[len(sheetFilePathPrefix) : len(path)-len(sheetFilePathSuffix)]
  337. sheetXLSXIndex, err := strconv.Atoi(indexString)
  338. if err != nil {
  339. return -1, errors.New("unexpected sheet file name from xlsx package")
  340. }
  341. if sheetXLSXIndex < 1 || len(sf.sheetXmlPrefix) < sheetXLSXIndex ||
  342. len(sf.sheetXmlSuffix) < sheetXLSXIndex || len(sf.xlsxFile.Sheets) < sheetXLSXIndex {
  343. return -1, errors.New("unexpected sheet index")
  344. }
  345. sheetArrayIndex := sheetXLSXIndex - 1
  346. return sheetArrayIndex, nil
  347. }
  348. // removeDimensionTag will return the passed in XLSX Spreadsheet XML with the dimension tag removed.
  349. // data is the XML data for the sheet
  350. // sheet is the Sheet struct that the XML was created from.
  351. func removeDimensionTag(data string) string {
  352. start := strings.Index(data, "<dimension")
  353. end := strings.Index(data, "</dimension>") + 12
  354. return data[0:start] + data[end:]
  355. }
  356. // splitSheetIntoPrefixAndSuffix will split the provided XML sheet into a prefix and a suffix so that
  357. // more spreadsheet rows can be inserted in between.
  358. func splitSheetIntoPrefixAndSuffix(data string) (string, string, error) {
  359. // Split the sheet at the end of its SheetData tag so that more rows can be added inside.
  360. sheetParts := strings.Split(data, endSheetDataTag)
  361. if len(sheetParts) != 2 {
  362. return "", "", errors.New("unexpected Sheet XML: SheetData close tag not found")
  363. }
  364. return sheetParts[0], sheetParts[1], nil
  365. }