stream.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. // Copyright 2016 - 2021 The excelize Authors. All rights reserved. Use of
  2. // this source code is governed by a BSD-style license that can be found in
  3. // the LICENSE file.
  4. //
  5. // Package excelize providing a set of functions that allow you to write to
  6. // and read from XLSX / XLSM / XLTM files. Supports reading and writing
  7. // spreadsheet documents generated by Microsoft Excel™ 2007 and later. Supports
  8. // complex components by high compatibility, and provided streaming API for
  9. // generating or reading data from a worksheet with huge amounts of data. This
  10. // library needs Go version 1.15 or later.
  11. package excelize
  12. import (
  13. "bytes"
  14. "encoding/xml"
  15. "fmt"
  16. "io"
  17. "io/ioutil"
  18. "os"
  19. "reflect"
  20. "strconv"
  21. "strings"
  22. "time"
  23. )
  24. // StreamWriter defined the type of stream writer.
  25. type StreamWriter struct {
  26. File *File
  27. Sheet string
  28. SheetID int
  29. sheetWritten bool
  30. cols string
  31. worksheet *xlsxWorksheet
  32. rawData bufferedWriter
  33. mergeCellsCount int
  34. mergeCells string
  35. tableParts string
  36. }
  37. // NewStreamWriter return stream writer struct by given worksheet name for
  38. // generate new worksheet with large amounts of data. Note that after set
  39. // rows, you must call the 'Flush' method to end the streaming writing
  40. // process and ensure that the order of line numbers is ascending, the common
  41. // API and stream API can't be work mixed to writing data on the worksheets,
  42. // you can't get cell value when in-memory chunks data over 16MB. For
  43. // example, set data for worksheet of size 102400 rows x 50 columns with
  44. // numbers and style:
  45. //
  46. // file := excelize.NewFile()
  47. // streamWriter, err := file.NewStreamWriter("Sheet1")
  48. // if err != nil {
  49. // fmt.Println(err)
  50. // }
  51. // styleID, err := file.NewStyle(`{"font":{"color":"#777777"}}`)
  52. // if err != nil {
  53. // fmt.Println(err)
  54. // }
  55. // if err := streamWriter.SetRow("A1", []interface{}{excelize.Cell{StyleID: styleID, Value: "Data"}},
  56. // excelize.RowOpts{Height: 45, Hidden: false}); err != nil {
  57. // fmt.Println(err)
  58. // }
  59. // for rowID := 2; rowID <= 102400; rowID++ {
  60. // row := make([]interface{}, 50)
  61. // for colID := 0; colID < 50; colID++ {
  62. // row[colID] = rand.Intn(640000)
  63. // }
  64. // cell, _ := excelize.CoordinatesToCellName(1, rowID)
  65. // if err := streamWriter.SetRow(cell, row); err != nil {
  66. // fmt.Println(err)
  67. // }
  68. // }
  69. // if err := streamWriter.Flush(); err != nil {
  70. // fmt.Println(err)
  71. // }
  72. // if err := file.SaveAs("Book1.xlsx"); err != nil {
  73. // fmt.Println(err)
  74. // }
  75. //
  76. // Set cell value and cell formula for a worksheet with stream writer:
  77. //
  78. // err := streamWriter.SetRow("A1", []interface{}{
  79. // excelize.Cell{Value: 1},
  80. // excelize.Cell{Value: 2},
  81. // excelize.Cell{Formula: "SUM(A1,B1)"}});
  82. //
  83. func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) {
  84. sheetID := f.getSheetID(sheet)
  85. if sheetID == -1 {
  86. return nil, fmt.Errorf("sheet %s is not exist", sheet)
  87. }
  88. sw := &StreamWriter{
  89. File: f,
  90. Sheet: sheet,
  91. SheetID: sheetID,
  92. }
  93. var err error
  94. sw.worksheet, err = f.workSheetReader(sheet)
  95. if err != nil {
  96. return nil, err
  97. }
  98. sheetPath := f.sheetMap[trimSheetName(sheet)]
  99. if f.streams == nil {
  100. f.streams = make(map[string]*StreamWriter)
  101. }
  102. f.streams[sheetPath] = sw
  103. _, _ = sw.rawData.WriteString(XMLHeader + `<worksheet` + templateNamespaceIDMap)
  104. bulkAppendFields(&sw.rawData, sw.worksheet, 2, 5)
  105. return sw, err
  106. }
  107. // AddTable creates an Excel table for the StreamWriter using the given
  108. // coordinate area and format set. For example, create a table of A1:D5:
  109. //
  110. // err := sw.AddTable("A1", "D5", "")
  111. //
  112. // Create a table of F2:H6 with format set:
  113. //
  114. // err := sw.AddTable("F2", "H6", `{
  115. // "table_name": "table",
  116. // "table_style": "TableStyleMedium2",
  117. // "show_first_column": true,
  118. // "show_last_column": true,
  119. // "show_row_stripes": false,
  120. // "show_column_stripes": true
  121. // }`)
  122. //
  123. // Note that the table must be at least two lines including the header. The
  124. // header cells must contain strings and must be unique.
  125. //
  126. // Currently only one table is allowed for a StreamWriter. AddTable must be
  127. // called after the rows are written but before Flush.
  128. //
  129. // See File.AddTable for details on the table format.
  130. func (sw *StreamWriter) AddTable(hcell, vcell, format string) error {
  131. formatSet, err := parseFormatTableSet(format)
  132. if err != nil {
  133. return err
  134. }
  135. coordinates, err := areaRangeToCoordinates(hcell, vcell)
  136. if err != nil {
  137. return err
  138. }
  139. _ = sortCoordinates(coordinates)
  140. // Correct the minimum number of rows, the table at least two lines.
  141. if coordinates[1] == coordinates[3] {
  142. coordinates[3]++
  143. }
  144. // Correct table reference coordinate area, such correct C1:B3 to B1:C3.
  145. ref, err := sw.File.coordinatesToAreaRef(coordinates)
  146. if err != nil {
  147. return err
  148. }
  149. // create table columns using the first row
  150. tableHeaders, err := sw.getRowValues(coordinates[1], coordinates[0], coordinates[2])
  151. if err != nil {
  152. return err
  153. }
  154. tableColumn := make([]*xlsxTableColumn, len(tableHeaders))
  155. for i, name := range tableHeaders {
  156. tableColumn[i] = &xlsxTableColumn{
  157. ID: i + 1,
  158. Name: name,
  159. }
  160. }
  161. tableID := sw.File.countTables() + 1
  162. name := formatSet.TableName
  163. if name == "" {
  164. name = "Table" + strconv.Itoa(tableID)
  165. }
  166. table := xlsxTable{
  167. XMLNS: NameSpaceSpreadSheet.Value,
  168. ID: tableID,
  169. Name: name,
  170. DisplayName: name,
  171. Ref: ref,
  172. AutoFilter: &xlsxAutoFilter{
  173. Ref: ref,
  174. },
  175. TableColumns: &xlsxTableColumns{
  176. Count: len(tableColumn),
  177. TableColumn: tableColumn,
  178. },
  179. TableStyleInfo: &xlsxTableStyleInfo{
  180. Name: formatSet.TableStyle,
  181. ShowFirstColumn: formatSet.ShowFirstColumn,
  182. ShowLastColumn: formatSet.ShowLastColumn,
  183. ShowRowStripes: formatSet.ShowRowStripes,
  184. ShowColumnStripes: formatSet.ShowColumnStripes,
  185. },
  186. }
  187. sheetRelationshipsTableXML := "../tables/table" + strconv.Itoa(tableID) + ".xml"
  188. tableXML := strings.Replace(sheetRelationshipsTableXML, "..", "xl", -1)
  189. // Add first table for given sheet.
  190. sheetPath := sw.File.sheetMap[trimSheetName(sw.Sheet)]
  191. sheetRels := "xl/worksheets/_rels/" + strings.TrimPrefix(sheetPath, "xl/worksheets/") + ".rels"
  192. rID := sw.File.addRels(sheetRels, SourceRelationshipTable, sheetRelationshipsTableXML, "")
  193. sw.tableParts = fmt.Sprintf(`<tableParts count="1"><tablePart r:id="rId%d"></tablePart></tableParts>`, rID)
  194. sw.File.addContentTypePart(tableID, "table")
  195. b, _ := xml.Marshal(table)
  196. sw.File.saveFileList(tableXML, b)
  197. return nil
  198. }
  199. // Extract values from a row in the StreamWriter.
  200. func (sw *StreamWriter) getRowValues(hrow, hcol, vcol int) (res []string, err error) {
  201. res = make([]string, vcol-hcol+1)
  202. r, err := sw.rawData.Reader()
  203. if err != nil {
  204. return nil, err
  205. }
  206. dec := sw.File.xmlNewDecoder(r)
  207. for {
  208. token, err := dec.Token()
  209. if err == io.EOF {
  210. return res, nil
  211. }
  212. if err != nil {
  213. return nil, err
  214. }
  215. startElement, ok := getRowElement(token, hrow)
  216. if !ok {
  217. continue
  218. }
  219. // decode cells
  220. var row xlsxRow
  221. if err := dec.DecodeElement(&row, &startElement); err != nil {
  222. return nil, err
  223. }
  224. for _, c := range row.C {
  225. col, _, err := CellNameToCoordinates(c.R)
  226. if err != nil {
  227. return nil, err
  228. }
  229. if col < hcol || col > vcol {
  230. continue
  231. }
  232. res[col-hcol] = c.V
  233. }
  234. return res, nil
  235. }
  236. }
  237. // Check if the token is an XLSX row with the matching row number.
  238. func getRowElement(token xml.Token, hrow int) (startElement xml.StartElement, ok bool) {
  239. startElement, ok = token.(xml.StartElement)
  240. if !ok {
  241. return
  242. }
  243. ok = startElement.Name.Local == "row"
  244. if !ok {
  245. return
  246. }
  247. ok = false
  248. for _, attr := range startElement.Attr {
  249. if attr.Name.Local != "r" {
  250. continue
  251. }
  252. row, _ := strconv.Atoi(attr.Value)
  253. if row == hrow {
  254. ok = true
  255. return
  256. }
  257. }
  258. return
  259. }
  260. // Cell can be used directly in StreamWriter.SetRow to specify a style and
  261. // a value.
  262. type Cell struct {
  263. StyleID int
  264. Formula string
  265. Value interface{}
  266. }
  267. // RowOpts define the options for set row.
  268. type RowOpts struct {
  269. Height float64
  270. Hidden bool
  271. }
  272. // SetRow writes an array to stream rows by giving a worksheet name, starting
  273. // coordinate and a pointer to an array of values. Note that you must call the
  274. // 'Flush' method to end the streaming writing process.
  275. //
  276. // As a special case, if Cell is used as a value, then the Cell.StyleID will be
  277. // applied to that cell.
  278. func (sw *StreamWriter) SetRow(axis string, values []interface{}, opts ...RowOpts) error {
  279. col, row, err := CellNameToCoordinates(axis)
  280. if err != nil {
  281. return err
  282. }
  283. if !sw.sheetWritten {
  284. if len(sw.cols) > 0 {
  285. _, _ = sw.rawData.WriteString("<cols>" + sw.cols + "</cols>")
  286. }
  287. _, _ = sw.rawData.WriteString(`<sheetData>`)
  288. sw.sheetWritten = true
  289. }
  290. attrs, err := marshalRowAttrs(opts...)
  291. if err != nil {
  292. return err
  293. }
  294. fmt.Fprintf(&sw.rawData, `<row r="%d"%s>`, row, attrs)
  295. for i, val := range values {
  296. axis, err := CoordinatesToCellName(col+i, row)
  297. if err != nil {
  298. return err
  299. }
  300. c := xlsxC{R: axis}
  301. if v, ok := val.(Cell); ok {
  302. c.S = v.StyleID
  303. val = v.Value
  304. setCellFormula(&c, v.Formula)
  305. } else if v, ok := val.(*Cell); ok && v != nil {
  306. c.S = v.StyleID
  307. val = v.Value
  308. setCellFormula(&c, v.Formula)
  309. }
  310. if err = setCellValFunc(&c, val); err != nil {
  311. _, _ = sw.rawData.WriteString(`</row>`)
  312. return err
  313. }
  314. writeCell(&sw.rawData, c)
  315. }
  316. _, _ = sw.rawData.WriteString(`</row>`)
  317. return sw.rawData.Sync()
  318. }
  319. // marshalRowAttrs prepare attributes of the row by given options.
  320. func marshalRowAttrs(opts ...RowOpts) (attrs string, err error) {
  321. var opt *RowOpts
  322. for _, o := range opts {
  323. opt = &o
  324. }
  325. if opt == nil {
  326. return
  327. }
  328. if opt.Height > MaxRowHeight {
  329. err = ErrMaxRowHeight
  330. return
  331. }
  332. if opt.Height > 0 {
  333. attrs += fmt.Sprintf(` ht="%v" customHeight="true"`, opt.Height)
  334. }
  335. if opt.Hidden {
  336. attrs += ` hidden="true"`
  337. }
  338. return
  339. }
  340. // SetColWidth provides a function to set the width of a single column or
  341. // multiple columns for the StreamWriter. Note that you must call
  342. // the 'SetColWidth' function before the 'SetRow' function. For example set
  343. // the width column B:C as 20:
  344. //
  345. // err := streamWriter.SetColWidth(2, 3, 20)
  346. //
  347. func (sw *StreamWriter) SetColWidth(min, max int, width float64) error {
  348. if sw.sheetWritten {
  349. return ErrStreamSetColWidth
  350. }
  351. if min > TotalColumns || max > TotalColumns {
  352. return ErrColumnNumber
  353. }
  354. if min < 1 || max < 1 {
  355. return ErrColumnNumber
  356. }
  357. if width > MaxColumnWidth {
  358. return ErrColumnWidth
  359. }
  360. if min > max {
  361. min, max = max, min
  362. }
  363. sw.cols += fmt.Sprintf(`<col min="%d" max="%d" width="%f" customWidth="1"/>`, min, max, width)
  364. return nil
  365. }
  366. // MergeCell provides a function to merge cells by a given coordinate area for
  367. // the StreamWriter. Don't create a merged cell that overlaps with another
  368. // existing merged cell.
  369. func (sw *StreamWriter) MergeCell(hcell, vcell string) error {
  370. _, err := areaRangeToCoordinates(hcell, vcell)
  371. if err != nil {
  372. return err
  373. }
  374. sw.mergeCellsCount++
  375. sw.mergeCells += fmt.Sprintf(`<mergeCell ref="%s:%s"/>`, hcell, vcell)
  376. return nil
  377. }
  378. // setCellFormula provides a function to set formula of a cell.
  379. func setCellFormula(c *xlsxC, formula string) {
  380. if formula != "" {
  381. c.F = &xlsxF{Content: formula}
  382. }
  383. }
  384. // setCellValFunc provides a function to set value of a cell.
  385. func setCellValFunc(c *xlsxC, val interface{}) (err error) {
  386. switch val := val.(type) {
  387. case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:
  388. err = setCellIntFunc(c, val)
  389. case float32:
  390. c.T, c.V = setCellFloat(float64(val), -1, 32)
  391. case float64:
  392. c.T, c.V = setCellFloat(val, -1, 64)
  393. case string:
  394. c.T, c.V, c.XMLSpace = setCellStr(val)
  395. case []byte:
  396. c.T, c.V, c.XMLSpace = setCellStr(string(val))
  397. case time.Duration:
  398. c.T, c.V = setCellDuration(val)
  399. case time.Time:
  400. c.T, c.V, _, err = setCellTime(val)
  401. case bool:
  402. c.T, c.V = setCellBool(val)
  403. case nil:
  404. c.T, c.V, c.XMLSpace = setCellStr("")
  405. default:
  406. c.T, c.V, c.XMLSpace = setCellStr(fmt.Sprint(val))
  407. }
  408. return err
  409. }
  410. // setCellIntFunc is a wrapper of SetCellInt.
  411. func setCellIntFunc(c *xlsxC, val interface{}) (err error) {
  412. switch val := val.(type) {
  413. case int:
  414. c.T, c.V = setCellInt(val)
  415. case int8:
  416. c.T, c.V = setCellInt(int(val))
  417. case int16:
  418. c.T, c.V = setCellInt(int(val))
  419. case int32:
  420. c.T, c.V = setCellInt(int(val))
  421. case int64:
  422. c.T, c.V = setCellInt(int(val))
  423. case uint:
  424. c.T, c.V = setCellInt(int(val))
  425. case uint8:
  426. c.T, c.V = setCellInt(int(val))
  427. case uint16:
  428. c.T, c.V = setCellInt(int(val))
  429. case uint32:
  430. c.T, c.V = setCellInt(int(val))
  431. case uint64:
  432. c.T, c.V = setCellInt(int(val))
  433. default:
  434. }
  435. return
  436. }
  437. func writeCell(buf *bufferedWriter, c xlsxC) {
  438. _, _ = buf.WriteString(`<c`)
  439. if c.XMLSpace.Value != "" {
  440. fmt.Fprintf(buf, ` xml:%s="%s"`, c.XMLSpace.Name.Local, c.XMLSpace.Value)
  441. }
  442. fmt.Fprintf(buf, ` r="%s"`, c.R)
  443. if c.S != 0 {
  444. fmt.Fprintf(buf, ` s="%d"`, c.S)
  445. }
  446. if c.T != "" {
  447. fmt.Fprintf(buf, ` t="%s"`, c.T)
  448. }
  449. _, _ = buf.WriteString(`>`)
  450. if c.F != nil {
  451. _, _ = buf.WriteString(`<f>`)
  452. _ = xml.EscapeText(buf, []byte(c.F.Content))
  453. _, _ = buf.WriteString(`</f>`)
  454. }
  455. if c.V != "" {
  456. _, _ = buf.WriteString(`<v>`)
  457. _ = xml.EscapeText(buf, []byte(c.V))
  458. _, _ = buf.WriteString(`</v>`)
  459. }
  460. _, _ = buf.WriteString(`</c>`)
  461. }
  462. // Flush ending the streaming writing process.
  463. func (sw *StreamWriter) Flush() error {
  464. if !sw.sheetWritten {
  465. _, _ = sw.rawData.WriteString(`<sheetData>`)
  466. sw.sheetWritten = true
  467. }
  468. _, _ = sw.rawData.WriteString(`</sheetData>`)
  469. bulkAppendFields(&sw.rawData, sw.worksheet, 8, 15)
  470. if sw.mergeCellsCount > 0 {
  471. sw.mergeCells = fmt.Sprintf(`<mergeCells count="%d">%s</mergeCells>`, sw.mergeCellsCount, sw.mergeCells)
  472. }
  473. _, _ = sw.rawData.WriteString(sw.mergeCells)
  474. bulkAppendFields(&sw.rawData, sw.worksheet, 17, 38)
  475. _, _ = sw.rawData.WriteString(sw.tableParts)
  476. bulkAppendFields(&sw.rawData, sw.worksheet, 40, 40)
  477. _, _ = sw.rawData.WriteString(`</worksheet>`)
  478. if err := sw.rawData.Flush(); err != nil {
  479. return err
  480. }
  481. sheetPath := sw.File.sheetMap[trimSheetName(sw.Sheet)]
  482. sw.File.Sheet.Delete(sheetPath)
  483. delete(sw.File.checked, sheetPath)
  484. sw.File.Pkg.Delete(sheetPath)
  485. return nil
  486. }
  487. // bulkAppendFields bulk-appends fields in a worksheet by specified field
  488. // names order range.
  489. func bulkAppendFields(w io.Writer, ws *xlsxWorksheet, from, to int) {
  490. s := reflect.ValueOf(ws).Elem()
  491. enc := xml.NewEncoder(w)
  492. for i := 0; i < s.NumField(); i++ {
  493. if from <= i && i <= to {
  494. _ = enc.Encode(s.Field(i).Interface())
  495. }
  496. }
  497. }
  498. // bufferedWriter uses a temp file to store an extended buffer. Writes are
  499. // always made to an in-memory buffer, which will always succeed. The buffer
  500. // is written to the temp file with Sync, which may return an error.
  501. // Therefore, Sync should be periodically called and the error checked.
  502. type bufferedWriter struct {
  503. tmp *os.File
  504. buf bytes.Buffer
  505. }
  506. // Write to the in-memory buffer. The err is always nil.
  507. func (bw *bufferedWriter) Write(p []byte) (n int, err error) {
  508. return bw.buf.Write(p)
  509. }
  510. // WriteString wites to the in-memory buffer. The err is always nil.
  511. func (bw *bufferedWriter) WriteString(p string) (n int, err error) {
  512. return bw.buf.WriteString(p)
  513. }
  514. // Reader provides read-access to the underlying buffer/file.
  515. func (bw *bufferedWriter) Reader() (io.Reader, error) {
  516. if bw.tmp == nil {
  517. return bytes.NewReader(bw.buf.Bytes()), nil
  518. }
  519. if err := bw.Flush(); err != nil {
  520. return nil, err
  521. }
  522. fi, err := bw.tmp.Stat()
  523. if err != nil {
  524. return nil, err
  525. }
  526. // os.File.ReadAt does not affect the cursor position and is safe to use here
  527. return io.NewSectionReader(bw.tmp, 0, fi.Size()), nil
  528. }
  529. // Sync will write the in-memory buffer to a temp file, if the in-memory
  530. // buffer has grown large enough. Any error will be returned.
  531. func (bw *bufferedWriter) Sync() (err error) {
  532. // Try to use local storage
  533. if bw.buf.Len() < StreamChunkSize {
  534. return nil
  535. }
  536. if bw.tmp == nil {
  537. bw.tmp, err = ioutil.TempFile(os.TempDir(), "excelize-")
  538. if err != nil {
  539. // can not use local storage
  540. return nil
  541. }
  542. }
  543. return bw.Flush()
  544. }
  545. // Flush the entire in-memory buffer to the temp file, if a temp file is being
  546. // used.
  547. func (bw *bufferedWriter) Flush() error {
  548. if bw.tmp == nil {
  549. return nil
  550. }
  551. _, err := bw.buf.WriteTo(bw.tmp)
  552. if err != nil {
  553. return err
  554. }
  555. bw.buf.Reset()
  556. return nil
  557. }
  558. // Close the underlying temp file and reset the in-memory buffer.
  559. func (bw *bufferedWriter) Close() error {
  560. bw.buf.Reset()
  561. if bw.tmp == nil {
  562. return nil
  563. }
  564. defer os.Remove(bw.tmp.Name())
  565. return bw.tmp.Close()
  566. }