lib.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. // Copyright 2016 - 2021 The excelize Authors. All rights reserved. Use of
  2. // this source code is governed by a BSD-style license that can be found in
  3. // the LICENSE file.
  4. //
  5. // Package excelize providing a set of functions that allow you to write to
  6. // and read from XLSX / XLSM / XLTM files. Supports reading and writing
  7. // spreadsheet documents generated by Microsoft Excel™ 2007 and later. Supports
  8. // complex components by high compatibility, and provided streaming API for
  9. // generating or reading data from a worksheet with huge amounts of data. This
  10. // library needs Go version 1.15 or later.
  11. package excelize
  12. import (
  13. "archive/zip"
  14. "bytes"
  15. "container/list"
  16. "encoding/xml"
  17. "fmt"
  18. "io"
  19. "regexp"
  20. "strconv"
  21. "strings"
  22. "unicode"
  23. )
  24. // ReadZipReader can be used to read the spreadsheet in memory without touching the
  25. // filesystem.
  26. func ReadZipReader(r *zip.Reader) (map[string][]byte, int, error) {
  27. var err error
  28. var docPart = map[string]string{
  29. "[content_types].xml": "[Content_Types].xml",
  30. "xl/sharedstrings.xml": "xl/sharedStrings.xml",
  31. }
  32. fileList := make(map[string][]byte, len(r.File))
  33. worksheets := 0
  34. for _, v := range r.File {
  35. fileName := strings.Replace(v.Name, "\\", "/", -1)
  36. if partName, ok := docPart[strings.ToLower(fileName)]; ok {
  37. fileName = partName
  38. }
  39. if fileList[fileName], err = readFile(v); err != nil {
  40. return nil, 0, err
  41. }
  42. if strings.HasPrefix(fileName, "xl/worksheets/sheet") {
  43. worksheets++
  44. }
  45. }
  46. return fileList, worksheets, nil
  47. }
  48. // readXML provides a function to read XML content as string.
  49. func (f *File) readXML(name string) []byte {
  50. if content, _ := f.Pkg.Load(name); content != nil {
  51. return content.([]byte)
  52. }
  53. if content, ok := f.streams[name]; ok {
  54. return content.rawData.buf.Bytes()
  55. }
  56. return []byte{}
  57. }
  58. // saveFileList provides a function to update given file content in file list
  59. // of XLSX.
  60. func (f *File) saveFileList(name string, content []byte) {
  61. f.Pkg.Store(name, append([]byte(XMLHeader), content...))
  62. }
  63. // Read file content as string in a archive file.
  64. func readFile(file *zip.File) ([]byte, error) {
  65. rc, err := file.Open()
  66. if err != nil {
  67. return nil, err
  68. }
  69. dat := make([]byte, 0, file.FileInfo().Size())
  70. buff := bytes.NewBuffer(dat)
  71. _, _ = io.Copy(buff, rc)
  72. rc.Close()
  73. return buff.Bytes(), nil
  74. }
  75. // SplitCellName splits cell name to column name and row number.
  76. //
  77. // Example:
  78. //
  79. // excelize.SplitCellName("AK74") // return "AK", 74, nil
  80. //
  81. func SplitCellName(cell string) (string, int, error) {
  82. alpha := func(r rune) bool {
  83. return ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z')
  84. }
  85. if strings.IndexFunc(cell, alpha) == 0 {
  86. i := strings.LastIndexFunc(cell, alpha)
  87. if i >= 0 && i < len(cell)-1 {
  88. col, rowstr := cell[:i+1], cell[i+1:]
  89. if row, err := strconv.Atoi(rowstr); err == nil && row > 0 {
  90. return col, row, nil
  91. }
  92. }
  93. }
  94. return "", -1, newInvalidCellNameError(cell)
  95. }
  96. // JoinCellName joins cell name from column name and row number.
  97. func JoinCellName(col string, row int) (string, error) {
  98. normCol := strings.Map(func(rune rune) rune {
  99. switch {
  100. case 'A' <= rune && rune <= 'Z':
  101. return rune
  102. case 'a' <= rune && rune <= 'z':
  103. return rune - 32
  104. }
  105. return -1
  106. }, col)
  107. if len(col) == 0 || len(col) != len(normCol) {
  108. return "", newInvalidColumnNameError(col)
  109. }
  110. if row < 1 {
  111. return "", newInvalidRowNumberError(row)
  112. }
  113. return normCol + strconv.Itoa(row), nil
  114. }
  115. // ColumnNameToNumber provides a function to convert Excel sheet column name
  116. // to int. Column name case insensitive. The function returns an error if
  117. // column name incorrect.
  118. //
  119. // Example:
  120. //
  121. // excelize.ColumnNameToNumber("AK") // returns 37, nil
  122. //
  123. func ColumnNameToNumber(name string) (int, error) {
  124. if len(name) == 0 {
  125. return -1, newInvalidColumnNameError(name)
  126. }
  127. col := 0
  128. multi := 1
  129. for i := len(name) - 1; i >= 0; i-- {
  130. r := name[i]
  131. if r >= 'A' && r <= 'Z' {
  132. col += int(r-'A'+1) * multi
  133. } else if r >= 'a' && r <= 'z' {
  134. col += int(r-'a'+1) * multi
  135. } else {
  136. return -1, newInvalidColumnNameError(name)
  137. }
  138. multi *= 26
  139. }
  140. if col > TotalColumns {
  141. return -1, ErrColumnNumber
  142. }
  143. return col, nil
  144. }
  145. // ColumnNumberToName provides a function to convert the integer to Excel
  146. // sheet column title.
  147. //
  148. // Example:
  149. //
  150. // excelize.ColumnNumberToName(37) // returns "AK", nil
  151. //
  152. func ColumnNumberToName(num int) (string, error) {
  153. if num < 1 {
  154. return "", fmt.Errorf("incorrect column number %d", num)
  155. }
  156. if num > TotalColumns {
  157. return "", ErrColumnNumber
  158. }
  159. var col string
  160. for num > 0 {
  161. col = string(rune((num-1)%26+65)) + col
  162. num = (num - 1) / 26
  163. }
  164. return col, nil
  165. }
  166. // CellNameToCoordinates converts alphanumeric cell name to [X, Y] coordinates
  167. // or returns an error.
  168. //
  169. // Example:
  170. //
  171. // excelize.CellNameToCoordinates("A1") // returns 1, 1, nil
  172. // excelize.CellNameToCoordinates("Z3") // returns 26, 3, nil
  173. //
  174. func CellNameToCoordinates(cell string) (int, int, error) {
  175. const msg = "cannot convert cell %q to coordinates: %v"
  176. colname, row, err := SplitCellName(cell)
  177. if err != nil {
  178. return -1, -1, fmt.Errorf(msg, cell, err)
  179. }
  180. if row > TotalRows {
  181. return -1, -1, fmt.Errorf("row number exceeds maximum limit")
  182. }
  183. col, err := ColumnNameToNumber(colname)
  184. return col, row, err
  185. }
  186. // CoordinatesToCellName converts [X, Y] coordinates to alpha-numeric cell
  187. // name or returns an error.
  188. //
  189. // Example:
  190. //
  191. // excelize.CoordinatesToCellName(1, 1) // returns "A1", nil
  192. // excelize.CoordinatesToCellName(1, 1, true) // returns "$A$1", nil
  193. //
  194. func CoordinatesToCellName(col, row int, abs ...bool) (string, error) {
  195. if col < 1 || row < 1 {
  196. return "", fmt.Errorf("invalid cell coordinates [%d, %d]", col, row)
  197. }
  198. sign := ""
  199. for _, a := range abs {
  200. if a {
  201. sign = "$"
  202. }
  203. }
  204. colname, err := ColumnNumberToName(col)
  205. return sign + colname + sign + strconv.Itoa(row), err
  206. }
  207. // boolPtr returns a pointer to a bool with the given value.
  208. func boolPtr(b bool) *bool { return &b }
  209. // intPtr returns a pointer to a int with the given value.
  210. func intPtr(i int) *int { return &i }
  211. // float64Ptr returns a pofloat64er to a float64 with the given value.
  212. func float64Ptr(f float64) *float64 { return &f }
  213. // stringPtr returns a pointer to a string with the given value.
  214. func stringPtr(s string) *string { return &s }
  215. // defaultTrue returns true if b is nil, or the pointed value.
  216. func defaultTrue(b *bool) bool {
  217. if b == nil {
  218. return true
  219. }
  220. return *b
  221. }
  222. // parseFormatSet provides a method to convert format string to []byte and
  223. // handle empty string.
  224. func parseFormatSet(formatSet string) []byte {
  225. if formatSet != "" {
  226. return []byte(formatSet)
  227. }
  228. return []byte("{}")
  229. }
  230. // namespaceStrictToTransitional provides a method to convert Strict and
  231. // Transitional namespaces.
  232. func namespaceStrictToTransitional(content []byte) []byte {
  233. var namespaceTranslationDic = map[string]string{
  234. StrictSourceRelationship: SourceRelationship.Value,
  235. StrictSourceRelationshipOfficeDocument: SourceRelationshipOfficeDocument,
  236. StrictSourceRelationshipChart: SourceRelationshipChart,
  237. StrictSourceRelationshipComments: SourceRelationshipComments,
  238. StrictSourceRelationshipImage: SourceRelationshipImage,
  239. StrictNameSpaceSpreadSheet: NameSpaceSpreadSheet.Value,
  240. }
  241. for s, n := range namespaceTranslationDic {
  242. content = bytesReplace(content, []byte(s), []byte(n), -1)
  243. }
  244. return content
  245. }
  246. // bytesReplace replace old bytes with given new.
  247. func bytesReplace(s, old, new []byte, n int) []byte {
  248. if n == 0 {
  249. return s
  250. }
  251. if len(old) < len(new) {
  252. return bytes.Replace(s, old, new, n)
  253. }
  254. if n < 0 {
  255. n = len(s)
  256. }
  257. var wid, i, j, w int
  258. for i, j = 0, 0; i < len(s) && j < n; j++ {
  259. wid = bytes.Index(s[i:], old)
  260. if wid < 0 {
  261. break
  262. }
  263. w += copy(s[w:], s[i:i+wid])
  264. w += copy(s[w:], new)
  265. i += wid + len(old)
  266. }
  267. w += copy(s[w:], s[i:])
  268. return s[0:w]
  269. }
  270. // genSheetPasswd provides a method to generate password for worksheet
  271. // protection by given plaintext. When an Excel sheet is being protected with
  272. // a password, a 16-bit (two byte) long hash is generated. To verify a
  273. // password, it is compared to the hash. Obviously, if the input data volume
  274. // is great, numerous passwords will match the same hash. Here is the
  275. // algorithm to create the hash value:
  276. //
  277. // take the ASCII values of all characters shift left the first character 1 bit,
  278. // the second 2 bits and so on (use only the lower 15 bits and rotate all higher bits,
  279. // the highest bit of the 16-bit value is always 0 [signed short])
  280. // XOR all these values
  281. // XOR the count of characters
  282. // XOR the constant 0xCE4B
  283. func genSheetPasswd(plaintext string) string {
  284. var password int64 = 0x0000
  285. var charPos uint = 1
  286. for _, v := range plaintext {
  287. value := int64(v) << charPos
  288. charPos++
  289. rotatedBits := value >> 15 // rotated bits beyond bit 15
  290. value &= 0x7fff // first 15 bits
  291. password ^= (value | rotatedBits)
  292. }
  293. password ^= int64(len(plaintext))
  294. password ^= 0xCE4B
  295. return strings.ToUpper(strconv.FormatInt(password, 16))
  296. }
  297. // getRootElement extract root element attributes by given XML decoder.
  298. func getRootElement(d *xml.Decoder) []xml.Attr {
  299. tokenIdx := 0
  300. for {
  301. token, _ := d.Token()
  302. if token == nil {
  303. break
  304. }
  305. switch startElement := token.(type) {
  306. case xml.StartElement:
  307. tokenIdx++
  308. if tokenIdx == 1 {
  309. return startElement.Attr
  310. }
  311. }
  312. }
  313. return nil
  314. }
  315. // genXMLNamespace generate serialized XML attributes with a multi namespace
  316. // by given element attributes.
  317. func genXMLNamespace(attr []xml.Attr) string {
  318. var rootElement string
  319. for _, v := range attr {
  320. if lastSpace := getXMLNamespace(v.Name.Space, attr); lastSpace != "" {
  321. if lastSpace == NameSpaceXML {
  322. lastSpace = "xml"
  323. }
  324. rootElement += fmt.Sprintf("%s:%s=\"%s\" ", lastSpace, v.Name.Local, v.Value)
  325. continue
  326. }
  327. rootElement += fmt.Sprintf("%s=\"%s\" ", v.Name.Local, v.Value)
  328. }
  329. return strings.TrimSpace(rootElement) + ">"
  330. }
  331. // getXMLNamespace extract XML namespace from specified element name and attributes.
  332. func getXMLNamespace(space string, attr []xml.Attr) string {
  333. for _, attribute := range attr {
  334. if attribute.Value == space {
  335. return attribute.Name.Local
  336. }
  337. }
  338. return space
  339. }
  340. // replaceNameSpaceBytes provides a function to replace the XML root element
  341. // attribute by the given component part path and XML content.
  342. func (f *File) replaceNameSpaceBytes(path string, contentMarshal []byte) []byte {
  343. var oldXmlns = []byte(`xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">`)
  344. var newXmlns = []byte(templateNamespaceIDMap)
  345. if attr, ok := f.xmlAttr[path]; ok {
  346. newXmlns = []byte(genXMLNamespace(attr))
  347. }
  348. return bytesReplace(contentMarshal, oldXmlns, newXmlns, -1)
  349. }
  350. // addNameSpaces provides a function to add a XML attribute by the given
  351. // component part path.
  352. func (f *File) addNameSpaces(path string, ns xml.Attr) {
  353. exist := false
  354. mc := false
  355. ignore := -1
  356. if attr, ok := f.xmlAttr[path]; ok {
  357. for i, attribute := range attr {
  358. if attribute.Name.Local == ns.Name.Local && attribute.Name.Space == ns.Name.Space {
  359. exist = true
  360. }
  361. if attribute.Name.Local == "Ignorable" && getXMLNamespace(attribute.Name.Space, attr) == "mc" {
  362. ignore = i
  363. }
  364. if attribute.Name.Local == "mc" && attribute.Name.Space == "xmlns" {
  365. mc = true
  366. }
  367. }
  368. }
  369. if !exist {
  370. f.xmlAttr[path] = append(f.xmlAttr[path], ns)
  371. if !mc {
  372. f.xmlAttr[path] = append(f.xmlAttr[path], SourceRelationshipCompatibility)
  373. }
  374. if ignore == -1 {
  375. f.xmlAttr[path] = append(f.xmlAttr[path], xml.Attr{
  376. Name: xml.Name{Local: "Ignorable", Space: "mc"},
  377. Value: ns.Name.Local,
  378. })
  379. return
  380. }
  381. f.setIgnorableNameSpace(path, ignore, ns)
  382. }
  383. }
  384. // setIgnorableNameSpace provides a function to set XML namespace as ignorable
  385. // by the given attribute.
  386. func (f *File) setIgnorableNameSpace(path string, index int, ns xml.Attr) {
  387. ignorableNS := []string{"c14", "cdr14", "a14", "pic14", "x14", "xdr14", "x14ac", "dsp", "mso14", "dgm14", "x15", "x12ac", "x15ac", "xr", "xr2", "xr3", "xr4", "xr5", "xr6", "xr7", "xr8", "xr9", "xr10", "xr11", "xr12", "xr13", "xr14", "xr15", "x15", "x16", "x16r2", "mo", "mx", "mv", "o", "v"}
  388. if inStrSlice(strings.Fields(f.xmlAttr[path][index].Value), ns.Name.Local) == -1 && inStrSlice(ignorableNS, ns.Name.Local) != -1 {
  389. f.xmlAttr[path][index].Value = strings.TrimSpace(fmt.Sprintf("%s %s", f.xmlAttr[path][index].Value, ns.Name.Local))
  390. }
  391. }
  392. // addSheetNameSpace add XML attribute for worksheet.
  393. func (f *File) addSheetNameSpace(sheet string, ns xml.Attr) {
  394. name := f.sheetMap[trimSheetName(sheet)]
  395. f.addNameSpaces(name, ns)
  396. }
  397. // isNumeric determines whether an expression is a valid numeric type and get
  398. // the precision for the numeric.
  399. func isNumeric(s string) (bool, int) {
  400. dot := false
  401. p := 0
  402. for i, v := range s {
  403. if v == '.' {
  404. if dot {
  405. return false, 0
  406. }
  407. dot = true
  408. } else if v < '0' || v > '9' {
  409. if i == 0 && v == '-' {
  410. continue
  411. }
  412. return false, 0
  413. } else if dot {
  414. p++
  415. }
  416. }
  417. return true, p
  418. }
  419. var (
  420. bstrExp = regexp.MustCompile(`_x[a-zA-Z\d]{4}_`)
  421. bstrEscapeExp = regexp.MustCompile(`x[a-zA-Z\d]{4}_`)
  422. )
  423. // bstrUnmarshal parses the binary basic string, this will trim escaped string
  424. // literal which not permitted in an XML 1.0 document. The basic string
  425. // variant type can store any valid Unicode character. Unicode characters
  426. // that cannot be directly represented in XML as defined by the XML 1.0
  427. // specification, shall be escaped using the Unicode numerical character
  428. // representation escape character format _xHHHH_, where H represents a
  429. // hexadecimal character in the character's value. For example: The Unicode
  430. // character 8 is not permitted in an XML 1.0 document, so it shall be
  431. // escaped as _x0008_. To store the literal form of an escape sequence, the
  432. // initial underscore shall itself be escaped (i.e. stored as _x005F_). For
  433. // example: The string literal _x0008_ would be stored as _x005F_x0008_.
  434. func bstrUnmarshal(s string) (result string) {
  435. matches, l, cursor := bstrExp.FindAllStringSubmatchIndex(s, -1), len(s), 0
  436. for _, match := range matches {
  437. result += s[cursor:match[0]]
  438. subStr := s[match[0]:match[1]]
  439. if subStr == "_x005F_" {
  440. cursor = match[1]
  441. if l > match[1]+6 && !bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) {
  442. result += subStr
  443. continue
  444. }
  445. result += "_"
  446. continue
  447. }
  448. if bstrExp.MatchString(subStr) {
  449. cursor = match[1]
  450. v, err := strconv.Unquote(`"\u` + s[match[0]+2:match[1]-1] + `"`)
  451. if err != nil {
  452. if l > match[1]+6 && bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) {
  453. result += subStr[:6]
  454. cursor = match[1] + 6
  455. continue
  456. }
  457. result += subStr
  458. continue
  459. }
  460. hasRune := false
  461. for _, c := range v {
  462. if unicode.IsControl(c) {
  463. hasRune = true
  464. }
  465. }
  466. if !hasRune {
  467. result += v
  468. }
  469. }
  470. }
  471. if cursor < l {
  472. result += s[cursor:]
  473. }
  474. return result
  475. }
  476. // bstrMarshal encode the escaped string literal which not permitted in an XML
  477. // 1.0 document.
  478. func bstrMarshal(s string) (result string) {
  479. matches, l, cursor := bstrExp.FindAllStringSubmatchIndex(s, -1), len(s), 0
  480. for _, match := range matches {
  481. result += s[cursor:match[0]]
  482. subStr := s[match[0]:match[1]]
  483. if subStr == "_x005F_" {
  484. cursor = match[1]
  485. if match[1]+6 <= l && bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) {
  486. _, err := strconv.Unquote(`"\u` + s[match[1]+1:match[1]+5] + `"`)
  487. if err == nil {
  488. result += subStr + "x005F" + subStr
  489. continue
  490. }
  491. }
  492. result += subStr + "x005F_"
  493. continue
  494. }
  495. if bstrExp.MatchString(subStr) {
  496. cursor = match[1]
  497. _, err := strconv.Unquote(`"\u` + s[match[0]+2:match[1]-1] + `"`)
  498. if err == nil {
  499. result += "_x005F" + subStr
  500. continue
  501. }
  502. result += subStr
  503. }
  504. }
  505. if cursor < l {
  506. result += s[cursor:]
  507. }
  508. return result
  509. }
  510. // Stack defined an abstract data type that serves as a collection of elements.
  511. type Stack struct {
  512. list *list.List
  513. }
  514. // NewStack create a new stack.
  515. func NewStack() *Stack {
  516. list := list.New()
  517. return &Stack{list}
  518. }
  519. // Push a value onto the top of the stack.
  520. func (stack *Stack) Push(value interface{}) {
  521. stack.list.PushBack(value)
  522. }
  523. // Pop the top item of the stack and return it.
  524. func (stack *Stack) Pop() interface{} {
  525. e := stack.list.Back()
  526. if e != nil {
  527. stack.list.Remove(e)
  528. return e.Value
  529. }
  530. return nil
  531. }
  532. // Peek view the top item on the stack.
  533. func (stack *Stack) Peek() interface{} {
  534. e := stack.list.Back()
  535. if e != nil {
  536. return e.Value
  537. }
  538. return nil
  539. }
  540. // Len return the number of items in the stack.
  541. func (stack *Stack) Len() int {
  542. return stack.list.Len()
  543. }
  544. // Empty the stack.
  545. func (stack *Stack) Empty() bool {
  546. return stack.list.Len() == 0
  547. }