format_code.go 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645
  1. package xlsx
  2. import (
  3. "errors"
  4. "fmt"
  5. "math"
  6. "strconv"
  7. "strings"
  8. )
  9. // Do not edit these attributes once this struct is created. This struct should only be created by
  10. // parseFullNumberFormatString() from a number format string. If the format for a cell needs to change, change
  11. // the number format string and getNumberFormat() will invalidate the old struct and re-parse the string.
  12. type parsedNumberFormat struct {
  13. numFmt string
  14. isTimeFormat bool
  15. negativeFormatExpectsPositive bool
  16. positiveFormat *formatOptions
  17. negativeFormat *formatOptions
  18. zeroFormat *formatOptions
  19. textFormat *formatOptions
  20. parseEncounteredError *error
  21. }
  22. type formatOptions struct {
  23. isTimeFormat bool
  24. showPercent bool
  25. fullFormatString string
  26. reducedFormatString string
  27. prefix string
  28. suffix string
  29. }
  30. // FormatValue returns a value, and possibly an error condition
  31. // from a Cell. If it is possible to apply a format to the cell
  32. // value, it will do so, if not then an error will be returned, along
  33. // with the raw value of the Cell.
  34. //
  35. // This is the documentation of the "General" Format in the Office Open XML spec:
  36. //
  37. // Numbers
  38. // The application shall attempt to display the full number up to 11 digits (inc. decimal point). If the number is too
  39. // large*, the application shall attempt to show exponential format. If the number has too many significant digits, the
  40. // display shall be truncated. The optimal method of display is based on the available cell width. If the number cannot
  41. // be displayed using any of these formats in the available width, the application shall show "#" across the width of
  42. // the cell.
  43. //
  44. // Conditions for switching to exponential format:
  45. // 1. The cell value shall have at least five digits for xE-xx
  46. // 2. If the exponent is bigger than the size allowed, a floating point number cannot fit, so try exponential notation.
  47. // 3. Similarly, for negative exponents, check if there is space for even one (non-zero) digit in floating point format**.
  48. // 4. Finally, if there isn't room for all of the significant digits in floating point format (for a negative exponent),
  49. // exponential format shall display more digits if the exponent is less than -3. (The 3 is because E-xx takes 4
  50. // characters, and the leading 0 in floating point takes only 1 character. Thus, for an exponent less than -3, there is
  51. // more than 3 additional leading 0's, more than enough to compensate for the size of the E-xx.)
  52. //
  53. // Floating point rule:
  54. // For general formatting in cells, max overall length for cell display is 11, not including negative sign, but includes
  55. // leading zeros and decimal separator.***
  56. //
  57. // Added Notes:
  58. // * "If the number is too large" can also mean "if the number has more than 11 digits", so greater than or equal to
  59. // 1e11 and less than 1e-9.
  60. // ** Means that you should switch to scientific if there would be 9 zeros after the decimal (the decimal and first zero
  61. // count against the 11 character limit), so less than 1e9.
  62. // *** The way this is written, you can get numbers that are more than 11 characters because the golang Float fmt
  63. // does not support adjusting the precision while not padding with zeros, while also not switching to scientific
  64. // notation too early.
  65. func (fullFormat *parsedNumberFormat) FormatValue(cell *Cell) (string, error) {
  66. switch cell.cellType {
  67. case CellTypeError:
  68. // The error type is what XLSX uses in error cases such as when formulas are invalid.
  69. // There will be text in the cell's value that can be shown, something ugly like #NAME? or #######
  70. return cell.Value, nil
  71. case CellTypeBool:
  72. if cell.Value == "0" {
  73. return "FALSE", nil
  74. } else if cell.Value == "1" {
  75. return "TRUE", nil
  76. } else {
  77. return cell.Value, errors.New("invalid value in bool cell")
  78. }
  79. case CellTypeString:
  80. fallthrough
  81. case CellTypeInline:
  82. fallthrough
  83. case CellTypeStringFormula:
  84. textFormat := cell.parsedNumFmt.textFormat
  85. // This switch statement is only for String formats
  86. switch textFormat.reducedFormatString {
  87. case builtInNumFmt[builtInNumFmtIndex_GENERAL]: // General is literally "general"
  88. return cell.Value, nil
  89. case builtInNumFmt[builtInNumFmtIndex_STRING]: // String is "@"
  90. return textFormat.prefix + cell.Value + textFormat.suffix, nil
  91. case "":
  92. // If cell is not "General" and there is not an "@" symbol in the format, then the cell's value is not
  93. // used when determining what to display. It would be completely legal to have a format of "Error"
  94. // for strings, and all values that are not numbers would show up as "Error". In that case, this code would
  95. // have a prefix of "Error" and a reduced format string of "" (empty string).
  96. return textFormat.prefix + textFormat.suffix, nil
  97. default:
  98. return cell.Value, errors.New("invalid or unsupported format, unsupported string format")
  99. }
  100. case CellTypeDate:
  101. // These are dates that are stored in date format instead of being stored as numbers with a format to turn them
  102. // into a date string.
  103. return cell.Value, nil
  104. case CellTypeNumeric:
  105. return fullFormat.formatNumericCell(cell)
  106. default:
  107. return cell.Value, errors.New("unknown cell type")
  108. }
  109. }
  110. func (fullFormat *parsedNumberFormat) formatNumericCell(cell *Cell) (string, error) {
  111. rawValue := strings.TrimSpace(cell.Value)
  112. // If there wasn't a value in the cell, it shouldn't have been marked as Numeric.
  113. // It's better to support this case though.
  114. if rawValue == "" {
  115. return "", nil
  116. }
  117. if fullFormat.isTimeFormat {
  118. return fullFormat.parseTime(rawValue, cell.date1904)
  119. }
  120. var numberFormat *formatOptions
  121. floatVal, floatErr := strconv.ParseFloat(rawValue, 64)
  122. if floatErr != nil {
  123. return rawValue, floatErr
  124. }
  125. // Choose the correct format. There can be different formats for positive, negative, and zero numbers.
  126. // Excel only uses the zero format if the value is literally zero, even if the number is so small that it shows
  127. // up as "0" when the positive format is used.
  128. if floatVal > 0 {
  129. numberFormat = fullFormat.positiveFormat
  130. } else if floatVal < 0 {
  131. // If format string specified a different format for negative numbers, then the number should be made positive
  132. // before getting formatted. The format string itself will contain formatting that denotes a negative number and
  133. // this formatting will end up in the prefix or suffix. Commonly if there is a negative format specified, the
  134. // number will get surrounded by parenthesis instead of showing it with a minus sign.
  135. if fullFormat.negativeFormatExpectsPositive {
  136. floatVal = math.Abs(floatVal)
  137. }
  138. numberFormat = fullFormat.negativeFormat
  139. } else {
  140. numberFormat = fullFormat.zeroFormat
  141. }
  142. // When showPercent is true, multiply the number by 100.
  143. // The percent sign will be in the prefix or suffix already, so it does not need to be added in this function.
  144. // The number format itself will be the same as any other number format once the value is multiplied by 100.
  145. if numberFormat.showPercent {
  146. floatVal = 100 * floatVal
  147. }
  148. // Only the most common format strings are supported here.
  149. // Eventually this switch needs to be replaced with a more general solution.
  150. // Some of these "supported" formats should have thousand separators, but don't get them since Go fmt
  151. // doesn't have a way to request thousands separators.
  152. // The only things that should be supported here are in the array formattingCharacters,
  153. // everything else has been stripped out before and will be placed in the prefix or suffix.
  154. // The formatting characters can have non-formatting characters mixed in with them and those should be maintained.
  155. // However, at this time we fail to parse those formatting codes and they get replaced with "General"
  156. var formattedNum string
  157. switch numberFormat.reducedFormatString {
  158. case builtInNumFmt[builtInNumFmtIndex_GENERAL]: // General is literally "general"
  159. // prefix, showPercent, and suffix cannot apply to the general format
  160. // The logic for showing numbers when the format is "general" is much more complicated than the rest of these.
  161. generalFormatted, err := generalNumericScientific(cell.Value, true)
  162. if err != nil {
  163. return rawValue, nil
  164. }
  165. return generalFormatted, nil
  166. case builtInNumFmt[builtInNumFmtIndex_STRING]: // String is "@"
  167. formattedNum = cell.Value
  168. case builtInNumFmt[builtInNumFmtIndex_INT], "#,##0": // Int is "0"
  169. // Previously this case would cast to int and print with %d, but that will not round the value correctly.
  170. formattedNum = fmt.Sprintf("%.0f", floatVal)
  171. case "0.0", "#,##0.0":
  172. formattedNum = fmt.Sprintf("%.1f", floatVal)
  173. case builtInNumFmt[builtInNumFmtIndex_FLOAT], "#,##0.00": // Float is "0.00"
  174. formattedNum = fmt.Sprintf("%.2f", floatVal)
  175. case "0.000", "#,##0.000":
  176. formattedNum = fmt.Sprintf("%.3f", floatVal)
  177. case "0.0000", "#,##0.0000":
  178. formattedNum = fmt.Sprintf("%.4f", floatVal)
  179. case "0.00e+00", "##0.0e+0":
  180. formattedNum = fmt.Sprintf("%e", floatVal)
  181. case "":
  182. // Do nothing.
  183. default:
  184. return rawValue, nil
  185. }
  186. return numberFormat.prefix + formattedNum + numberFormat.suffix, nil
  187. }
  188. func generalNumericScientific(value string, allowScientific bool) (string, error) {
  189. if strings.TrimSpace(value) == "" {
  190. return "", nil
  191. }
  192. f, err := strconv.ParseFloat(value, 64)
  193. if err != nil {
  194. return value, err
  195. }
  196. if allowScientific {
  197. absF := math.Abs(f)
  198. // When using General format, numbers that are less than 1e-9 (0.000000001) and greater than or equal to
  199. // 1e11 (100,000,000,000) should be shown in scientific notation.
  200. // Numbers less than the number after zero, are assumed to be zero.
  201. if (absF >= math.SmallestNonzeroFloat64 && absF < minNonScientificNumber) || absF >= maxNonScientificNumber {
  202. return strconv.FormatFloat(f, 'E', -1, 64), nil
  203. }
  204. }
  205. // This format (fmt="f", prec=-1) will prevent padding with zeros and will never switch to scientific notation.
  206. // However, it will show more than 11 characters for very precise numbers, and this cannot be changed.
  207. // You could also use fmt="g", prec=11, which doesn't pad with zeros and allows the correct precision,
  208. // but it will use scientific notation on numbers less than 1e-4. That value is hardcoded in Go and cannot be
  209. // configured or disabled.
  210. return strconv.FormatFloat(f, 'f', -1, 64), nil
  211. }
  212. // Format strings are a little strange to compare because empty string
  213. // needs to be taken as general, and general needs to be compared case
  214. // insensitively.
  215. func compareFormatString(fmt1, fmt2 string) bool {
  216. if fmt1 == fmt2 {
  217. return true
  218. }
  219. if fmt1 == "" || strings.EqualFold(fmt1, "general") {
  220. fmt1 = "general"
  221. }
  222. if fmt2 == "" || strings.EqualFold(fmt2, "general") {
  223. fmt2 = "general"
  224. }
  225. return fmt1 == fmt2
  226. }
  227. func parseFullNumberFormatString(numFmt string) *parsedNumberFormat {
  228. parsedNumFmt := &parsedNumberFormat{
  229. numFmt: numFmt,
  230. }
  231. if isTimeFormat(numFmt) {
  232. // Time formats cannot have multiple groups separated by semicolons, there is only one format.
  233. // Strings are unaffected by the time format.
  234. parsedNumFmt.isTimeFormat = true
  235. parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
  236. return parsedNumFmt
  237. }
  238. var fmtOptions []*formatOptions
  239. formats, err := splitFormatOnSemicolon(numFmt)
  240. if err == nil {
  241. for _, formatSection := range formats {
  242. parsedFormat, err := parseNumberFormatSection(formatSection)
  243. if err != nil {
  244. // If an invalid number section is found, fall back to general
  245. parsedFormat = fallbackErrorFormat
  246. parsedNumFmt.parseEncounteredError = &err
  247. }
  248. fmtOptions = append(fmtOptions, parsedFormat)
  249. }
  250. } else {
  251. fmtOptions = append(fmtOptions, fallbackErrorFormat)
  252. parsedNumFmt.parseEncounteredError = &err
  253. }
  254. if len(fmtOptions) > 4 {
  255. fmtOptions = []*formatOptions{fallbackErrorFormat}
  256. err = errors.New("invalid number format, too many format sections")
  257. parsedNumFmt.parseEncounteredError = &err
  258. }
  259. if len(fmtOptions) == 1 {
  260. // If there is only one option, it is used for all
  261. parsedNumFmt.positiveFormat = fmtOptions[0]
  262. parsedNumFmt.negativeFormat = fmtOptions[0]
  263. parsedNumFmt.zeroFormat = fmtOptions[0]
  264. if strings.Contains(fmtOptions[0].fullFormatString, "@") {
  265. parsedNumFmt.textFormat = fmtOptions[0]
  266. } else {
  267. parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
  268. }
  269. } else if len(fmtOptions) == 2 {
  270. // If there are two formats, the first is used for positive and zeros, the second gets used as a negative format,
  271. // and strings are not formatted.
  272. // When negative numbers now have their own format, they should become positive before having the format applied.
  273. // The format will contain a negative sign if it is desired, but they may be colored red or wrapped in
  274. // parenthesis instead.
  275. parsedNumFmt.negativeFormatExpectsPositive = true
  276. parsedNumFmt.positiveFormat = fmtOptions[0]
  277. parsedNumFmt.negativeFormat = fmtOptions[1]
  278. parsedNumFmt.zeroFormat = fmtOptions[0]
  279. parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
  280. } else if len(fmtOptions) == 3 {
  281. // If there are three formats, the first is used for positive, the second gets used as a negative format,
  282. // the third is for negative, and strings are not formatted.
  283. parsedNumFmt.negativeFormatExpectsPositive = true
  284. parsedNumFmt.positiveFormat = fmtOptions[0]
  285. parsedNumFmt.negativeFormat = fmtOptions[1]
  286. parsedNumFmt.zeroFormat = fmtOptions[2]
  287. parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
  288. } else {
  289. // With four options, the first is positive, the second is negative, the third is zero, and the fourth is strings
  290. // Negative numbers should be still become positive before having the negative formatting applied.
  291. parsedNumFmt.negativeFormatExpectsPositive = true
  292. parsedNumFmt.positiveFormat = fmtOptions[0]
  293. parsedNumFmt.negativeFormat = fmtOptions[1]
  294. parsedNumFmt.zeroFormat = fmtOptions[2]
  295. parsedNumFmt.textFormat = fmtOptions[3]
  296. }
  297. return parsedNumFmt
  298. }
  299. // splitFormatOnSemicolon will split the format string into the format sections
  300. // This logic to split the different formats on semicolon is fully correct, and will skip all literal semicolons,
  301. // and will catch all breaking semicolons.
  302. func splitFormatOnSemicolon(format string) ([]string, error) {
  303. var formats []string
  304. prevIndex := 0
  305. for i := 0; i < len(format); i++ {
  306. if format[i] == ';' {
  307. formats = append(formats, format[prevIndex:i])
  308. prevIndex = i + 1
  309. } else if format[i] == '\\' {
  310. i++
  311. } else if format[i] == '"' {
  312. endQuoteIndex := strings.Index(format[i+1:], "\"")
  313. if endQuoteIndex == -1 {
  314. // This is an invalid format string, fall back to general
  315. return nil, errors.New("invalid format string, unmatched double quote")
  316. }
  317. i += endQuoteIndex + 1
  318. }
  319. }
  320. return append(formats, format[prevIndex:]), nil
  321. }
  322. var fallbackErrorFormat = &formatOptions{
  323. fullFormatString: "general",
  324. reducedFormatString: "general",
  325. }
  326. // parseNumberFormatSection takes in individual format and parses out most of the options.
  327. // Some options are parsed, removed from the string, and set as settings on formatOptions.
  328. // There remainder of the format string is put in the reducedFormatString attribute, and supported values for these
  329. // are handled in a switch in the Cell.FormattedValue() function.
  330. // Ideally more and more of the format string would be parsed out here into settings until there is no remainder string
  331. // at all.
  332. // Features that this supports:
  333. // - Time formats are detected, and marked in the options. Time format strings are handled when doing the formatting.
  334. // The logic to detect time formats is currently not correct, and can catch formats that are not time formats as well
  335. // as miss formats that are time formats.
  336. // - Color formats are detected and removed.
  337. // - Currency annotations are handled properly.
  338. // - Literal strings wrapped in quotes are handled and put into prefix or suffix.
  339. // - Numbers that should be percent are detected and marked in the options.
  340. // - Conditionals are detected and removed, but they are not obeyed. The conditional groups will be used just like the
  341. // positive;negative;zero;string format groups. Here is an example of a conditional format: "[Red][<=100];[Blue][>100]"
  342. // Decoding the actual number formatting portion is out of scope, that is placed into reducedFormatString and is used
  343. // when formatting the string. The string there will be reduced to only the things in the formattingCharacters array.
  344. // Everything not in that array has been parsed out and put into formatOptions.
  345. func parseNumberFormatSection(fullFormat string) (*formatOptions, error) {
  346. reducedFormat := strings.TrimSpace(fullFormat)
  347. // general is the only format that does not use the normal format symbols notations
  348. if compareFormatString(reducedFormat, "general") {
  349. return &formatOptions{
  350. fullFormatString: "general",
  351. reducedFormatString: "general",
  352. }, nil
  353. }
  354. prefix, reducedFormat, showPercent1, err := parseLiterals(reducedFormat)
  355. if err != nil {
  356. return nil, err
  357. }
  358. reducedFormat, suffixFormat := splitFormatAndSuffixFormat(reducedFormat)
  359. suffix, remaining, showPercent2, err := parseLiterals(suffixFormat)
  360. if err != nil {
  361. return nil, err
  362. }
  363. if len(remaining) > 0 {
  364. // This paradigm of codes consisting of literals, number formats, then more literals is not always correct, they can
  365. // actually be intertwined. Though 99% of the time number formats will not do this.
  366. // Excel uses this format string for Social Security Numbers: 000\-00\-0000
  367. // and this for US phone numbers: [<=9999999]###\-####;\(###\)\ ###\-####
  368. return nil, errors.New("invalid or unsupported format string")
  369. }
  370. return &formatOptions{
  371. fullFormatString: fullFormat,
  372. isTimeFormat: false,
  373. reducedFormatString: reducedFormat,
  374. prefix: prefix,
  375. suffix: suffix,
  376. showPercent: showPercent1 || showPercent2,
  377. }, nil
  378. }
  379. // formattingCharacters will be left in the reducedNumberFormat
  380. // It is important that these be looked for in order so that the slash cases are handled correctly.
  381. // / (slash) is a fraction format if preceded by 0, #, or ?, otherwise it is not a formatting character
  382. // E- E+ e- e+ are scientific notation, but E, e, -, + are not formatting characters independently
  383. // \ (back slash) makes the next character a literal (not formatting)
  384. // " Anything in double quotes is not a formatting character
  385. // _ (underscore) skips the width of the next character, so the next character cannot be formatting
  386. var formattingCharacters = []string{"0/", "#/", "?/", "E-", "E+", "e-", "e+", "0", "#", "?", ".", ",", "@", "*"}
  387. // The following are also time format characters, but since this is only used for detecting, not decoding, they are
  388. // redundant here: ee, gg, ggg, rr, ss, mm, hh, yyyy, dd, ddd, dddd, mm, mmm, mmmm, mmmmm, ss.0000, ss.000, ss.00, ss.0
  389. // The .00 type format is very tricky, because it only counts if it comes after ss or s or [ss] or [s]
  390. // .00 is actually a valid number format by itself.
  391. var timeFormatCharacters = []string{"m", "d", "yy", "h", "m", "AM/PM", "A/P", "am/pm", "a/p", "r", "g", "e", "b1", "b2", "[hh]", "[h]", "[mm]", "[m]",
  392. "s.0000", "s.000", "s.00", "s.0", "s", "[ss].0000", "[ss].000", "[ss].00", "[ss].0", "[ss]", "[s].0000", "[s].000", "[s].00", "[s].0", "[s]"}
  393. func splitFormatAndSuffixFormat(format string) (string, string) {
  394. var i int
  395. for ; i < len(format); i++ {
  396. curReducedFormat := format[i:]
  397. var found bool
  398. for _, special := range formattingCharacters {
  399. if strings.HasPrefix(curReducedFormat, special) {
  400. // Skip ahead if the special character was longer than length 1
  401. i += len(special) - 1
  402. found = true
  403. break
  404. }
  405. }
  406. if !found {
  407. break
  408. }
  409. }
  410. suffixFormat := format[i:]
  411. format = format[:i]
  412. return format, suffixFormat
  413. }
  414. func parseLiterals(format string) (string, string, bool, error) {
  415. var prefix string
  416. showPercent := false
  417. for i := 0; i < len(format); i++ {
  418. curReducedFormat := format[i:]
  419. switch curReducedFormat[0] {
  420. case '\\':
  421. // If there is a slash, skip the next character, and add it to the prefix
  422. if len(curReducedFormat) > 1 {
  423. i++
  424. prefix += curReducedFormat[1:2]
  425. }
  426. case '_':
  427. // If there is an underscore, skip the next character, but don't add it to the prefix
  428. if len(curReducedFormat) > 1 {
  429. i++
  430. }
  431. case '*':
  432. // Asterisks are used to repeat the next character to fill the full cell width.
  433. // There isn't really a cell size in this context, so this will be ignored.
  434. case '"':
  435. // If there is a quote skip to the next quote, and add the quoted characters to the prefix
  436. endQuoteIndex := strings.Index(curReducedFormat[1:], "\"")
  437. if endQuoteIndex == -1 {
  438. return "", "", false, errors.New("invalid formatting code, unmatched double quote")
  439. }
  440. prefix = prefix + curReducedFormat[1:endQuoteIndex+1]
  441. i += endQuoteIndex + 1
  442. case '%':
  443. showPercent = true
  444. prefix += "%"
  445. case '[':
  446. // Brackets can be currency annotations (e.g. [$$-409])
  447. // color formats (e.g. [color1] through [color56], as well as [red] etc.)
  448. // conditionals (e.g. [>100], the valid conditionals are =, >, <, >=, <=, <>)
  449. bracketIndex := strings.Index(curReducedFormat, "]")
  450. if bracketIndex == -1 {
  451. return "", "", false, errors.New("invalid formatting code, invalid brackets")
  452. }
  453. // Currencies in Excel are annotated with this format: [$<Currency String>-<Language Info>]
  454. // Currency String is something like $, ¥, €, or £
  455. // Language Info is three hexadecimal characters
  456. if len(curReducedFormat) > 2 && curReducedFormat[1] == '$' {
  457. dashIndex := strings.Index(curReducedFormat, "-")
  458. if dashIndex != -1 && dashIndex < bracketIndex {
  459. // Get the currency symbol, and skip to the end of the currency format
  460. prefix += curReducedFormat[2:dashIndex]
  461. } else {
  462. return "", "", false, errors.New("invalid formatting code, invalid currency annotation")
  463. }
  464. }
  465. i += bracketIndex
  466. case '$', '-', '+', '/', '(', ')', ':', '!', '^', '&', '\'', '~', '{', '}', '<', '>', '=', ' ':
  467. // These symbols are allowed to be used as literal without escaping
  468. prefix += curReducedFormat[0:1]
  469. default:
  470. for _, special := range formattingCharacters {
  471. if strings.HasPrefix(curReducedFormat, special) {
  472. // This means we found the start of the actual number formatting portion, and should return.
  473. return prefix, format[i:], showPercent, nil
  474. }
  475. }
  476. // Symbols that don't have meaning and aren't in the exempt literal characters and are not escaped.
  477. return "", "", false, errors.New("invalid formatting code: unsupported or unescaped characters")
  478. }
  479. }
  480. return prefix, "", showPercent, nil
  481. }
  482. // parseTime returns a string parsed using time.Time
  483. func (fullFormat *parsedNumberFormat) parseTime(value string, date1904 bool) (string, error) {
  484. f, err := strconv.ParseFloat(value, 64)
  485. if err != nil {
  486. return value, err
  487. }
  488. val := TimeFromExcelTime(f, date1904)
  489. format := fullFormat.numFmt
  490. // Replace Excel placeholders with Go time placeholders.
  491. // For example, replace yyyy with 2006. These are in a specific order,
  492. // due to the fact that m is used in month, minute, and am/pm. It would
  493. // be easier to fix that with regular expressions, but if it's possible
  494. // to keep this simple it would be easier to maintain.
  495. // Full-length month and days (e.g. March, Tuesday) have letters in them that would be replaced
  496. // by other characters below (such as the 'h' in March, or the 'd' in Tuesday) below.
  497. // First we convert them to arbitrary characters unused in Excel Date formats, and then at the end,
  498. // turn them to what they should actually be.
  499. // Based off: http://www.ozgrid.com/Excel/CustomFormats.htm
  500. replacements := []struct{ xltime, gotime string }{
  501. {"yyyy", "2006"},
  502. {"yy", "06"},
  503. {"mmmm", "%%%%"},
  504. {"dddd", "&&&&"},
  505. {"dd", "02"},
  506. {"d", "2"},
  507. {"mmm", "Jan"},
  508. {"mmss", "0405"},
  509. {"ss", "05"},
  510. {"mm:", "04:"},
  511. {":mm", ":04"},
  512. {"mm", "01"},
  513. {"am/pm", "pm"},
  514. {"m/", "1/"},
  515. {"%%%%", "January"},
  516. {"&&&&", "Monday"},
  517. }
  518. // It is the presence of the "am/pm" indicator that determins
  519. // if this is a 12 hour or 24 hours time format, not the
  520. // number of 'h' characters.
  521. if is12HourTime(format) {
  522. format = strings.Replace(format, "hh", "03", 1)
  523. format = strings.Replace(format, "h", "3", 1)
  524. } else {
  525. format = strings.Replace(format, "hh", "15", 1)
  526. format = strings.Replace(format, "h", "15", 1)
  527. }
  528. for _, repl := range replacements {
  529. format = strings.Replace(format, repl.xltime, repl.gotime, 1)
  530. }
  531. // If the hour is optional, strip it out, along with the
  532. // possible dangling colon that would remain.
  533. if val.Hour() < 1 {
  534. format = strings.Replace(format, "]:", "]", 1)
  535. format = strings.Replace(format, "[03]", "", 1)
  536. format = strings.Replace(format, "[3]", "", 1)
  537. format = strings.Replace(format, "[15]", "", 1)
  538. } else {
  539. format = strings.Replace(format, "[3]", "3", 1)
  540. format = strings.Replace(format, "[15]", "15", 1)
  541. }
  542. return val.Format(format), nil
  543. }
  544. // isTimeFormat checks whether an Excel format string represents a time.Time.
  545. // This function is now correct, but it can detect time format strings that cannot be correctly handled by parseTime()
  546. func isTimeFormat(format string) bool {
  547. var foundTimeFormatCharacters bool
  548. for i := 0; i < len(format); i++ {
  549. curReducedFormat := format[i:]
  550. switch curReducedFormat[0] {
  551. case '\\', '_':
  552. // If there is a slash, skip the next character, and add it to the prefix
  553. // If there is an underscore, skip the next character, but don't add it to the prefix
  554. if len(curReducedFormat) > 1 {
  555. i++
  556. }
  557. case '*':
  558. // Asterisks are used to repeat the next character to fill the full cell width.
  559. // There isn't really a cell size in this context, so this will be ignored.
  560. case '"':
  561. // If there is a quote skip to the next quote, and add the quoted characters to the prefix
  562. endQuoteIndex := strings.Index(curReducedFormat[1:], "\"")
  563. if endQuoteIndex == -1 {
  564. // This is not any type of valid format.
  565. return false
  566. }
  567. i += endQuoteIndex + 1
  568. case '$', '-', '+', '/', '(', ')', ':', '!', '^', '&', '\'', '~', '{', '}', '<', '>', '=', ' ':
  569. // These symbols are allowed to be used as literal without escaping
  570. case ',':
  571. // This is not documented in the XLSX spec as far as I can tell, but Excel and Numbers will include
  572. // commas in number formats without escaping them, so this should be supported.
  573. default:
  574. foundInThisLoop := false
  575. for _, special := range timeFormatCharacters {
  576. if strings.HasPrefix(curReducedFormat, special) {
  577. foundTimeFormatCharacters = true
  578. foundInThisLoop = true
  579. i += len(special) - 1
  580. break
  581. }
  582. }
  583. if foundInThisLoop {
  584. continue
  585. }
  586. if curReducedFormat[0] == '[' {
  587. // For number formats, this code would happen above in a case '[': section.
  588. // However, for time formats it must happen after looking for occurrences in timeFormatCharacters
  589. // because there are a few time formats that can be wrapped in brackets.
  590. // Brackets can be currency annotations (e.g. [$$-409])
  591. // color formats (e.g. [color1] through [color56], as well as [red] etc.)
  592. // conditionals (e.g. [>100], the valid conditionals are =, >, <, >=, <=, <>)
  593. bracketIndex := strings.Index(curReducedFormat, "]")
  594. if bracketIndex == -1 {
  595. // This is not any type of valid format.
  596. return false
  597. }
  598. i += bracketIndex
  599. continue
  600. }
  601. // Symbols that don't have meaning, aren't in the exempt literal characters, and aren't escaped are invalid.
  602. // The string could still be a valid number format string.
  603. return false
  604. }
  605. }
  606. // If the string doesn't have any time formatting characters, it could technically be a time format, but it
  607. // would be a pretty weak time format. A valid time format with no time formatting symbols will also be a number
  608. // format with no number formatting symbols, which is essentially a constant string that does not depend on the
  609. // cell's value in anyway. The downstream logic will do the right thing in that case if this returns false.
  610. return foundTimeFormatCharacters
  611. }
  612. // is12HourTime checks whether an Excel time format string is a 12
  613. // hours form.
  614. func is12HourTime(format string) bool {
  615. return strings.Contains(format, "am/pm") || strings.Contains(format, "AM/PM") || strings.Contains(format, "a/p") || strings.Contains(format, "A/P")
  616. }