format_code.go 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. package xlsx
  2. import (
  3. "errors"
  4. "fmt"
  5. "math"
  6. "strconv"
  7. "strings"
  8. )
  9. // Do not edit these attributes once this struct is created. This struct should only be created by
  10. // parseFullNumberFormatString() from a number format string. If the format for a cell needs to change, change
  11. // the number format string and getNumberFormat() will invalidate the old struct and re-parse the string.
  12. type parsedNumberFormat struct {
  13. numFmt string
  14. isTimeFormat bool
  15. negativeFormatExpectsPositive bool
  16. positiveFormat *formatOptions
  17. negativeFormat *formatOptions
  18. zeroFormat *formatOptions
  19. textFormat *formatOptions
  20. parseEncounteredError *error
  21. }
  22. type formatOptions struct {
  23. isTimeFormat bool
  24. showPercent bool
  25. fullFormatString string
  26. reducedFormatString string
  27. prefix string
  28. suffix string
  29. }
  30. // FormatValue returns a value, and possibly an error condition
  31. // from a Cell. If it is possible to apply a format to the cell
  32. // value, it will do so, if not then an error will be returned, along
  33. // with the raw value of the Cell.
  34. //
  35. // This is the documentation of the "General" Format in the Office Open XML spec:
  36. //
  37. // Numbers
  38. // The application shall attempt to display the full number up to 11 digits (inc. decimal point). If the number is too
  39. // large*, the application shall attempt to show exponential format. If the number has too many significant digits, the
  40. // display shall be truncated. The optimal method of display is based on the available cell width. If the number cannot
  41. // be displayed using any of these formats in the available width, the application shall show "#" across the width of
  42. // the cell.
  43. //
  44. // Conditions for switching to exponential format:
  45. // 1. The cell value shall have at least five digits for xE-xx
  46. // 2. If the exponent is bigger than the size allowed, a floating point number cannot fit, so try exponential notation.
  47. // 3. Similarly, for negative exponents, check if there is space for even one (non-zero) digit in floating point format**.
  48. // 4. Finally, if there isn't room for all of the significant digits in floating point format (for a negative exponent),
  49. // exponential format shall display more digits if the exponent is less than -3. (The 3 is because E-xx takes 4
  50. // characters, and the leading 0 in floating point takes only 1 character. Thus, for an exponent less than -3, there is
  51. // more than 3 additional leading 0's, more than enough to compensate for the size of the E-xx.)
  52. //
  53. // Floating point rule:
  54. // For general formatting in cells, max overall length for cell display is 11, not including negative sign, but includes
  55. // leading zeros and decimal separator.***
  56. //
  57. // Added Notes:
  58. // * "If the number is too large" can also mean "if the number has more than 11 digits", so greater than or equal to
  59. // 1e11 and less than 1e-9.
  60. // ** Means that you should switch to scientific if there would be 9 zeros after the decimal (the decimal and first zero
  61. // count against the 11 character limit), so less than 1e9.
  62. // *** The way this is written, you can get numbers that are more than 11 characters because the golang Float fmt
  63. // does not support adjusting the precision while not padding with zeros, while also not switching to scientific
  64. // notation too early.
  65. func (fullFormat *parsedNumberFormat) FormatValue(cell *Cell) (string, error) {
  66. switch cell.cellType {
  67. case CellTypeError:
  68. // The error type is what XLSX uses in error cases such as when formulas are invalid.
  69. // There will be text in the cell's value that can be shown, something ugly like #NAME? or #######
  70. return cell.Value, nil
  71. case CellTypeBool:
  72. if cell.Value == "0" {
  73. return "FALSE", nil
  74. } else if cell.Value == "1" {
  75. return "TRUE", nil
  76. } else {
  77. return cell.Value, errors.New("invalid value in bool cell")
  78. }
  79. case CellTypeString:
  80. fallthrough
  81. case CellTypeInline:
  82. fallthrough
  83. case CellTypeStringFormula:
  84. textFormat := cell.parsedNumFmt.textFormat
  85. // This switch statement is only for String formats
  86. switch textFormat.reducedFormatString {
  87. case builtInNumFmt[builtInNumFmtIndex_GENERAL]: // General is literally "general"
  88. return cell.Value, nil
  89. case builtInNumFmt[builtInNumFmtIndex_STRING]: // String is "@"
  90. return textFormat.prefix + cell.Value + textFormat.suffix, nil
  91. case "":
  92. // If cell is not "General" and there is not an "@" symbol in the format, then the cell's value is not
  93. // used when determining what to display. It would be completely legal to have a format of "Error"
  94. // for strings, and all values that are not numbers would show up as "Error". In that case, this code would
  95. // have a prefix of "Error" and a reduced format string of "" (empty string).
  96. return textFormat.prefix + textFormat.suffix, nil
  97. default:
  98. return cell.Value, errors.New("invalid or unsupported format, unsupported string format")
  99. }
  100. case CellTypeDate:
  101. // These are dates that are stored in date format instead of being stored as numbers with a format to turn them
  102. // into a date string.
  103. return cell.Value, nil
  104. case CellTypeNumeric:
  105. return fullFormat.formatNumericCell(cell)
  106. default:
  107. return cell.Value, errors.New("unknown cell type")
  108. }
  109. }
  110. func (fullFormat *parsedNumberFormat) formatNumericCell(cell *Cell) (string, error) {
  111. rawValue := strings.TrimSpace(cell.Value)
  112. // If there wasn't a value in the cell, it shouldn't have been marked as Numeric.
  113. // It's better to support this case though.
  114. if rawValue == "" {
  115. return "", nil
  116. }
  117. if fullFormat.isTimeFormat {
  118. return fullFormat.parseTime(rawValue, cell.date1904)
  119. }
  120. var numberFormat *formatOptions
  121. floatVal, floatErr := strconv.ParseFloat(rawValue, 64)
  122. if floatErr != nil {
  123. return rawValue, floatErr
  124. }
  125. // Choose the correct format. There can be different formats for positive, negative, and zero numbers.
  126. // Excel only uses the zero format if the value is literally zero, even if the number is so small that it shows
  127. // up as "0" when the positive format is used.
  128. if floatVal > 0 {
  129. numberFormat = fullFormat.positiveFormat
  130. } else if floatVal < 0 {
  131. // If format string specified a different format for negative numbers, then the number should be made positive
  132. // before getting formatted. The format string itself will contain formatting that denotes a negative number and
  133. // this formatting will end up in the prefix or suffix. Commonly if there is a negative format specified, the
  134. // number will get surrounded by parenthesis instead of showing it with a minus sign.
  135. if fullFormat.negativeFormatExpectsPositive {
  136. floatVal = math.Abs(floatVal)
  137. }
  138. numberFormat = fullFormat.negativeFormat
  139. } else {
  140. numberFormat = fullFormat.zeroFormat
  141. }
  142. // When showPercent is true, multiply the number by 100.
  143. // The percent sign will be in the prefix or suffix already, so it does not need to be added in this function.
  144. // The number format itself will be the same as any other number format once the value is multiplied by 100.
  145. if numberFormat.showPercent {
  146. floatVal = 100 * floatVal
  147. }
  148. // Only the most common format strings are supported here.
  149. // Eventually this switch needs to be replaced with a more general solution.
  150. // Some of these "supported" formats should have thousand separators, but don't get them since Go fmt
  151. // doesn't have a way to request thousands separators.
  152. // The only things that should be supported here are in the array formattingCharacters,
  153. // everything else has been stripped out before and will be placed in the prefix or suffix.
  154. // The formatting characters can have non-formatting characters mixed in with them and those should be maintained.
  155. // However, at this time we fail to parse those formatting codes and they get replaced with "General"
  156. var formattedNum string
  157. switch numberFormat.reducedFormatString {
  158. case builtInNumFmt[builtInNumFmtIndex_GENERAL]: // General is literally "general"
  159. // prefix, showPercent, and suffix cannot apply to the general format
  160. // The logic for showing numbers when the format is "general" is much more complicated than the rest of these.
  161. generalFormatted, err := generalNumericScientific(cell.Value, true)
  162. if err != nil {
  163. return rawValue, nil
  164. }
  165. return generalFormatted, nil
  166. case builtInNumFmt[builtInNumFmtIndex_STRING]: // String is "@"
  167. formattedNum = cell.Value
  168. case builtInNumFmt[builtInNumFmtIndex_INT], "#,##0": // Int is "0"
  169. // Previously this case would cast to int and print with %d, but that will not round the value correctly.
  170. formattedNum = fmt.Sprintf("%.0f", floatVal)
  171. case "0.0", "#,##0.0":
  172. formattedNum = fmt.Sprintf("%.1f", floatVal)
  173. case builtInNumFmt[builtInNumFmtIndex_FLOAT], "#,##0.00": // Float is "0.00"
  174. formattedNum = fmt.Sprintf("%.2f", floatVal)
  175. case "0.000", "#,##0.000":
  176. formattedNum = fmt.Sprintf("%.3f", floatVal)
  177. case "0.0000", "#,##0.0000":
  178. formattedNum = fmt.Sprintf("%.4f", floatVal)
  179. case "0.00e+00", "##0.0e+0":
  180. formattedNum = fmt.Sprintf("%e", floatVal)
  181. case "":
  182. // Do nothing.
  183. default:
  184. return rawValue, nil
  185. }
  186. return numberFormat.prefix + formattedNum + numberFormat.suffix, nil
  187. }
  188. func generalNumericScientific(value string, allowScientific bool) (string, error) {
  189. if strings.TrimSpace(value) == "" {
  190. return "", nil
  191. }
  192. f, err := strconv.ParseFloat(value, 64)
  193. if err != nil {
  194. return value, err
  195. }
  196. if allowScientific {
  197. absF := math.Abs(f)
  198. // When using General format, numbers that are less than 1e-9 (0.000000001) and greater than or equal to
  199. // 1e11 (100,000,000,000) should be shown in scientific notation.
  200. // Numbers less than the number after zero, are assumed to be zero.
  201. if (absF >= math.SmallestNonzeroFloat64 && absF < minNonScientificNumber) || absF >= maxNonScientificNumber {
  202. return strconv.FormatFloat(f, 'E', -1, 64), nil
  203. }
  204. }
  205. // This format (fmt="f", prec=-1) will prevent padding with zeros and will never switch to scientific notation.
  206. // However, it will show more than 11 characters for very precise numbers, and this cannot be changed.
  207. // You could also use fmt="g", prec=11, which doesn't pad with zeros and allows the correct precision,
  208. // but it will use scientific notation on numbers less than 1e-4. That value is hardcoded in Go and cannot be
  209. // configured or disabled.
  210. return strconv.FormatFloat(f, 'f', -1, 64), nil
  211. }
  212. // Format strings are a little strange to compare because empty string needs to be taken as general, and general needs
  213. // to be compared case insensitively.
  214. func compareFormatString(fmt1, fmt2 string) bool {
  215. if fmt1 == fmt2 {
  216. return true
  217. }
  218. if fmt1 == "" || strings.EqualFold(fmt1, "general") {
  219. fmt1 = "general"
  220. }
  221. if fmt2 == "" || strings.EqualFold(fmt2, "general") {
  222. fmt2 = "general"
  223. }
  224. return fmt1 == fmt2
  225. }
  226. func parseFullNumberFormatString(numFmt string) *parsedNumberFormat {
  227. parsedNumFmt := &parsedNumberFormat{
  228. numFmt: numFmt,
  229. }
  230. if isTimeFormat(numFmt) {
  231. // Time formats cannot have multiple groups separated by semicolons, there is only one format.
  232. // Strings are unaffected by the time format.
  233. parsedNumFmt.isTimeFormat = true
  234. parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
  235. return parsedNumFmt
  236. }
  237. var fmtOptions []*formatOptions
  238. formats, err := splitFormatOnSemicolon(numFmt)
  239. if err == nil {
  240. for _, formatSection := range formats {
  241. parsedFormat, err := parseNumberFormatSection(formatSection)
  242. if err != nil {
  243. // If an invalid number section is found, fall back to general
  244. parsedFormat = fallbackErrorFormat
  245. parsedNumFmt.parseEncounteredError = &err
  246. }
  247. fmtOptions = append(fmtOptions, parsedFormat)
  248. }
  249. } else {
  250. fmtOptions = append(fmtOptions, fallbackErrorFormat)
  251. parsedNumFmt.parseEncounteredError = &err
  252. }
  253. if len(fmtOptions) > 4 {
  254. fmtOptions = []*formatOptions{fallbackErrorFormat}
  255. err = errors.New("invalid number format, too many format sections")
  256. parsedNumFmt.parseEncounteredError = &err
  257. }
  258. if len(fmtOptions) == 1 {
  259. // If there is only one option, it is used for all
  260. parsedNumFmt.positiveFormat = fmtOptions[0]
  261. parsedNumFmt.negativeFormat = fmtOptions[0]
  262. parsedNumFmt.zeroFormat = fmtOptions[0]
  263. if strings.Contains(fmtOptions[0].fullFormatString, "@") {
  264. parsedNumFmt.textFormat = fmtOptions[0]
  265. } else {
  266. parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
  267. }
  268. } else if len(fmtOptions) == 2 {
  269. // If there are two formats, the first is used for positive and zeros, the second gets used as a negative format,
  270. // and strings are not formatted.
  271. // When negative numbers now have their own format, they should become positive before having the format applied.
  272. // The format will contain a negative sign if it is desired, but they may be colored red or wrapped in
  273. // parenthesis instead.
  274. parsedNumFmt.negativeFormatExpectsPositive = true
  275. parsedNumFmt.positiveFormat = fmtOptions[0]
  276. parsedNumFmt.negativeFormat = fmtOptions[1]
  277. parsedNumFmt.zeroFormat = fmtOptions[0]
  278. parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
  279. } else if len(fmtOptions) == 3 {
  280. // If there are three formats, the first is used for positive, the second gets used as a negative format,
  281. // the third is for negative, and strings are not formatted.
  282. parsedNumFmt.negativeFormatExpectsPositive = true
  283. parsedNumFmt.positiveFormat = fmtOptions[0]
  284. parsedNumFmt.negativeFormat = fmtOptions[1]
  285. parsedNumFmt.zeroFormat = fmtOptions[2]
  286. parsedNumFmt.textFormat, _ = parseNumberFormatSection("general")
  287. } else {
  288. // With four options, the first is positive, the second is negative, the third is zero, and the fourth is strings
  289. // Negative numbers should be still become positive before having the negative formatting applied.
  290. parsedNumFmt.negativeFormatExpectsPositive = true
  291. parsedNumFmt.positiveFormat = fmtOptions[0]
  292. parsedNumFmt.negativeFormat = fmtOptions[1]
  293. parsedNumFmt.zeroFormat = fmtOptions[2]
  294. parsedNumFmt.textFormat = fmtOptions[3]
  295. }
  296. return parsedNumFmt
  297. }
  298. // splitFormatOnSemicolon will split the format string into the format sections
  299. // This logic to split the different formats on semicolon is fully correct, and will skip all literal semicolons,
  300. // and will catch all breaking semicolons.
  301. func splitFormatOnSemicolon(format string) ([]string, error) {
  302. var formats []string
  303. prevIndex := 0
  304. for i := 0; i < len(format); i++ {
  305. if format[i] == ';' {
  306. formats = append(formats, format[prevIndex:i])
  307. prevIndex = i + 1
  308. } else if format[i] == '\\' {
  309. i++
  310. } else if format[i] == '"' {
  311. endQuoteIndex := strings.Index(format[i+1:], "\"")
  312. if endQuoteIndex == -1 {
  313. // This is an invalid format string, fall back to general
  314. return nil, errors.New("invalid format string, unmatched double quote")
  315. }
  316. i += endQuoteIndex + 1
  317. }
  318. }
  319. return append(formats, format[prevIndex:]), nil
  320. }
  321. var fallbackErrorFormat = &formatOptions{
  322. fullFormatString: "general",
  323. reducedFormatString: "general",
  324. }
  325. // parseNumberFormatSection takes in individual format and parses out most of the options.
  326. // Some options are parsed, removed from the string, and set as settings on formatOptions.
  327. // There remainder of the format string is put in the reducedFormatString attribute, and supported values for these
  328. // are handled in a switch in the Cell.FormattedValue() function.
  329. // Ideally more and more of the format string would be parsed out here into settings until there is no remainder string
  330. // at all.
  331. // Features that this supports:
  332. // - Time formats are detected, and marked in the options. Time format strings are handled when doing the formatting.
  333. // The logic to detect time formats is currently not correct, and can catch formats that are not time formats as well
  334. // as miss formats that are time formats.
  335. // - Color formats are detected and removed.
  336. // - Currency annotations are handled properly.
  337. // - Literal strings wrapped in quotes are handled and put into prefix or suffix.
  338. // - Numbers that should be percent are detected and marked in the options.
  339. // - Conditionals are detected and removed, but they are not obeyed. The conditional groups will be used just like the
  340. // positive;negative;zero;string format groups. Here is an example of a conditional format: "[Red][<=100];[Blue][>100]"
  341. // Decoding the actual number formatting portion is out of scope, that is placed into reducedFormatString and is used
  342. // when formatting the string. The string there will be reduced to only the things in the formattingCharacters array.
  343. // Everything not in that array has been parsed out and put into formatOptions.
  344. func parseNumberFormatSection(fullFormat string) (*formatOptions, error) {
  345. reducedFormat := strings.TrimSpace(fullFormat)
  346. // general is the only format that does not use the normal format symbols notations
  347. if compareFormatString(reducedFormat, "general") {
  348. return &formatOptions{
  349. fullFormatString: "general",
  350. reducedFormatString: "general",
  351. }, nil
  352. }
  353. prefix, reducedFormat, showPercent1, err := parseLiterals(reducedFormat)
  354. if err != nil {
  355. return nil, err
  356. }
  357. reducedFormat, suffixFormat := splitFormatAndSuffixFormat(reducedFormat)
  358. suffix, remaining, showPercent2, err := parseLiterals(suffixFormat)
  359. if err != nil {
  360. return nil, err
  361. }
  362. if len(remaining) > 0 {
  363. // This paradigm of codes consisting of literals, number formats, then more literals is not always correct, they can
  364. // actually be intertwined. Though 99% of the time number formats will not do this.
  365. // Excel uses this format string for Social Security Numbers: 000\-00\-0000
  366. // and this for US phone numbers: [<=9999999]###\-####;\(###\)\ ###\-####
  367. return nil, errors.New("invalid or unsupported format string")
  368. }
  369. return &formatOptions{
  370. fullFormatString: fullFormat,
  371. isTimeFormat: false,
  372. reducedFormatString: reducedFormat,
  373. prefix: prefix,
  374. suffix: suffix,
  375. showPercent: showPercent1 || showPercent2,
  376. }, nil
  377. }
  378. // formattingCharacters will be left in the reducedNumberFormat
  379. // It is important that these be looked for in order so that the slash cases are handled correctly.
  380. // / (slash) is a fraction format if preceded by 0, #, or ?, otherwise it is not a formatting character
  381. // E- E+ e- e+ are scientific notation, but E, e, -, + are not formatting characters independently
  382. // \ (back slash) makes the next character a literal (not formatting)
  383. // " Anything in double quotes is not a formatting character
  384. // _ (underscore) skips the width of the next character, so the next character cannot be formatting
  385. var formattingCharacters = []string{"0/", "#/", "?/", "E-", "E+", "e-", "e+", "0", "#", "?", ".", ",", "@", "*"}
  386. // The following are also time format characters, but since this is only used for detecting, not decoding, they are
  387. // redundant here: ee, gg, ggg, rr, ss, mm, hh, yyyy, dd, ddd, dddd, mm, mmm, mmmm, mmmmm, ss.0000, ss.000, ss.00, ss.0
  388. // The .00 type format is very tricky, because it only counts if it comes after ss or s or [ss] or [s]
  389. // .00 is actually a valid number format by itself.
  390. var timeFormatCharacters = []string{"m", "d", "yy", "h", "m", "AM/PM", "A/P", "am/pm", "a/p", "r", "g", "e", "b1", "b2", "[hh]", "[h]", "[mm]", "[m]",
  391. "s.0000", "s.000", "s.00", "s.0", "s", "[ss].0000", "[ss].000", "[ss].00", "[ss].0", "[ss]", "[s].0000", "[s].000", "[s].00", "[s].0", "[s]"}
  392. func splitFormatAndSuffixFormat(format string) (string, string) {
  393. var i int
  394. for ; i < len(format); i++ {
  395. curReducedFormat := format[i:]
  396. var found bool
  397. for _, special := range formattingCharacters {
  398. if strings.HasPrefix(curReducedFormat, special) {
  399. // Skip ahead if the special character was longer than length 1
  400. i += len(special) - 1
  401. found = true
  402. break
  403. }
  404. }
  405. if !found {
  406. break
  407. }
  408. }
  409. suffixFormat := format[i:]
  410. format = format[:i]
  411. return format, suffixFormat
  412. }
  413. func parseLiterals(format string) (string, string, bool, error) {
  414. var prefix string
  415. showPercent := false
  416. for i := 0; i < len(format); i++ {
  417. curReducedFormat := format[i:]
  418. switch curReducedFormat[0] {
  419. case '\\':
  420. // If there is a slash, skip the next character, and add it to the prefix
  421. if len(curReducedFormat) > 1 {
  422. i++
  423. prefix += curReducedFormat[1:2]
  424. }
  425. case '_':
  426. // If there is an underscore, skip the next character, but don't add it to the prefix
  427. if len(curReducedFormat) > 1 {
  428. i++
  429. }
  430. case '*':
  431. // Asterisks are used to repeat the next character to fill the full cell width.
  432. // There isn't really a cell size in this context, so this will be ignored.
  433. case '"':
  434. // If there is a quote skip to the next quote, and add the quoted characters to the prefix
  435. endQuoteIndex := strings.Index(curReducedFormat[1:], "\"")
  436. if endQuoteIndex == -1 {
  437. return "", "", false, errors.New("invalid formatting code, unmatched double quote")
  438. }
  439. prefix = prefix + curReducedFormat[1:endQuoteIndex+1]
  440. i += endQuoteIndex + 1
  441. case '%':
  442. showPercent = true
  443. prefix += "%"
  444. case '[':
  445. // Brackets can be currency annotations (e.g. [$$-409])
  446. // color formats (e.g. [color1] through [color56], as well as [red] etc.)
  447. // conditionals (e.g. [>100], the valid conditionals are =, >, <, >=, <=, <>)
  448. bracketIndex := strings.Index(curReducedFormat, "]")
  449. if bracketIndex == -1 {
  450. return "", "", false, errors.New("invalid formatting code, invalid brackets")
  451. }
  452. // Currencies in Excel are annotated with this format: [$<Currency String>-<Language Info>]
  453. // Currency String is something like $, ¥, €, or £
  454. // Language Info is three hexadecimal characters
  455. if len(curReducedFormat) > 2 && curReducedFormat[1] == '$' {
  456. dashIndex := strings.Index(curReducedFormat, "-")
  457. if dashIndex != -1 && dashIndex < bracketIndex {
  458. // Get the currency symbol, and skip to the end of the currency format
  459. prefix += curReducedFormat[2:dashIndex]
  460. } else {
  461. return "", "", false, errors.New("invalid formatting code, invalid currency annotation")
  462. }
  463. }
  464. i += bracketIndex
  465. case '$', '-', '+', '/', '(', ')', ':', '!', '^', '&', '\'', '~', '{', '}', '<', '>', '=', ' ':
  466. // These symbols are allowed to be used as literal without escaping
  467. prefix += curReducedFormat[0:1]
  468. default:
  469. for _, special := range formattingCharacters {
  470. if strings.HasPrefix(curReducedFormat, special) {
  471. // This means we found the start of the actual number formatting portion, and should return.
  472. return prefix, format[i:], showPercent, nil
  473. }
  474. }
  475. // Symbols that don't have meaning and aren't in the exempt literal characters and are not escaped.
  476. return "", "", false, errors.New("invalid formatting code: unsupported or unescaped characters")
  477. }
  478. }
  479. return prefix, "", showPercent, nil
  480. }
  481. // parseTime returns a string parsed using time.Time
  482. func (fullFormat *parsedNumberFormat) parseTime(value string, date1904 bool) (string, error) {
  483. f, err := strconv.ParseFloat(value, 64)
  484. if err != nil {
  485. return value, err
  486. }
  487. val := TimeFromExcelTime(f, date1904)
  488. format := fullFormat.numFmt
  489. // Replace Excel placeholders with Go time placeholders.
  490. // For example, replace yyyy with 2006. These are in a specific order,
  491. // due to the fact that m is used in month, minute, and am/pm. It would
  492. // be easier to fix that with regular expressions, but if it's possible
  493. // to keep this simple it would be easier to maintain.
  494. // Full-length month and days (e.g. March, Tuesday) have letters in them that would be replaced
  495. // by other characters below (such as the 'h' in March, or the 'd' in Tuesday) below.
  496. // First we convert them to arbitrary characters unused in Excel Date formats, and then at the end,
  497. // turn them to what they should actually be.
  498. // Based off: http://www.ozgrid.com/Excel/CustomFormats.htm
  499. replacements := []struct{ xltime, gotime string }{
  500. {"yyyy", "2006"},
  501. {"yy", "06"},
  502. {"mmmm", "%%%%"},
  503. {"dddd", "&&&&"},
  504. {"dd", "02"},
  505. {"d", "2"},
  506. {"mmm", "Jan"},
  507. {"mmss", "0405"},
  508. {"ss", "05"},
  509. {"mm:", "04:"},
  510. {":mm", ":04"},
  511. {"mm", "01"},
  512. {"am/pm", "pm"},
  513. {"m/", "1/"},
  514. {"%%%%", "January"},
  515. {"&&&&", "Monday"},
  516. }
  517. // It is the presence of the "am/pm" indicator that determins
  518. // if this is a 12 hour or 24 hours time format, not the
  519. // number of 'h' characters.
  520. if is12HourTime(format) {
  521. format = strings.Replace(format, "hh", "03", 1)
  522. format = strings.Replace(format, "h", "3", 1)
  523. } else {
  524. format = strings.Replace(format, "hh", "15", 1)
  525. format = strings.Replace(format, "h", "15", 1)
  526. }
  527. for _, repl := range replacements {
  528. format = strings.Replace(format, repl.xltime, repl.gotime, 1)
  529. }
  530. // If the hour is optional, strip it out, along with the
  531. // possible dangling colon that would remain.
  532. if val.Hour() < 1 {
  533. format = strings.Replace(format, "]:", "]", 1)
  534. format = strings.Replace(format, "[03]", "", 1)
  535. format = strings.Replace(format, "[3]", "", 1)
  536. format = strings.Replace(format, "[15]", "", 1)
  537. } else {
  538. format = strings.Replace(format, "[3]", "3", 1)
  539. format = strings.Replace(format, "[15]", "15", 1)
  540. }
  541. return val.Format(format), nil
  542. }
  543. // isTimeFormat checks whether an Excel format string represents a time.Time.
  544. // This function is now correct, but it can detect time format strings that cannot be correctly handled by parseTime()
  545. func isTimeFormat(format string) bool {
  546. var foundTimeFormatCharacters bool
  547. for i := 0; i < len(format); i++ {
  548. curReducedFormat := format[i:]
  549. switch curReducedFormat[0] {
  550. case '\\', '_':
  551. // If there is a slash, skip the next character, and add it to the prefix
  552. // If there is an underscore, skip the next character, but don't add it to the prefix
  553. if len(curReducedFormat) > 1 {
  554. i++
  555. }
  556. case '*':
  557. // Asterisks are used to repeat the next character to fill the full cell width.
  558. // There isn't really a cell size in this context, so this will be ignored.
  559. case '"':
  560. // If there is a quote skip to the next quote, and add the quoted characters to the prefix
  561. endQuoteIndex := strings.Index(curReducedFormat[1:], "\"")
  562. if endQuoteIndex == -1 {
  563. // This is not any type of valid format.
  564. return false
  565. }
  566. i += endQuoteIndex + 1
  567. case '$', '-', '+', '/', '(', ')', ':', '!', '^', '&', '\'', '~', '{', '}', '<', '>', '=', ' ':
  568. // These symbols are allowed to be used as literal without escaping
  569. case ',':
  570. // This is not documented in the XLSX spec as far as I can tell, but Excel and Numbers will include
  571. // commas in number formats without escaping them, so this should be supported.
  572. default:
  573. foundInThisLoop := false
  574. for _, special := range timeFormatCharacters {
  575. if strings.HasPrefix(curReducedFormat, special) {
  576. foundTimeFormatCharacters = true
  577. foundInThisLoop = true
  578. i += len(special) - 1
  579. break
  580. }
  581. }
  582. if foundInThisLoop {
  583. continue
  584. }
  585. if curReducedFormat[0] == '[' {
  586. // For number formats, this code would happen above in a case '[': section.
  587. // However, for time formats it must happen after looking for occurrences in timeFormatCharacters
  588. // because there are a few time formats that can be wrapped in brackets.
  589. // Brackets can be currency annotations (e.g. [$$-409])
  590. // color formats (e.g. [color1] through [color56], as well as [red] etc.)
  591. // conditionals (e.g. [>100], the valid conditionals are =, >, <, >=, <=, <>)
  592. bracketIndex := strings.Index(curReducedFormat, "]")
  593. if bracketIndex == -1 {
  594. // This is not any type of valid format.
  595. return false
  596. }
  597. i += bracketIndex
  598. continue
  599. }
  600. // Symbols that don't have meaning, aren't in the exempt literal characters, and aren't escaped are invalid.
  601. // The string could still be a valid number format string.
  602. return false
  603. }
  604. }
  605. // If the string doesn't have any time formatting characters, it could technically be a time format, but it
  606. // would be a pretty weak time format. A valid time format with no time formatting symbols will also be a number
  607. // format with no number formatting symbols, which is essentially a constant string that does not depend on the
  608. // cell's value in anyway. The downstream logic will do the right thing in that case if this returns false.
  609. return foundTimeFormatCharacters
  610. }
  611. // is12HourTime checks whether an Excel time format string is a 12
  612. // hours form.
  613. func is12HourTime(format string) bool {
  614. return strings.Contains(format, "am/pm") || strings.Contains(format, "AM/PM") || strings.Contains(format, "a/p") || strings.Contains(format, "A/P")
  615. }