parser.go 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. // Copyright 2017 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package format
  5. import (
  6. "reflect"
  7. "unicode/utf8"
  8. )
// A Parser parses a format string. The results of each call to Scan are
// stored in the struct fields.
type Parser struct {
	// Verb is the verb rune of the substitution most recently parsed by Scan.
	Verb rune

	// WidthPresent and PrecPresent report whether Width and Prec hold a
	// usable value for the current substitution. The remaining booleans
	// record the '-', '+', '#', ' ' and '0' flags seen before the verb.
	// All of them are cleared by ClearFlags.
	WidthPresent bool
	PrecPresent  bool
	Minus        bool
	Plus         bool
	Sharp        bool
	Space        bool
	Zero         bool

	// For the formats %+v %#v, we set the plusV/sharpV flags
	// and clear the plus/sharp flags since %+v and %#v are in effect
	// different, flagless formats set at the top level.
	PlusV  bool
	SharpV bool
	// HasIndex reports whether a bracketed argument index such as [3] was
	// parsed for the verb of the current substitution.
	HasIndex bool

	// Width and Prec are only meaningful when WidthPresent and PrecPresent,
	// respectively, are true.
	Width int
	Prec  int // precision

	// retain arguments across calls.
	Args []interface{}
	// retain current argument number across calls
	ArgNum int
	// reordered records whether the format string used argument reordering.
	Reordered bool
	// goodArgNum records whether the most recent reordering directive was valid.
	goodArgNum bool

	// position info: the format string being scanned and the byte range
	// [startPos, endPos) covered by the most recent call to Scan.
	format   string
	startPos int
	endPos   int
	// Status classifies what the most recent call to Scan produced.
	Status Status
}
  42. // Reset initializes a parser to scan format strings for the given args.
  43. func (p *Parser) Reset(args []interface{}) {
  44. p.Args = args
  45. p.ArgNum = 0
  46. p.startPos = 0
  47. p.Reordered = false
  48. }
  49. // Text returns the part of the format string that was parsed by the last call
  50. // to Scan. It returns the original substitution clause if the current scan
  51. // parsed a substitution.
  52. func (p *Parser) Text() string { return p.format[p.startPos:p.endPos] }
  53. // SetFormat sets a new format string to parse. It does not reset the argument
  54. // count.
  55. func (p *Parser) SetFormat(format string) {
  56. p.format = format
  57. p.startPos = 0
  58. p.endPos = 0
  59. }
// Status indicates the result type of a call to Scan.
type Status int

const (
	// StatusText means Scan consumed literal text; a "%%" directive is also
	// reported as text, with Text returning the single "%".
	StatusText Status = iota
	// StatusSubstitution means Scan parsed a substitution without
	// detecting any of the error conditions below.
	StatusSubstitution
	// StatusBadWidthSubstitution means a '*' width did not resolve to a
	// usable integer operand.
	StatusBadWidthSubstitution
	// StatusBadPrecSubstitution means a '*' precision did not resolve to a
	// usable, non-negative integer operand.
	StatusBadPrecSubstitution
	// StatusNoVerb means the format string ended before a verb was found.
	StatusNoVerb
	// StatusBadArgNum means an explicit argument index was malformed, out
	// of range, or used in an invalid position.
	StatusBadArgNum
	// StatusMissingArg means no operand was left for the verb.
	StatusMissingArg
)
  71. // ClearFlags reset the parser to default behavior.
  72. func (p *Parser) ClearFlags() {
  73. p.WidthPresent = false
  74. p.PrecPresent = false
  75. p.Minus = false
  76. p.Plus = false
  77. p.Sharp = false
  78. p.Space = false
  79. p.Zero = false
  80. p.PlusV = false
  81. p.SharpV = false
  82. p.HasIndex = false
  83. }
  84. // Scan scans the next part of the format string and sets the status to
  85. // indicate whether it scanned a string literal, substitution or error.
  86. func (p *Parser) Scan() bool {
  87. p.Status = StatusText
  88. format := p.format
  89. end := len(format)
  90. if p.endPos >= end {
  91. return false
  92. }
  93. afterIndex := false // previous item in format was an index like [3].
  94. p.startPos = p.endPos
  95. p.goodArgNum = true
  96. i := p.startPos
  97. for i < end && format[i] != '%' {
  98. i++
  99. }
  100. if i > p.startPos {
  101. p.endPos = i
  102. return true
  103. }
  104. // Process one verb
  105. i++
  106. p.Status = StatusSubstitution
  107. // Do we have flags?
  108. p.ClearFlags()
  109. simpleFormat:
  110. for ; i < end; i++ {
  111. c := p.format[i]
  112. switch c {
  113. case '#':
  114. p.Sharp = true
  115. case '0':
  116. p.Zero = !p.Minus // Only allow zero padding to the left.
  117. case '+':
  118. p.Plus = true
  119. case '-':
  120. p.Minus = true
  121. p.Zero = false // Do not pad with zeros to the right.
  122. case ' ':
  123. p.Space = true
  124. default:
  125. // Fast path for common case of ascii lower case simple verbs
  126. // without precision or width or argument indices.
  127. if 'a' <= c && c <= 'z' && p.ArgNum < len(p.Args) {
  128. if c == 'v' {
  129. // Go syntax
  130. p.SharpV = p.Sharp
  131. p.Sharp = false
  132. // Struct-field syntax
  133. p.PlusV = p.Plus
  134. p.Plus = false
  135. }
  136. p.Verb = rune(c)
  137. p.ArgNum++
  138. p.endPos = i + 1
  139. return true
  140. }
  141. // Format is more complex than simple flags and a verb or is malformed.
  142. break simpleFormat
  143. }
  144. }
  145. // Do we have an explicit argument index?
  146. i, afterIndex = p.updateArgNumber(format, i)
  147. // Do we have width?
  148. if i < end && format[i] == '*' {
  149. i++
  150. p.Width, p.WidthPresent = p.intFromArg()
  151. if !p.WidthPresent {
  152. p.Status = StatusBadWidthSubstitution
  153. }
  154. // We have a negative width, so take its value and ensure
  155. // that the minus flag is set
  156. if p.Width < 0 {
  157. p.Width = -p.Width
  158. p.Minus = true
  159. p.Zero = false // Do not pad with zeros to the right.
  160. }
  161. afterIndex = false
  162. } else {
  163. p.Width, p.WidthPresent, i = parsenum(format, i, end)
  164. if afterIndex && p.WidthPresent { // "%[3]2d"
  165. p.goodArgNum = false
  166. }
  167. }
  168. // Do we have precision?
  169. if i+1 < end && format[i] == '.' {
  170. i++
  171. if afterIndex { // "%[3].2d"
  172. p.goodArgNum = false
  173. }
  174. i, afterIndex = p.updateArgNumber(format, i)
  175. if i < end && format[i] == '*' {
  176. i++
  177. p.Prec, p.PrecPresent = p.intFromArg()
  178. // Negative precision arguments don't make sense
  179. if p.Prec < 0 {
  180. p.Prec = 0
  181. p.PrecPresent = false
  182. }
  183. if !p.PrecPresent {
  184. p.Status = StatusBadPrecSubstitution
  185. }
  186. afterIndex = false
  187. } else {
  188. p.Prec, p.PrecPresent, i = parsenum(format, i, end)
  189. if !p.PrecPresent {
  190. p.Prec = 0
  191. p.PrecPresent = true
  192. }
  193. }
  194. }
  195. if !afterIndex {
  196. i, afterIndex = p.updateArgNumber(format, i)
  197. }
  198. p.HasIndex = afterIndex
  199. if i >= end {
  200. p.endPos = i
  201. p.Status = StatusNoVerb
  202. return true
  203. }
  204. verb, w := utf8.DecodeRuneInString(format[i:])
  205. p.endPos = i + w
  206. p.Verb = verb
  207. switch {
  208. case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec.
  209. p.startPos = p.endPos - 1
  210. p.Status = StatusText
  211. case !p.goodArgNum:
  212. p.Status = StatusBadArgNum
  213. case p.ArgNum >= len(p.Args): // No argument left over to print for the current verb.
  214. p.Status = StatusMissingArg
  215. p.ArgNum++
  216. case verb == 'v':
  217. // Go syntax
  218. p.SharpV = p.Sharp
  219. p.Sharp = false
  220. // Struct-field syntax
  221. p.PlusV = p.Plus
  222. p.Plus = false
  223. fallthrough
  224. default:
  225. p.ArgNum++
  226. }
  227. return true
  228. }
  229. // intFromArg gets the ArgNumth element of Args. On return, isInt reports
  230. // whether the argument has integer type.
  231. func (p *Parser) intFromArg() (num int, isInt bool) {
  232. if p.ArgNum < len(p.Args) {
  233. arg := p.Args[p.ArgNum]
  234. num, isInt = arg.(int) // Almost always OK.
  235. if !isInt {
  236. // Work harder.
  237. switch v := reflect.ValueOf(arg); v.Kind() {
  238. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  239. n := v.Int()
  240. if int64(int(n)) == n {
  241. num = int(n)
  242. isInt = true
  243. }
  244. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  245. n := v.Uint()
  246. if int64(n) >= 0 && uint64(int(n)) == n {
  247. num = int(n)
  248. isInt = true
  249. }
  250. default:
  251. // Already 0, false.
  252. }
  253. }
  254. p.ArgNum++
  255. if tooLarge(num) {
  256. num = 0
  257. isInt = false
  258. }
  259. }
  260. return
  261. }
  262. // parseArgNumber returns the value of the bracketed number, minus 1
  263. // (explicit argument numbers are one-indexed but we want zero-indexed).
  264. // The opening bracket is known to be present at format[0].
  265. // The returned values are the index, the number of bytes to consume
  266. // up to the closing paren, if present, and whether the number parsed
  267. // ok. The bytes to consume will be 1 if no closing paren is present.
  268. func parseArgNumber(format string) (index int, wid int, ok bool) {
  269. // There must be at least 3 bytes: [n].
  270. if len(format) < 3 {
  271. return 0, 1, false
  272. }
  273. // Find closing bracket.
  274. for i := 1; i < len(format); i++ {
  275. if format[i] == ']' {
  276. width, ok, newi := parsenum(format, 1, i)
  277. if !ok || newi != i {
  278. return 0, i + 1, false
  279. }
  280. return width - 1, i + 1, true // arg numbers are one-indexed and skip paren.
  281. }
  282. }
  283. return 0, 1, false
  284. }
  285. // updateArgNumber returns the next argument to evaluate, which is either the value of the passed-in
  286. // argNum or the value of the bracketed integer that begins format[i:]. It also returns
  287. // the new value of i, that is, the index of the next byte of the format to process.
  288. func (p *Parser) updateArgNumber(format string, i int) (newi int, found bool) {
  289. if len(format) <= i || format[i] != '[' {
  290. return i, false
  291. }
  292. p.Reordered = true
  293. index, wid, ok := parseArgNumber(format[i:])
  294. if ok && 0 <= index && index < len(p.Args) {
  295. p.ArgNum = index
  296. return i + wid, true
  297. }
  298. p.goodArgNum = false
  299. return i + wid, ok
  300. }
  301. // tooLarge reports whether the magnitude of the integer is
  302. // too large to be used as a formatting width or precision.
  303. func tooLarge(x int) bool {
  304. const max int = 1e6
  305. return x > max || x < -max
  306. }
  307. // parsenum converts ASCII to integer. num is 0 (and isnum is false) if no number present.
  308. func parsenum(s string, start, end int) (num int, isnum bool, newi int) {
  309. if start >= end {
  310. return 0, false, end
  311. }
  312. for newi = start; newi < end && '0' <= s[newi] && s[newi] <= '9'; newi++ {
  313. if tooLarge(num) {
  314. return 0, false, end // Overflow; crazy long number most likely.
  315. }
  316. num = num*10 + int(s[newi]-'0')
  317. isnum = true
  318. }
  319. return
  320. }