parse.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. package toml
  2. import (
  3. "fmt"
  4. "log"
  5. "strconv"
  6. "strings"
  7. "time"
  8. "unicode/utf8"
  9. )
  10. type parser struct {
  11. mapping map[string]interface{}
  12. types map[string]tomlType
  13. lx *lexer
  14. // A list of keys in the order that they appear in the TOML data.
  15. ordered []Key
  16. // the full key for the current hash in scope
  17. context Key
  18. // the base key name for everything except hashes
  19. currentKey string
  20. // rough approximation of line number
  21. approxLine int
  22. // A map of 'key.group.names' to whether they were created implicitly.
  23. implicits map[string]bool
  24. }
  25. type parseError string
  26. func (pe parseError) Error() string {
  27. return string(pe)
  28. }
  29. func parse(data string) (p *parser, err error) {
  30. defer func() {
  31. if r := recover(); r != nil {
  32. var ok bool
  33. if err, ok = r.(parseError); ok {
  34. return
  35. }
  36. panic(r)
  37. }
  38. }()
  39. p = &parser{
  40. mapping: make(map[string]interface{}),
  41. types: make(map[string]tomlType),
  42. lx: lex(data),
  43. ordered: make([]Key, 0),
  44. implicits: make(map[string]bool),
  45. }
  46. for {
  47. item := p.next()
  48. if item.typ == itemEOF {
  49. break
  50. }
  51. p.topLevel(item)
  52. }
  53. return p, nil
  54. }
  55. func (p *parser) panic(format string, v ...interface{}) {
  56. msg := fmt.Sprintf("Near line %d, key '%s': %s",
  57. p.approxLine, p.current(), fmt.Sprintf(format, v...))
  58. panic(parseError(msg))
  59. }
  60. func (p *parser) next() item {
  61. it := p.lx.nextItem()
  62. if it.typ == itemError {
  63. p.panic("Near line %d: %s", it.line, it.val)
  64. }
  65. return it
  66. }
  67. func (p *parser) bug(format string, v ...interface{}) {
  68. log.Fatalf("BUG: %s\n\n", fmt.Sprintf(format, v...))
  69. }
  70. func (p *parser) expect(typ itemType) item {
  71. it := p.next()
  72. p.assertEqual(typ, it.typ)
  73. return it
  74. }
  75. func (p *parser) assertEqual(expected, got itemType) {
  76. if expected != got {
  77. p.bug("Expected '%s' but got '%s'.", expected, got)
  78. }
  79. }
  80. func (p *parser) topLevel(item item) {
  81. switch item.typ {
  82. case itemCommentStart:
  83. p.approxLine = item.line
  84. p.expect(itemText)
  85. case itemTableStart:
  86. kg := p.expect(itemText)
  87. p.approxLine = kg.line
  88. key := make(Key, 0)
  89. for ; kg.typ == itemText; kg = p.next() {
  90. key = append(key, kg.val)
  91. }
  92. p.assertEqual(itemTableEnd, kg.typ)
  93. p.establishContext(key, false)
  94. p.setType("", tomlHash)
  95. p.ordered = append(p.ordered, key)
  96. case itemArrayTableStart:
  97. kg := p.expect(itemText)
  98. p.approxLine = kg.line
  99. key := make(Key, 0)
  100. for ; kg.typ == itemText; kg = p.next() {
  101. key = append(key, kg.val)
  102. }
  103. p.assertEqual(itemArrayTableEnd, kg.typ)
  104. p.establishContext(key, true)
  105. p.setType("", tomlArrayHash)
  106. p.ordered = append(p.ordered, key)
  107. case itemKeyStart:
  108. kname := p.expect(itemText)
  109. p.currentKey = kname.val
  110. p.approxLine = kname.line
  111. val, typ := p.value(p.next())
  112. p.setValue(p.currentKey, val)
  113. p.setType(p.currentKey, typ)
  114. p.ordered = append(p.ordered, p.context.add(p.currentKey))
  115. p.currentKey = ""
  116. default:
  117. p.bug("Unexpected type at top level: %s", item.typ)
  118. }
  119. }
  120. // value translates an expected value from the lexer into a Go value wrapped
  121. // as an empty interface.
  122. func (p *parser) value(it item) (interface{}, tomlType) {
  123. switch it.typ {
  124. case itemString:
  125. return p.replaceUnicode(replaceEscapes(it.val)), p.typeOfPrimitive(it)
  126. case itemBool:
  127. switch it.val {
  128. case "true":
  129. return true, p.typeOfPrimitive(it)
  130. case "false":
  131. return false, p.typeOfPrimitive(it)
  132. }
  133. p.bug("Expected boolean value, but got '%s'.", it.val)
  134. case itemInteger:
  135. num, err := strconv.ParseInt(it.val, 10, 64)
  136. if err != nil {
  137. // See comment below for floats describing why we make a
  138. // distinction between a bug and a user error.
  139. if e, ok := err.(*strconv.NumError); ok &&
  140. e.Err == strconv.ErrRange {
  141. p.panic("Integer '%s' is out of the range of 64-bit "+
  142. "signed integers.", it.val)
  143. } else {
  144. p.bug("Expected integer value, but got '%s'.", it.val)
  145. }
  146. }
  147. return num, p.typeOfPrimitive(it)
  148. case itemFloat:
  149. num, err := strconv.ParseFloat(it.val, 64)
  150. if err != nil {
  151. // Distinguish float values. Normally, it'd be a bug if the lexer
  152. // provides an invalid float, but it's possible that the float is
  153. // out of range of valid values (which the lexer cannot determine).
  154. // So mark the former as a bug but the latter as a legitimate user
  155. // error.
  156. //
  157. // This is also true for integers.
  158. if e, ok := err.(*strconv.NumError); ok &&
  159. e.Err == strconv.ErrRange {
  160. p.panic("Float '%s' is out of the range of 64-bit "+
  161. "IEEE-754 floating-point numbers.", it.val)
  162. } else {
  163. p.bug("Expected float value, but got '%s'.", it.val)
  164. }
  165. }
  166. return num, p.typeOfPrimitive(it)
  167. case itemDatetime:
  168. t, err := time.Parse("2006-01-02T15:04:05Z", it.val)
  169. if err != nil {
  170. p.bug("Expected Zulu formatted DateTime, but got '%s'.", it.val)
  171. }
  172. return t, p.typeOfPrimitive(it)
  173. case itemArray:
  174. array := make([]interface{}, 0)
  175. types := make([]tomlType, 0)
  176. for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
  177. if it.typ == itemCommentStart {
  178. p.expect(itemText)
  179. continue
  180. }
  181. val, typ := p.value(it)
  182. array = append(array, val)
  183. types = append(types, typ)
  184. }
  185. return array, p.typeOfArray(types)
  186. }
  187. p.bug("Unexpected value type: %s", it.typ)
  188. panic("unreachable")
  189. }
  190. // establishContext sets the current context of the parser,
  191. // where the context is either a hash or an array of hashes. Which one is
  192. // set depends on the value of the `array` parameter.
  193. //
  194. // Establishing the context also makes sure that the key isn't a duplicate, and
  195. // will create implicit hashes automatically.
  196. func (p *parser) establishContext(key Key, array bool) {
  197. var ok bool
  198. // Always start at the top level and drill down for our context.
  199. hashContext := p.mapping
  200. keyContext := make(Key, 0)
  201. // We only need implicit hashes for key[0:-1]
  202. for _, k := range key[0 : len(key)-1] {
  203. _, ok = hashContext[k]
  204. keyContext = append(keyContext, k)
  205. // No key? Make an implicit hash and move on.
  206. if !ok {
  207. p.addImplicit(keyContext)
  208. hashContext[k] = make(map[string]interface{})
  209. }
  210. // If the hash context is actually an array of tables, then set
  211. // the hash context to the last element in that array.
  212. //
  213. // Otherwise, it better be a table, since this MUST be a key group (by
  214. // virtue of it not being the last element in a key).
  215. switch t := hashContext[k].(type) {
  216. case []map[string]interface{}:
  217. hashContext = t[len(t)-1]
  218. case map[string]interface{}:
  219. hashContext = t
  220. default:
  221. p.panic("Key '%s' was already created as a hash.", keyContext)
  222. }
  223. }
  224. p.context = keyContext
  225. if array {
  226. // If this is the first element for this array, then allocate a new
  227. // list of tables for it.
  228. k := key[len(key)-1]
  229. if _, ok := hashContext[k]; !ok {
  230. hashContext[k] = make([]map[string]interface{}, 0, 5)
  231. }
  232. // Add a new table. But make sure the key hasn't already been used
  233. // for something else.
  234. if hash, ok := hashContext[k].([]map[string]interface{}); ok {
  235. hashContext[k] = append(hash, make(map[string]interface{}))
  236. } else {
  237. p.panic("Key '%s' was already created and cannot be used as "+
  238. "an array.", keyContext)
  239. }
  240. } else {
  241. p.setValue(key[len(key)-1], make(map[string]interface{}))
  242. }
  243. p.context = append(p.context, key[len(key)-1])
  244. }
  245. // setValue sets the given key to the given value in the current context.
  246. // It will make sure that the key hasn't already been defined, account for
  247. // implicit key groups.
  248. func (p *parser) setValue(key string, value interface{}) {
  249. var tmpHash interface{}
  250. var ok bool
  251. hash := p.mapping
  252. keyContext := make(Key, 0)
  253. for _, k := range p.context {
  254. keyContext = append(keyContext, k)
  255. if tmpHash, ok = hash[k]; !ok {
  256. p.bug("Context for key '%s' has not been established.", keyContext)
  257. }
  258. switch t := tmpHash.(type) {
  259. case []map[string]interface{}:
  260. // The context is a table of hashes. Pick the most recent table
  261. // defined as the current hash.
  262. hash = t[len(t)-1]
  263. case map[string]interface{}:
  264. hash = t
  265. default:
  266. p.bug("Expected hash to have type 'map[string]interface{}', but "+
  267. "it has '%T' instead.", tmpHash)
  268. }
  269. }
  270. keyContext = append(keyContext, key)
  271. if _, ok := hash[key]; ok {
  272. // Typically, if the given key has already been set, then we have
  273. // to raise an error since duplicate keys are disallowed. However,
  274. // it's possible that a key was previously defined implicitly. In this
  275. // case, it is allowed to be redefined concretely. (See the
  276. // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.)
  277. //
  278. // But we have to make sure to stop marking it as an implicit. (So that
  279. // another redefinition provokes an error.)
  280. //
  281. // Note that since it has already been defined (as a hash), we don't
  282. // want to overwrite it. So our business is done.
  283. if p.isImplicit(keyContext) {
  284. p.removeImplicit(keyContext)
  285. return
  286. }
  287. // Otherwise, we have a concrete key trying to override a previous
  288. // key, which is *always* wrong.
  289. p.panic("Key '%s' has already been defined.", keyContext)
  290. }
  291. hash[key] = value
  292. }
  293. // setType sets the type of a particular value at a given key.
  294. // It should be called immediately AFTER setValue.
  295. //
  296. // Note that if `key` is empty, then the type given will be applied to the
  297. // current context (which is either a table or an array of tables).
  298. func (p *parser) setType(key string, typ tomlType) {
  299. keyContext := make(Key, 0, len(p.context)+1)
  300. for _, k := range p.context {
  301. keyContext = append(keyContext, k)
  302. }
  303. if len(key) > 0 { // allow type setting for hashes
  304. keyContext = append(keyContext, key)
  305. }
  306. p.types[keyContext.String()] = typ
  307. }
  308. // addImplicit sets the given Key as having been created implicitly.
  309. func (p *parser) addImplicit(key Key) {
  310. p.implicits[key.String()] = true
  311. }
  312. // removeImplicit stops tagging the given key as having been implicitly created.
  313. func (p *parser) removeImplicit(key Key) {
  314. p.implicits[key.String()] = false
  315. }
  316. // isImplicit returns true if the key group pointed to by the key was created
  317. // implicitly.
  318. func (p *parser) isImplicit(key Key) bool {
  319. return p.implicits[key.String()]
  320. }
  321. // current returns the full key name of the current context.
  322. func (p *parser) current() string {
  323. if len(p.currentKey) == 0 {
  324. return p.context.String()
  325. }
  326. if len(p.context) == 0 {
  327. return p.currentKey
  328. }
  329. return fmt.Sprintf("%s.%s", p.context, p.currentKey)
  330. }
  331. func replaceEscapes(s string) string {
  332. return strings.NewReplacer(
  333. "\\b", "\u0008",
  334. "\\t", "\u0009",
  335. "\\n", "\u000A",
  336. "\\f", "\u000C",
  337. "\\r", "\u000D",
  338. "\\\"", "\u0022",
  339. "\\/", "\u002F",
  340. "\\\\", "\u005C",
  341. ).Replace(s)
  342. }
  343. func (p *parser) replaceUnicode(s string) string {
  344. indexEsc := func() int {
  345. return strings.Index(s, "\\u")
  346. }
  347. for i := indexEsc(); i != -1; i = indexEsc() {
  348. asciiBytes := s[i+2 : i+6]
  349. s = strings.Replace(s, s[i:i+6], p.asciiEscapeToUnicode(asciiBytes), -1)
  350. }
  351. return s
  352. }
  353. func (p *parser) asciiEscapeToUnicode(s string) string {
  354. hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
  355. if err != nil {
  356. p.bug("Could not parse '%s' as a hexadecimal number, but the "+
  357. "lexer claims it's OK: %s", s, err)
  358. }
  359. // BUG(burntsushi)
  360. // I honestly don't understand how this works. I can't seem
  361. // to find a way to make this fail. I figured this would fail on invalid
  362. // UTF-8 characters like U+DCFF, but it doesn't.
  363. r := string(rune(hex))
  364. if !utf8.ValidString(r) {
  365. p.panic("Escaped character '\\u%s' is not valid UTF-8.", s)
  366. }
  367. return string(r)
  368. }