parse.go 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. package toml
  2. import (
  3. "fmt"
  4. "log"
  5. "strconv"
  6. "strings"
  7. "time"
  8. "unicode/utf8"
  9. )
  10. type parser struct {
  11. mapping map[string]interface{}
  12. types map[string]tomlType
  13. lx *lexer
  14. // A list of keys in the order that they appear in the TOML data.
  15. ordered []Key
  16. // the full key for the current hash in scope
  17. context Key
  18. // the base key name for everything except hashes
  19. currentKey string
  20. // rough approximation of line number
  21. approxLine int
  22. // A map of 'key.group.names' to whether they were created implicitly.
  23. implicits map[string]bool
  24. }
  25. type parseError string
  26. func (pe parseError) Error() string {
  27. return string(pe)
  28. }
  29. func parse(data string) (p *parser, err error) {
  30. defer func() {
  31. if r := recover(); r != nil {
  32. var ok bool
  33. if err, ok = r.(parseError); ok {
  34. return
  35. }
  36. panic(r)
  37. }
  38. }()
  39. p = &parser{
  40. mapping: make(map[string]interface{}),
  41. types: make(map[string]tomlType),
  42. lx: lex(data),
  43. ordered: make([]Key, 0),
  44. implicits: make(map[string]bool),
  45. }
  46. for {
  47. item := p.next()
  48. if item.typ == itemEOF {
  49. break
  50. }
  51. p.topLevel(item)
  52. }
  53. return p, nil
  54. }
  55. func (p *parser) panic(format string, v ...interface{}) {
  56. msg := fmt.Sprintf("Near line %d, key '%s': %s",
  57. p.approxLine, p.current(), fmt.Sprintf(format, v...))
  58. panic(parseError(msg))
  59. }
  60. func (p *parser) next() item {
  61. it := p.lx.nextItem()
  62. if it.typ == itemError {
  63. p.panic("Near line %d: %s", it.line, it.val)
  64. }
  65. return it
  66. }
  67. func (p *parser) bug(format string, v ...interface{}) {
  68. log.Fatalf("BUG: %s\n\n", fmt.Sprintf(format, v...))
  69. }
  70. func (p *parser) expect(typ itemType) item {
  71. it := p.next()
  72. p.assertEqual(typ, it.typ)
  73. return it
  74. }
  75. func (p *parser) assertEqual(expected, got itemType) {
  76. if expected != got {
  77. p.bug("Expected '%s' but got '%s'.", expected, got)
  78. }
  79. }
  80. func (p *parser) topLevel(item item) {
  81. switch item.typ {
  82. case itemCommentStart:
  83. p.approxLine = item.line
  84. p.expect(itemText)
  85. case itemTableStart:
  86. kg := p.expect(itemText)
  87. p.approxLine = kg.line
  88. key := make(Key, 0)
  89. for ; kg.typ == itemText; kg = p.next() {
  90. key = append(key, kg.val)
  91. }
  92. p.assertEqual(itemTableEnd, kg.typ)
  93. p.establishContext(key, false)
  94. p.setType("", tomlHash)
  95. p.ordered = append(p.ordered, key)
  96. case itemArrayTableStart:
  97. kg := p.expect(itemText)
  98. p.approxLine = kg.line
  99. key := make(Key, 0)
  100. for ; kg.typ == itemText; kg = p.next() {
  101. key = append(key, kg.val)
  102. }
  103. p.assertEqual(itemArrayTableEnd, kg.typ)
  104. p.establishContext(key, true)
  105. p.setType("", tomlArrayHash)
  106. p.ordered = append(p.ordered, key)
  107. case itemKeyStart:
  108. kname := p.expect(itemText)
  109. p.currentKey = kname.val
  110. p.approxLine = kname.line
  111. val, typ := p.value(p.next())
  112. p.setValue(p.currentKey, val)
  113. p.setType(p.currentKey, typ)
  114. p.ordered = append(p.ordered, p.context.add(p.currentKey))
  115. p.currentKey = ""
  116. default:
  117. p.bug("Unexpected type at top level: %s", item.typ)
  118. }
  119. }
  120. // value translates an expected value from the lexer into a Go value wrapped
  121. // as an empty interface.
  122. func (p *parser) value(it item) (interface{}, tomlType) {
  123. switch it.typ {
  124. case itemString:
  125. return p.replaceUnicode(replaceEscapes(it.val)), p.typeOfPrimitive(it)
  126. case itemBool:
  127. switch it.val {
  128. case "true":
  129. return true, p.typeOfPrimitive(it)
  130. case "false":
  131. return false, p.typeOfPrimitive(it)
  132. }
  133. p.bug("Expected boolean value, but got '%s'.", it.val)
  134. case itemInteger:
  135. num, err := strconv.ParseInt(it.val, 10, 64)
  136. if err != nil {
  137. if e, ok := err.(*strconv.NumError); ok &&
  138. e.Err == strconv.ErrRange {
  139. p.panic("Integer '%s' is out of the range of 64-bit "+
  140. "signed integers.", it.val)
  141. } else {
  142. p.bug("Expected integer value, but got '%s'.", it.val)
  143. }
  144. }
  145. return num, p.typeOfPrimitive(it)
  146. case itemFloat:
  147. num, err := strconv.ParseFloat(it.val, 64)
  148. if err != nil {
  149. if e, ok := err.(*strconv.NumError); ok &&
  150. e.Err == strconv.ErrRange {
  151. p.panic("Float '%s' is out of the range of 64-bit "+
  152. "IEEE-754 floating-point numbers.", it.val)
  153. } else {
  154. p.bug("Expected float value, but got '%s'.", it.val)
  155. }
  156. }
  157. return num, p.typeOfPrimitive(it)
  158. case itemDatetime:
  159. t, err := time.Parse("2006-01-02T15:04:05Z", it.val)
  160. if err != nil {
  161. p.bug("Expected Zulu formatted DateTime, but got '%s'.", it.val)
  162. }
  163. return t, p.typeOfPrimitive(it)
  164. case itemArray:
  165. array := make([]interface{}, 0)
  166. types := make([]tomlType, 0)
  167. for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
  168. if it.typ == itemCommentStart {
  169. p.expect(itemText)
  170. continue
  171. }
  172. val, typ := p.value(it)
  173. array = append(array, val)
  174. types = append(types, typ)
  175. }
  176. return array, p.typeOfArray(types)
  177. }
  178. p.bug("Unexpected value type: %s", it.typ)
  179. panic("unreachable")
  180. }
  181. // establishContext sets the current context of the parser,
  182. // where the context is the hash currently in scope.
  183. //
  184. // Establishing the context also makes sure that the key isn't a duplicate, and
  185. // will create implicit hashes automatically.
  186. func (p *parser) establishContext(key Key, array bool) {
  187. var ok bool
  188. // Always start at the top level and drill down for our context.
  189. hashContext := p.mapping
  190. keyContext := make(Key, 0)
  191. // We only need implicit hashes for key[0:-1]
  192. for _, k := range key[0 : len(key)-1] {
  193. _, ok = hashContext[k]
  194. keyContext = append(keyContext, k)
  195. // No key? Make an implicit hash and move on.
  196. if !ok {
  197. p.addImplicit(keyContext)
  198. hashContext[k] = make(map[string]interface{})
  199. }
  200. // If the hash context is actually an array of tables, then set
  201. // the hash context to the last element in that array.
  202. //
  203. // Otherwise, it better be a table, since this MUST be a key group (by
  204. // virtue of it not being the last element in a key).
  205. switch t := hashContext[k].(type) {
  206. case []map[string]interface{}:
  207. hashContext = t[len(t)-1]
  208. case map[string]interface{}:
  209. hashContext = t
  210. default:
  211. p.panic("Key '%s' was already created as a hash.", keyContext)
  212. }
  213. }
  214. p.context = keyContext
  215. if array {
  216. k := key[len(key)-1]
  217. if _, ok := hashContext[k]; !ok {
  218. hashContext[k] = make([]map[string]interface{}, 0, 5)
  219. }
  220. if hash, ok := hashContext[k].([]map[string]interface{}); ok {
  221. hashContext[k] = append(hash, make(map[string]interface{}))
  222. } else {
  223. p.panic("Key '%s' was already created and cannot be used as "+
  224. "an array.", keyContext)
  225. }
  226. } else {
  227. p.setValue(key[len(key)-1], make(map[string]interface{}))
  228. }
  229. p.context = append(p.context, key[len(key)-1])
  230. }
  231. // setValue sets the given key to the given value in the current context.
  232. // It will make sure that the key hasn't already been defined, account for
  233. // implicit key groups.
  234. func (p *parser) setValue(key string, value interface{}) {
  235. var tmpHash interface{}
  236. var ok bool
  237. hash := p.mapping
  238. keyContext := make(Key, 0)
  239. for _, k := range p.context {
  240. keyContext = append(keyContext, k)
  241. if tmpHash, ok = hash[k]; !ok {
  242. p.bug("Context for key '%s' has not been established.", keyContext)
  243. }
  244. switch t := tmpHash.(type) {
  245. case []map[string]interface{}:
  246. hash = t[len(t)-1]
  247. case map[string]interface{}:
  248. hash = t
  249. default:
  250. p.bug("Expected hash to have type 'map[string]interface{}', but "+
  251. "it has '%T' instead.", tmpHash)
  252. }
  253. }
  254. keyContext = append(keyContext, key)
  255. if _, ok := hash[key]; ok {
  256. // We need to do some fancy footwork here. If `hash[key]` was implcitly
  257. // created AND `value` is a hash, then let this go through and stop
  258. // tagging this table as implicit.
  259. if p.isImplicit(keyContext) {
  260. p.removeImplicit(keyContext)
  261. return
  262. }
  263. // Otherwise, we have a concrete key trying to override a previous
  264. // key, which is *always* wrong.
  265. p.panic("Key '%s' has already been defined.", keyContext)
  266. }
  267. hash[key] = value
  268. }
  269. // setType sets the type of a particular value at a given key.
  270. // It should be called immediately AFTER setValue.
  271. func (p *parser) setType(key string, typ tomlType) {
  272. keyContext := make(Key, 0, len(p.context)+1)
  273. for _, k := range p.context {
  274. keyContext = append(keyContext, k)
  275. }
  276. if len(key) > 0 { // allow type setting for hashes
  277. keyContext = append(keyContext, key)
  278. }
  279. p.types[keyContext.String()] = typ
  280. }
  281. // addImplicit sets the given Key as having been created implicitly.
  282. func (p *parser) addImplicit(key Key) {
  283. p.implicits[key.String()] = true
  284. }
  285. // removeImplicit stops tagging the given key as having been implicitly created.
  286. func (p *parser) removeImplicit(key Key) {
  287. p.implicits[key.String()] = false
  288. }
  289. // isImplicit returns true if the key group pointed to by the key was created
  290. // implicitly.
  291. func (p *parser) isImplicit(key Key) bool {
  292. return p.implicits[key.String()]
  293. }
  294. // current returns the full key name of the current context.
  295. func (p *parser) current() string {
  296. if len(p.currentKey) == 0 {
  297. return p.context.String()
  298. }
  299. if len(p.context) == 0 {
  300. return p.currentKey
  301. }
  302. return fmt.Sprintf("%s.%s", p.context, p.currentKey)
  303. }
  304. func replaceEscapes(s string) string {
  305. return strings.NewReplacer(
  306. "\\b", "\u0008",
  307. "\\t", "\u0009",
  308. "\\n", "\u000A",
  309. "\\f", "\u000C",
  310. "\\r", "\u000D",
  311. "\\\"", "\u0022",
  312. "\\/", "\u002F",
  313. "\\\\", "\u005C",
  314. ).Replace(s)
  315. }
  316. func (p *parser) replaceUnicode(s string) string {
  317. indexEsc := func() int {
  318. return strings.Index(s, "\\u")
  319. }
  320. for i := indexEsc(); i != -1; i = indexEsc() {
  321. asciiBytes := s[i+2 : i+6]
  322. s = strings.Replace(s, s[i:i+6], p.asciiEscapeToUnicode(asciiBytes), -1)
  323. }
  324. return s
  325. }
  326. func (p *parser) asciiEscapeToUnicode(s string) string {
  327. hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
  328. if err != nil {
  329. p.bug("Could not parse '%s' as a hexadecimal number, but the "+
  330. "lexer claims it's OK: %s", s, err)
  331. }
  332. // I honestly don't understand how this works. I can't seem to find
  333. // a way to make this fail. I figured this would fail on invalid UTF-8
  334. // characters like U+DCFF, but it doesn't.
  335. r := string(rune(hex))
  336. if !utf8.ValidString(r) {
  337. p.panic("Escaped character '\\u%s' is not valid UTF-8.", s)
  338. }
  339. return string(r)
  340. }