decode.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704
  1. package yaml
  2. import (
  3. "encoding"
  4. "encoding/base64"
  5. "fmt"
  6. "io"
  7. "math"
  8. "reflect"
  9. "strconv"
  10. "time"
  11. )
  12. const (
  13. documentNode = 1 << iota
  14. mappingNode
  15. sequenceNode
  16. scalarNode
  17. aliasNode
  18. )
  19. type node struct {
  20. kind int
  21. line, column int
  22. tag string
  23. // For an alias node, alias holds the resolved alias.
  24. alias *node
  25. value string
  26. implicit bool
  27. children []*node
  28. anchors map[string]*node
  29. }
  30. // ----------------------------------------------------------------------------
  31. // Parser, produces a node tree out of a libyaml event stream.
  32. type parser struct {
  33. parser yaml_parser_t
  34. event yaml_event_t
  35. doc *node
  36. doneInit bool
  37. }
  38. func newParser(b []byte) *parser {
  39. p := parser{}
  40. if !yaml_parser_initialize(&p.parser) {
  41. panic("failed to initialize YAML emitter")
  42. }
  43. if len(b) == 0 {
  44. b = []byte{'\n'}
  45. }
  46. yaml_parser_set_input_string(&p.parser, b)
  47. return &p
  48. }
  49. func newParserFromReader(r io.Reader) *parser {
  50. p := parser{}
  51. if !yaml_parser_initialize(&p.parser) {
  52. panic("failed to initialize YAML emitter")
  53. }
  54. yaml_parser_set_input_reader(&p.parser, r)
  55. return &p
  56. }
  57. func (p *parser) init() {
  58. if p.doneInit {
  59. return
  60. }
  61. p.skip()
  62. if p.event.typ != yaml_STREAM_START_EVENT {
  63. panic("expected stream start event, got " + strconv.Itoa(int(p.event.typ)))
  64. }
  65. p.skip()
  66. p.doneInit = true
  67. }
  68. func (p *parser) destroy() {
  69. if p.event.typ != yaml_NO_EVENT {
  70. yaml_event_delete(&p.event)
  71. }
  72. yaml_parser_delete(&p.parser)
  73. }
  74. func (p *parser) skip() {
  75. if p.event.typ != yaml_NO_EVENT {
  76. if p.event.typ == yaml_STREAM_END_EVENT {
  77. failf("attempted to go past the end of stream; corrupted value?")
  78. }
  79. yaml_event_delete(&p.event)
  80. }
  81. if !yaml_parser_parse(&p.parser, &p.event) {
  82. p.fail()
  83. }
  84. }
  85. func (p *parser) fail() {
  86. var where string
  87. var line int
  88. if p.parser.problem_mark.line != 0 {
  89. line = p.parser.problem_mark.line
  90. } else if p.parser.context_mark.line != 0 {
  91. line = p.parser.context_mark.line
  92. }
  93. if line != 0 {
  94. where = "line " + strconv.Itoa(line) + ": "
  95. }
  96. var msg string
  97. if len(p.parser.problem) > 0 {
  98. msg = p.parser.problem
  99. } else {
  100. msg = "unknown problem parsing YAML content"
  101. }
  102. failf("%s%s", where, msg)
  103. }
  104. func (p *parser) anchor(n *node, anchor []byte) {
  105. if anchor != nil {
  106. p.doc.anchors[string(anchor)] = n
  107. }
  108. }
  109. func (p *parser) parse() *node {
  110. p.init()
  111. switch p.event.typ {
  112. case yaml_SCALAR_EVENT:
  113. return p.scalar()
  114. case yaml_ALIAS_EVENT:
  115. return p.alias()
  116. case yaml_MAPPING_START_EVENT:
  117. return p.mapping()
  118. case yaml_SEQUENCE_START_EVENT:
  119. return p.sequence()
  120. case yaml_DOCUMENT_START_EVENT:
  121. return p.document()
  122. case yaml_STREAM_END_EVENT:
  123. // Happens when attempting to decode an empty buffer.
  124. return nil
  125. default:
  126. panic("attempted to parse unknown event: " + strconv.Itoa(int(p.event.typ)))
  127. }
  128. }
  129. func (p *parser) node(kind int) *node {
  130. return &node{
  131. kind: kind,
  132. line: p.event.start_mark.line,
  133. column: p.event.start_mark.column,
  134. }
  135. }
  136. func (p *parser) document() *node {
  137. n := p.node(documentNode)
  138. n.anchors = make(map[string]*node)
  139. p.doc = n
  140. p.skip()
  141. n.children = append(n.children, p.parse())
  142. if p.event.typ != yaml_DOCUMENT_END_EVENT {
  143. panic("expected end of document event but got " + strconv.Itoa(int(p.event.typ)))
  144. }
  145. p.skip()
  146. return n
  147. }
  148. func (p *parser) alias() *node {
  149. n := p.node(aliasNode)
  150. n.value = string(p.event.anchor)
  151. n.alias = p.doc.anchors[n.value]
  152. if n.alias == nil {
  153. failf("unknown anchor '%s' referenced", n.value)
  154. }
  155. p.skip()
  156. return n
  157. }
  158. func (p *parser) scalar() *node {
  159. n := p.node(scalarNode)
  160. n.value = string(p.event.value)
  161. n.tag = string(p.event.tag)
  162. n.implicit = p.event.implicit
  163. p.anchor(n, p.event.anchor)
  164. p.skip()
  165. return n
  166. }
  167. func (p *parser) sequence() *node {
  168. n := p.node(sequenceNode)
  169. p.anchor(n, p.event.anchor)
  170. p.skip()
  171. for p.event.typ != yaml_SEQUENCE_END_EVENT {
  172. n.children = append(n.children, p.parse())
  173. }
  174. p.skip()
  175. return n
  176. }
  177. func (p *parser) mapping() *node {
  178. n := p.node(mappingNode)
  179. p.anchor(n, p.event.anchor)
  180. p.skip()
  181. for p.event.typ != yaml_MAPPING_END_EVENT {
  182. n.children = append(n.children, p.parse(), p.parse())
  183. }
  184. p.skip()
  185. return n
  186. }
  187. // ----------------------------------------------------------------------------
  188. // Decoder, unmarshals a node into a provided value.
  189. type decoder struct {
  190. doc *node
  191. aliases map[*node]bool
  192. mapType reflect.Type
  193. terrors []string
  194. strict bool
  195. }
  196. var (
  197. mapItemType = reflect.TypeOf(MapItem{})
  198. durationType = reflect.TypeOf(time.Duration(0))
  199. defaultMapType = reflect.TypeOf(map[interface{}]interface{}{})
  200. ifaceType = defaultMapType.Elem()
  201. )
  202. func newDecoder(strict bool) *decoder {
  203. d := &decoder{mapType: defaultMapType, strict: strict}
  204. d.aliases = make(map[*node]bool)
  205. return d
  206. }
  207. func (d *decoder) terror(n *node, tag string, out reflect.Value) {
  208. if n.tag != "" {
  209. tag = n.tag
  210. }
  211. value := n.value
  212. if tag != yaml_SEQ_TAG && tag != yaml_MAP_TAG {
  213. if len(value) > 10 {
  214. value = " `" + value[:7] + "...`"
  215. } else {
  216. value = " `" + value + "`"
  217. }
  218. }
  219. d.terrors = append(d.terrors, fmt.Sprintf("line %d: cannot unmarshal %s%s into %s", n.line+1, shortTag(tag), value, out.Type()))
  220. }
  221. func (d *decoder) callUnmarshaler(n *node, u Unmarshaler) (good bool) {
  222. terrlen := len(d.terrors)
  223. err := u.UnmarshalYAML(func(v interface{}) (err error) {
  224. defer handleErr(&err)
  225. d.unmarshal(n, reflect.ValueOf(v))
  226. if len(d.terrors) > terrlen {
  227. issues := d.terrors[terrlen:]
  228. d.terrors = d.terrors[:terrlen]
  229. return &TypeError{issues}
  230. }
  231. return nil
  232. })
  233. if e, ok := err.(*TypeError); ok {
  234. d.terrors = append(d.terrors, e.Errors...)
  235. return false
  236. }
  237. if err != nil {
  238. fail(err)
  239. }
  240. return true
  241. }
  242. // d.prepare initializes and dereferences pointers and calls UnmarshalYAML
  243. // if a value is found to implement it.
  244. // It returns the initialized and dereferenced out value, whether
  245. // unmarshalling was already done by UnmarshalYAML, and if so whether
  246. // its types unmarshalled appropriately.
  247. //
  248. // If n holds a null value, prepare returns before doing anything.
  249. func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unmarshaled, good bool) {
  250. if n.tag == yaml_NULL_TAG || n.kind == scalarNode && n.tag == "" && (n.value == "null" || n.value == "~" || n.value == "" && n.implicit) {
  251. return out, false, false
  252. }
  253. again := true
  254. for again {
  255. again = false
  256. if out.Kind() == reflect.Ptr {
  257. if out.IsNil() {
  258. out.Set(reflect.New(out.Type().Elem()))
  259. }
  260. out = out.Elem()
  261. again = true
  262. }
  263. if out.CanAddr() {
  264. if u, ok := out.Addr().Interface().(Unmarshaler); ok {
  265. good = d.callUnmarshaler(n, u)
  266. return out, true, good
  267. }
  268. }
  269. }
  270. return out, false, false
  271. }
  272. func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
  273. switch n.kind {
  274. case documentNode:
  275. return d.document(n, out)
  276. case aliasNode:
  277. return d.alias(n, out)
  278. }
  279. out, unmarshaled, good := d.prepare(n, out)
  280. if unmarshaled {
  281. return good
  282. }
  283. switch n.kind {
  284. case scalarNode:
  285. good = d.scalar(n, out)
  286. case mappingNode:
  287. good = d.mapping(n, out)
  288. case sequenceNode:
  289. good = d.sequence(n, out)
  290. default:
  291. panic("internal error: unknown node kind: " + strconv.Itoa(n.kind))
  292. }
  293. return good
  294. }
  295. func (d *decoder) document(n *node, out reflect.Value) (good bool) {
  296. if len(n.children) == 1 {
  297. d.doc = n
  298. d.unmarshal(n.children[0], out)
  299. return true
  300. }
  301. return false
  302. }
  303. func (d *decoder) alias(n *node, out reflect.Value) (good bool) {
  304. if d.aliases[n] {
  305. // TODO this could actually be allowed in some circumstances.
  306. failf("anchor '%s' value contains itself", n.value)
  307. }
  308. d.aliases[n] = true
  309. good = d.unmarshal(n.alias, out)
  310. delete(d.aliases, n)
  311. return good
  312. }
  313. var zeroValue reflect.Value
  314. func resetMap(out reflect.Value) {
  315. for _, k := range out.MapKeys() {
  316. out.SetMapIndex(k, zeroValue)
  317. }
  318. }
  319. func (d *decoder) scalar(n *node, out reflect.Value) (good bool) {
  320. var tag string
  321. var resolved interface{}
  322. if n.tag == "" && !n.implicit {
  323. tag = yaml_STR_TAG
  324. resolved = n.value
  325. } else {
  326. tag, resolved = resolve(n.tag, n.value)
  327. if tag == yaml_BINARY_TAG {
  328. data, err := base64.StdEncoding.DecodeString(resolved.(string))
  329. if err != nil {
  330. failf("!!binary value contains invalid base64 data")
  331. }
  332. resolved = string(data)
  333. }
  334. }
  335. if resolved == nil {
  336. if out.Kind() == reflect.Map && !out.CanAddr() {
  337. resetMap(out)
  338. } else {
  339. out.Set(reflect.Zero(out.Type()))
  340. }
  341. return true
  342. }
  343. if s, ok := resolved.(string); ok && out.CanAddr() {
  344. if u, ok := out.Addr().Interface().(encoding.TextUnmarshaler); ok {
  345. err := u.UnmarshalText([]byte(s))
  346. if err != nil {
  347. fail(err)
  348. }
  349. return true
  350. }
  351. }
  352. switch out.Kind() {
  353. case reflect.String:
  354. if tag == yaml_BINARY_TAG {
  355. out.SetString(resolved.(string))
  356. good = true
  357. } else if resolved != nil {
  358. out.SetString(n.value)
  359. good = true
  360. }
  361. case reflect.Interface:
  362. if resolved == nil {
  363. out.Set(reflect.Zero(out.Type()))
  364. } else {
  365. out.Set(reflect.ValueOf(resolved))
  366. }
  367. good = true
  368. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  369. switch resolved := resolved.(type) {
  370. case int:
  371. if !out.OverflowInt(int64(resolved)) {
  372. out.SetInt(int64(resolved))
  373. good = true
  374. }
  375. case int64:
  376. if !out.OverflowInt(resolved) {
  377. out.SetInt(resolved)
  378. good = true
  379. }
  380. case uint64:
  381. if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) {
  382. out.SetInt(int64(resolved))
  383. good = true
  384. }
  385. case float64:
  386. if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) {
  387. out.SetInt(int64(resolved))
  388. good = true
  389. }
  390. case string:
  391. if out.Type() == durationType {
  392. d, err := time.ParseDuration(resolved)
  393. if err == nil {
  394. out.SetInt(int64(d))
  395. good = true
  396. }
  397. }
  398. }
  399. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  400. switch resolved := resolved.(type) {
  401. case int:
  402. if resolved >= 0 && !out.OverflowUint(uint64(resolved)) {
  403. out.SetUint(uint64(resolved))
  404. good = true
  405. }
  406. case int64:
  407. if resolved >= 0 && !out.OverflowUint(uint64(resolved)) {
  408. out.SetUint(uint64(resolved))
  409. good = true
  410. }
  411. case uint64:
  412. if !out.OverflowUint(uint64(resolved)) {
  413. out.SetUint(uint64(resolved))
  414. good = true
  415. }
  416. case float64:
  417. if resolved <= math.MaxUint64 && !out.OverflowUint(uint64(resolved)) {
  418. out.SetUint(uint64(resolved))
  419. good = true
  420. }
  421. }
  422. case reflect.Bool:
  423. switch resolved := resolved.(type) {
  424. case bool:
  425. out.SetBool(resolved)
  426. good = true
  427. }
  428. case reflect.Float32, reflect.Float64:
  429. switch resolved := resolved.(type) {
  430. case int:
  431. out.SetFloat(float64(resolved))
  432. good = true
  433. case int64:
  434. out.SetFloat(float64(resolved))
  435. good = true
  436. case uint64:
  437. out.SetFloat(float64(resolved))
  438. good = true
  439. case float64:
  440. out.SetFloat(resolved)
  441. good = true
  442. }
  443. case reflect.Ptr:
  444. if out.Type().Elem() == reflect.TypeOf(resolved) {
  445. // TODO DOes this make sense? When is out a Ptr except when decoding a nil value?
  446. elem := reflect.New(out.Type().Elem())
  447. elem.Elem().Set(reflect.ValueOf(resolved))
  448. out.Set(elem)
  449. good = true
  450. }
  451. }
  452. if !good {
  453. d.terror(n, tag, out)
  454. }
  455. return good
  456. }
  457. func settableValueOf(i interface{}) reflect.Value {
  458. v := reflect.ValueOf(i)
  459. sv := reflect.New(v.Type()).Elem()
  460. sv.Set(v)
  461. return sv
  462. }
  463. func (d *decoder) sequence(n *node, out reflect.Value) (good bool) {
  464. l := len(n.children)
  465. var iface reflect.Value
  466. switch out.Kind() {
  467. case reflect.Slice:
  468. out.Set(reflect.MakeSlice(out.Type(), l, l))
  469. case reflect.Interface:
  470. // No type hints. Will have to use a generic sequence.
  471. iface = out
  472. out = settableValueOf(make([]interface{}, l))
  473. default:
  474. d.terror(n, yaml_SEQ_TAG, out)
  475. return false
  476. }
  477. et := out.Type().Elem()
  478. j := 0
  479. for i := 0; i < l; i++ {
  480. e := reflect.New(et).Elem()
  481. if ok := d.unmarshal(n.children[i], e); ok {
  482. out.Index(j).Set(e)
  483. j++
  484. }
  485. }
  486. out.Set(out.Slice(0, j))
  487. if iface.IsValid() {
  488. iface.Set(out)
  489. }
  490. return true
  491. }
  492. func (d *decoder) mapping(n *node, out reflect.Value) (good bool) {
  493. switch out.Kind() {
  494. case reflect.Struct:
  495. return d.mappingStruct(n, out)
  496. case reflect.Slice:
  497. return d.mappingSlice(n, out)
  498. case reflect.Map:
  499. // okay
  500. case reflect.Interface:
  501. if d.mapType.Kind() == reflect.Map {
  502. iface := out
  503. out = reflect.MakeMap(d.mapType)
  504. iface.Set(out)
  505. } else {
  506. slicev := reflect.New(d.mapType).Elem()
  507. if !d.mappingSlice(n, slicev) {
  508. return false
  509. }
  510. out.Set(slicev)
  511. return true
  512. }
  513. default:
  514. d.terror(n, yaml_MAP_TAG, out)
  515. return false
  516. }
  517. outt := out.Type()
  518. kt := outt.Key()
  519. et := outt.Elem()
  520. mapType := d.mapType
  521. if outt.Key() == ifaceType && outt.Elem() == ifaceType {
  522. d.mapType = outt
  523. }
  524. if out.IsNil() {
  525. out.Set(reflect.MakeMap(outt))
  526. }
  527. l := len(n.children)
  528. for i := 0; i < l; i += 2 {
  529. if isMerge(n.children[i]) {
  530. d.merge(n.children[i+1], out)
  531. continue
  532. }
  533. k := reflect.New(kt).Elem()
  534. if d.unmarshal(n.children[i], k) {
  535. kkind := k.Kind()
  536. if kkind == reflect.Interface {
  537. kkind = k.Elem().Kind()
  538. }
  539. if kkind == reflect.Map || kkind == reflect.Slice {
  540. failf("invalid map key: %#v", k.Interface())
  541. }
  542. e := reflect.New(et).Elem()
  543. if d.unmarshal(n.children[i+1], e) {
  544. out.SetMapIndex(k, e)
  545. }
  546. }
  547. }
  548. d.mapType = mapType
  549. return true
  550. }
  551. func (d *decoder) mappingSlice(n *node, out reflect.Value) (good bool) {
  552. outt := out.Type()
  553. if outt.Elem() != mapItemType {
  554. d.terror(n, yaml_MAP_TAG, out)
  555. return false
  556. }
  557. mapType := d.mapType
  558. d.mapType = outt
  559. var slice []MapItem
  560. var l = len(n.children)
  561. for i := 0; i < l; i += 2 {
  562. if isMerge(n.children[i]) {
  563. d.merge(n.children[i+1], out)
  564. continue
  565. }
  566. item := MapItem{}
  567. k := reflect.ValueOf(&item.Key).Elem()
  568. if d.unmarshal(n.children[i], k) {
  569. v := reflect.ValueOf(&item.Value).Elem()
  570. if d.unmarshal(n.children[i+1], v) {
  571. slice = append(slice, item)
  572. }
  573. }
  574. }
  575. out.Set(reflect.ValueOf(slice))
  576. d.mapType = mapType
  577. return true
  578. }
  579. func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) {
  580. sinfo, err := getStructInfo(out.Type())
  581. if err != nil {
  582. panic(err)
  583. }
  584. name := settableValueOf("")
  585. l := len(n.children)
  586. var inlineMap reflect.Value
  587. var elemType reflect.Type
  588. if sinfo.InlineMap != -1 {
  589. inlineMap = out.Field(sinfo.InlineMap)
  590. inlineMap.Set(reflect.New(inlineMap.Type()).Elem())
  591. elemType = inlineMap.Type().Elem()
  592. }
  593. for i := 0; i < l; i += 2 {
  594. ni := n.children[i]
  595. if isMerge(ni) {
  596. d.merge(n.children[i+1], out)
  597. continue
  598. }
  599. if !d.unmarshal(ni, name) {
  600. continue
  601. }
  602. if info, ok := sinfo.FieldsMap[name.String()]; ok {
  603. var field reflect.Value
  604. if info.Inline == nil {
  605. field = out.Field(info.Num)
  606. } else {
  607. field = out.FieldByIndex(info.Inline)
  608. }
  609. d.unmarshal(n.children[i+1], field)
  610. } else if sinfo.InlineMap != -1 {
  611. if inlineMap.IsNil() {
  612. inlineMap.Set(reflect.MakeMap(inlineMap.Type()))
  613. }
  614. value := reflect.New(elemType).Elem()
  615. d.unmarshal(n.children[i+1], value)
  616. inlineMap.SetMapIndex(name, value)
  617. } else if d.strict {
  618. d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s not found in struct %s", ni.line+1, name.String(), out.Type()))
  619. }
  620. }
  621. return true
  622. }
  623. func failWantMap() {
  624. failf("map merge requires map or sequence of maps as the value")
  625. }
  626. func (d *decoder) merge(n *node, out reflect.Value) {
  627. switch n.kind {
  628. case mappingNode:
  629. d.unmarshal(n, out)
  630. case aliasNode:
  631. an, ok := d.doc.anchors[n.value]
  632. if ok && an.kind != mappingNode {
  633. failWantMap()
  634. }
  635. d.unmarshal(n, out)
  636. case sequenceNode:
  637. // Step backwards as earlier nodes take precedence.
  638. for i := len(n.children) - 1; i >= 0; i-- {
  639. ni := n.children[i]
  640. if ni.kind == aliasNode {
  641. an, ok := d.doc.anchors[ni.value]
  642. if ok && an.kind != mappingNode {
  643. failWantMap()
  644. }
  645. } else if ni.kind != mappingNode {
  646. failWantMap()
  647. }
  648. d.unmarshal(ni, out)
  649. }
  650. default:
  651. failWantMap()
  652. }
  653. }
  654. func isMerge(n *node) bool {
  655. return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG)
  656. }