decode.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. package goyaml
  2. // #include "helpers.h"
  3. import "C"
  4. import (
  5. "unsafe"
  6. "reflect"
  7. "strconv"
  8. )
// Node kinds stored in node.kind. Each kind is a distinct bit (1 << iota),
// so the values are 1, 2, 4, 8 and 16 in declaration order.
const (
	documentNode = 1 << iota // created by parser.document
	mappingNode              // created by parser.mapping
	sequenceNode             // created by parser.sequence
	scalarNode               // created by parser.scalar
	aliasNode                // created by parser.alias
)
// node is one element of the tree the parser builds from the libyaml event
// stream and that the decoder later walks.
type node struct {
	// kind is one of the *Node constants (documentNode, mappingNode, ...).
	kind int
	// line and column are copied from the start mark of the event that
	// produced the node.
	line, column int
	// tag is the tag carried by a scalar event; only parser.scalar sets it.
	tag string
	// value is the scalar text for scalar nodes, or the referenced anchor
	// name for alias nodes.
	value string
	// implicit mirrors the scalar event's plain_implicit flag.
	implicit bool
	// children holds the nested nodes; mapping nodes store them as
	// alternating key, value entries.
	children []*node
	// anchors maps anchor names to nodes. It is only allocated on document
	// nodes (see parser.document).
	anchors map[string]*node
}
  25. func GoYString(s *C.yaml_char_t) string {
  26. return C.GoString((*C.char)(unsafe.Pointer(s)))
  27. }
  28. // ----------------------------------------------------------------------------
  29. // Parser, produces a node tree out of a libyaml event stream.
// parser wraps a libyaml parser together with the most recently fetched
// event and the document node currently being built.
type parser struct {
	// parser is the underlying libyaml parser state.
	parser C.yaml_parser_t
	// event is the current event; skip() replaces it with the next one.
	event C.yaml_event_t
	// doc is the document node under construction; anchors are registered
	// in its anchors map.
	doc *node
}
  35. func newParser(b []byte) *parser {
  36. p := parser{}
  37. if C.yaml_parser_initialize(&p.parser) == 0 {
  38. panic("Failed to initialize YAML emitter")
  39. }
  40. if len(b) == 0 {
  41. b = []byte{'\n'}
  42. }
  43. // How unsafe is this really? Will this break if the GC becomes compacting?
  44. // Probably not, otherwise that would likely break &parse below as well.
  45. input := (*C.uchar)(unsafe.Pointer(&b[0]))
  46. C.yaml_parser_set_input_string(&p.parser, input, (C.size_t)(len(b)))
  47. p.skip()
  48. if p.event._type != C.YAML_STREAM_START_EVENT {
  49. panic("Expected stream start event, got " +
  50. strconv.Itoa(int(p.event._type)))
  51. }
  52. p.skip()
  53. return &p
  54. }
  55. func (p *parser) destroy() {
  56. if p.event._type != C.YAML_NO_EVENT {
  57. C.yaml_event_delete(&p.event)
  58. }
  59. C.yaml_parser_delete(&p.parser)
  60. }
  61. func (p *parser) skip() {
  62. if p.event._type != C.YAML_NO_EVENT {
  63. if p.event._type == C.YAML_STREAM_END_EVENT {
  64. panic("Attempted to go past the end of stream. Corrupted value?")
  65. }
  66. C.yaml_event_delete(&p.event)
  67. }
  68. if C.yaml_parser_parse(&p.parser, &p.event) == 0 {
  69. p.fail()
  70. }
  71. }
  72. func (p *parser) fail() {
  73. var where string
  74. var line int
  75. if p.parser.problem_mark.line != 0 {
  76. line = int(C.int(p.parser.problem_mark.line))
  77. } else if p.parser.context_mark.line != 0 {
  78. line = int(C.int(p.parser.context_mark.line))
  79. }
  80. if line != 0 {
  81. where = "line " + strconv.Itoa(line) + ": "
  82. }
  83. var msg string
  84. if p.parser.problem != nil {
  85. msg = C.GoString(p.parser.problem)
  86. } else {
  87. msg = "Unknown problem parsing YAML content"
  88. }
  89. panic(where + msg)
  90. }
  91. func (p *parser) anchor(n *node, anchor *C.yaml_char_t) {
  92. if anchor != nil {
  93. p.doc.anchors[GoYString(anchor)] = n
  94. }
  95. }
  96. func (p *parser) parse() *node {
  97. switch p.event._type {
  98. case C.YAML_SCALAR_EVENT:
  99. return p.scalar()
  100. case C.YAML_ALIAS_EVENT:
  101. return p.alias()
  102. case C.YAML_MAPPING_START_EVENT:
  103. return p.mapping()
  104. case C.YAML_SEQUENCE_START_EVENT:
  105. return p.sequence()
  106. case C.YAML_DOCUMENT_START_EVENT:
  107. return p.document()
  108. case C.YAML_STREAM_END_EVENT:
  109. // Happens when attempting to decode an empty buffer.
  110. return nil
  111. default:
  112. panic("Attempted to parse unknown event: " +
  113. strconv.Itoa(int(p.event._type)))
  114. }
  115. panic("Unreachable")
  116. }
  117. func (p *parser) node(kind int) *node {
  118. return &node{kind: kind,
  119. line: int(C.int(p.event.start_mark.line)),
  120. column: int(C.int(p.event.start_mark.column))}
  121. }
  122. func (p *parser) document() *node {
  123. n := p.node(documentNode)
  124. n.anchors = make(map[string]*node)
  125. p.doc = n
  126. p.skip()
  127. n.children = append(n.children, p.parse())
  128. if p.event._type != C.YAML_DOCUMENT_END_EVENT {
  129. panic("Expected end of document event but got " +
  130. strconv.Itoa(int(p.event._type)))
  131. }
  132. p.skip()
  133. return n
  134. }
  135. func (p *parser) alias() *node {
  136. alias := C.event_alias(&p.event)
  137. n := p.node(aliasNode)
  138. n.value = GoYString(alias.anchor)
  139. p.skip()
  140. return n
  141. }
  142. func (p *parser) scalar() *node {
  143. scalar := C.event_scalar(&p.event)
  144. n := p.node(scalarNode)
  145. n.value = GoYString(scalar.value)
  146. n.tag = GoYString(scalar.tag)
  147. n.implicit = (scalar.plain_implicit != 0)
  148. p.anchor(n, scalar.anchor)
  149. p.skip()
  150. return n
  151. }
  152. func (p *parser) sequence() *node {
  153. n := p.node(sequenceNode)
  154. p.anchor(n, C.event_sequence_start(&p.event).anchor)
  155. p.skip()
  156. for p.event._type != C.YAML_SEQUENCE_END_EVENT {
  157. n.children = append(n.children, p.parse())
  158. }
  159. p.skip()
  160. return n
  161. }
  162. func (p *parser) mapping() *node {
  163. n := p.node(mappingNode)
  164. p.anchor(n, C.event_mapping_start(&p.event).anchor)
  165. p.skip()
  166. for p.event._type != C.YAML_MAPPING_END_EVENT {
  167. n.children = append(n.children, p.parse(), p.parse())
  168. }
  169. p.skip()
  170. return n
  171. }
  172. // ----------------------------------------------------------------------------
  173. // Decoder, unmarshals a node into a provided value.
// decoder holds the state needed while unmarshalling a node tree.
type decoder struct {
	// doc is the document node being decoded; its anchors map is used to
	// resolve aliases.
	doc *node
	// aliases records the anchor names currently being expanded, so that
	// an anchor whose value refers to itself can be detected.
	aliases map[string]bool
}
  178. func newDecoder() *decoder {
  179. d := &decoder{}
  180. d.aliases = make(map[string]bool)
  181. return d
  182. }
// d.setter deals with setters and pointer dereferencing and initialization.
//
// It's a slightly convoluted case to handle properly:
//
// - Nil pointers should be initialized to a zero value, unless they are
//   being set to nil.
// - At this point we don't know yet which value SetYAML() will be called
//   with.
// - Pointer deref/init and setter checking can't be separated, because a
//   setter may be found while going down a pointer chain.
//
// Thus, here is how it takes care of it:
//
// - out is provided as a pointer, so that it can be replaced.
// - When looking at a non-setter pointer, *out is replaced by the value it
//   points to, unless tag is "!!null".
// - When a setter is found, *out is replaced by a fresh interface{} value,
//   and a set() function is returned which calls SetYAML() with whatever
//   was stored in it and records the result in *good.
//
// The returned function is nil when no setter was found.
func (d *decoder) setter(tag string, out *reflect.Value, good *bool) (set func()) {
	again := true
	for again {
		again = false
		// Does the current value itself implement Setter?
		setter, _ := (*out).Interface().(Setter)
		// A null is only allowed to go through a pointer when a setter
		// has to receive it; otherwise the pointer is left in place so the
		// caller can set it to nil.
		if tag != "!!null" || setter != nil {
			if pv, ok := (*out).(*reflect.PtrValue); ok {
				if pv.IsNil() {
					// Allocate the pointed-to value and continue with it.
					*out = reflect.MakeZero(pv.Type().(*reflect.PtrType).Elem())
					pv.PointTo(*out)
				} else {
					*out = pv.Elem()
				}
				// Re-check the pointer now that it is known to be non-nil.
				setter, _ = pv.Interface().(Setter)
				// Keep walking: the new *out may be a pointer as well.
				again = true
			}
		}
		if setter != nil {
			// Redirect decoding into a scratch interface{}; the setter
			// receives its content once the caller has filled it in.
			var arg interface{}
			*out = reflect.NewValue(&arg).(*reflect.PtrValue).Elem()
			return func() {
				*good = setter.SetYAML(tag, arg)
			}
		}
	}
	return nil
}
  226. func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
  227. switch n.kind {
  228. case documentNode:
  229. good = d.document(n, out)
  230. case scalarNode:
  231. good = d.scalar(n, out)
  232. case aliasNode:
  233. good = d.alias(n, out)
  234. case mappingNode:
  235. good = d.mapping(n, out)
  236. case sequenceNode:
  237. good = d.sequence(n, out)
  238. default:
  239. panic("Internal error: unknown node kind: " + strconv.Itoa(n.kind))
  240. }
  241. return
  242. }
  243. func (d *decoder) document(n *node, out reflect.Value) (good bool) {
  244. if len(n.children) == 1 {
  245. d.doc = n
  246. d.unmarshal(n.children[0], out)
  247. return true
  248. }
  249. return false
  250. }
  251. func (d *decoder) alias(n *node, out reflect.Value) (good bool) {
  252. an, ok := d.doc.anchors[n.value]
  253. if !ok {
  254. panic("Unknown anchor '" + n.value + "' referenced")
  255. }
  256. if d.aliases[n.value] {
  257. panic("Anchor '" + n.value + "' value contains itself")
  258. }
  259. d.aliases[n.value] = true
  260. good = d.unmarshal(an, out)
  261. d.aliases[n.value] = false, false
  262. return good
  263. }
  264. func (d *decoder) scalar(n *node, out reflect.Value) (good bool) {
  265. var tag string
  266. var resolved interface{}
  267. if n.tag == "" && !n.implicit {
  268. resolved = n.value
  269. } else {
  270. tag, resolved = resolve(n.tag, n.value)
  271. if set := d.setter(tag, &out, &good); set != nil {
  272. defer set()
  273. }
  274. }
  275. switch out := out.(type) {
  276. case *reflect.StringValue:
  277. out.Set(n.value)
  278. good = true
  279. case *reflect.InterfaceValue:
  280. out.Set(reflect.NewValue(resolved))
  281. good = true
  282. case *reflect.IntValue:
  283. switch resolved := resolved.(type) {
  284. case int:
  285. if !out.Overflow(int64(resolved)) {
  286. out.Set(int64(resolved))
  287. good = true
  288. }
  289. case int64:
  290. if !out.Overflow(resolved) {
  291. out.Set(resolved)
  292. good = true
  293. }
  294. }
  295. case *reflect.UintValue:
  296. switch resolved := resolved.(type) {
  297. case int:
  298. if resolved >= 0 {
  299. out.Set(uint64(resolved))
  300. good = true
  301. }
  302. case int64:
  303. if resolved >= 0 {
  304. out.Set(uint64(resolved))
  305. good = true
  306. }
  307. }
  308. case *reflect.BoolValue:
  309. switch resolved := resolved.(type) {
  310. case bool:
  311. out.Set(resolved)
  312. good = true
  313. }
  314. case *reflect.FloatValue:
  315. switch resolved := resolved.(type) {
  316. case float:
  317. out.Set(float64(resolved))
  318. good = true
  319. }
  320. case *reflect.PtrValue:
  321. switch resolved := resolved.(type) {
  322. case nil:
  323. out.PointTo(nil)
  324. good = true
  325. }
  326. default:
  327. panic("Can't handle type yet: " + out.Type().String())
  328. }
  329. return good
  330. }
  331. func (d *decoder) sequence(n *node, out reflect.Value) (good bool) {
  332. if set := d.setter("!!seq", &out, &good); set != nil {
  333. defer set()
  334. }
  335. if iface, ok := out.(*reflect.InterfaceValue); ok {
  336. // No type hints. Will have to use a generic sequence.
  337. out = reflect.NewValue(make([]interface{}, 0))
  338. iface.SetValue(out)
  339. }
  340. sv, ok := out.(*reflect.SliceValue)
  341. if !ok {
  342. return false
  343. }
  344. st := sv.Type().(*reflect.SliceType)
  345. et := st.Elem()
  346. l := len(n.children)
  347. for i := 0; i < l; i++ {
  348. e := reflect.MakeZero(et)
  349. if ok := d.unmarshal(n.children[i], e); ok {
  350. sv.SetValue(reflect.Append(sv, e))
  351. }
  352. }
  353. return true
  354. }
  355. func (d *decoder) mapping(n *node, out reflect.Value) (good bool) {
  356. if set := d.setter("!!map", &out, &good); set != nil {
  357. defer set()
  358. }
  359. if s, ok := out.(*reflect.StructValue); ok {
  360. return d.mappingStruct(n, s)
  361. }
  362. if iface, ok := out.(*reflect.InterfaceValue); ok {
  363. // No type hints. Will have to use a generic map.
  364. out = reflect.NewValue(make(map[interface{}]interface{}))
  365. iface.SetValue(out)
  366. }
  367. mv, ok := out.(*reflect.MapValue)
  368. if !ok {
  369. return false
  370. }
  371. mt := mv.Type().(*reflect.MapType)
  372. kt := mt.Key()
  373. et := mt.Elem()
  374. l := len(n.children)
  375. for i := 0; i < l; i += 2 {
  376. k := reflect.MakeZero(kt)
  377. if d.unmarshal(n.children[i], k) {
  378. e := reflect.MakeZero(et)
  379. if d.unmarshal(n.children[i+1], e) {
  380. mv.SetElem(k, e)
  381. }
  382. }
  383. }
  384. return true
  385. }
  386. func (d *decoder) mappingStruct(n *node, out *reflect.StructValue) (good bool) {
  387. fields, err := getStructFields(out.Type().(*reflect.StructType))
  388. if err != nil {
  389. panic(err)
  390. }
  391. name := reflect.NewValue("").(*reflect.StringValue)
  392. fieldsMap := fields.Map
  393. l := len(n.children)
  394. for i := 0; i < l; i += 2 {
  395. if !d.unmarshal(n.children[i], name) {
  396. continue
  397. }
  398. if info, ok := fieldsMap[name.Get()]; ok {
  399. d.unmarshal(n.children[i+1], out.Field(info.Num))
  400. }
  401. }
  402. return true
  403. }