decode.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. package goyaml
  2. // #cgo LDFLAGS: -lm -lpthread
  3. // #cgo CFLAGS: -I. -DHAVE_CONFIG_H=1
  4. //
  5. // #include "helpers.h"
  6. import "C"
  7. import (
  8. "reflect"
  9. "strconv"
  10. "unsafe"
  11. )
  12. const (
  13. documentNode = 1 << iota
  14. mappingNode
  15. sequenceNode
  16. scalarNode
  17. aliasNode
  18. )
  19. type node struct {
  20. kind int
  21. line, column int
  22. tag string
  23. value string
  24. implicit bool
  25. children []*node
  26. anchors map[string]*node
  27. }
  28. func stry(s *C.yaml_char_t) string {
  29. return C.GoString((*C.char)(unsafe.Pointer(s)))
  30. }
  31. // ----------------------------------------------------------------------------
  32. // Parser, produces a node tree out of a libyaml event stream.
  33. type parser struct {
  34. parser C.yaml_parser_t
  35. event C.yaml_event_t
  36. doc *node
  37. }
  38. func newParser(b []byte) *parser {
  39. p := parser{}
  40. if C.yaml_parser_initialize(&p.parser) == 0 {
  41. panic("Failed to initialize YAML emitter")
  42. }
  43. if len(b) == 0 {
  44. b = []byte{'\n'}
  45. }
  46. // How unsafe is this really? Will this break if the GC becomes compacting?
  47. // Probably not, otherwise that would likely break &parse below as well.
  48. input := (*C.uchar)(unsafe.Pointer(&b[0]))
  49. C.yaml_parser_set_input_string(&p.parser, input, (C.size_t)(len(b)))
  50. p.skip()
  51. if p.event._type != C.YAML_STREAM_START_EVENT {
  52. panic("Expected stream start event, got " +
  53. strconv.Itoa(int(p.event._type)))
  54. }
  55. p.skip()
  56. return &p
  57. }
  58. func (p *parser) destroy() {
  59. if p.event._type != C.YAML_NO_EVENT {
  60. C.yaml_event_delete(&p.event)
  61. }
  62. C.yaml_parser_delete(&p.parser)
  63. }
  64. func (p *parser) skip() {
  65. if p.event._type != C.YAML_NO_EVENT {
  66. if p.event._type == C.YAML_STREAM_END_EVENT {
  67. panic("Attempted to go past the end of stream. Corrupted value?")
  68. }
  69. C.yaml_event_delete(&p.event)
  70. }
  71. if C.yaml_parser_parse(&p.parser, &p.event) == 0 {
  72. p.fail()
  73. }
  74. }
  75. func (p *parser) fail() {
  76. var where string
  77. var line int
  78. if p.parser.problem_mark.line != 0 {
  79. line = int(C.int(p.parser.problem_mark.line))
  80. } else if p.parser.context_mark.line != 0 {
  81. line = int(C.int(p.parser.context_mark.line))
  82. }
  83. if line != 0 {
  84. where = "line " + strconv.Itoa(line) + ": "
  85. }
  86. var msg string
  87. if p.parser.problem != nil {
  88. msg = C.GoString(p.parser.problem)
  89. } else {
  90. msg = "Unknown problem parsing YAML content"
  91. }
  92. panic(where + msg)
  93. }
  94. func (p *parser) anchor(n *node, anchor *C.yaml_char_t) {
  95. if anchor != nil {
  96. p.doc.anchors[stry(anchor)] = n
  97. }
  98. }
  99. func (p *parser) parse() *node {
  100. switch p.event._type {
  101. case C.YAML_SCALAR_EVENT:
  102. return p.scalar()
  103. case C.YAML_ALIAS_EVENT:
  104. return p.alias()
  105. case C.YAML_MAPPING_START_EVENT:
  106. return p.mapping()
  107. case C.YAML_SEQUENCE_START_EVENT:
  108. return p.sequence()
  109. case C.YAML_DOCUMENT_START_EVENT:
  110. return p.document()
  111. case C.YAML_STREAM_END_EVENT:
  112. // Happens when attempting to decode an empty buffer.
  113. return nil
  114. default:
  115. panic("Attempted to parse unknown event: " +
  116. strconv.Itoa(int(p.event._type)))
  117. }
  118. panic("Unreachable")
  119. }
  120. func (p *parser) node(kind int) *node {
  121. return &node{kind: kind,
  122. line: int(C.int(p.event.start_mark.line)),
  123. column: int(C.int(p.event.start_mark.column))}
  124. }
  125. func (p *parser) document() *node {
  126. n := p.node(documentNode)
  127. n.anchors = make(map[string]*node)
  128. p.doc = n
  129. p.skip()
  130. n.children = append(n.children, p.parse())
  131. if p.event._type != C.YAML_DOCUMENT_END_EVENT {
  132. panic("Expected end of document event but got " +
  133. strconv.Itoa(int(p.event._type)))
  134. }
  135. p.skip()
  136. return n
  137. }
  138. func (p *parser) alias() *node {
  139. alias := C.event_alias(&p.event)
  140. n := p.node(aliasNode)
  141. n.value = stry(alias.anchor)
  142. p.skip()
  143. return n
  144. }
  145. func (p *parser) scalar() *node {
  146. scalar := C.event_scalar(&p.event)
  147. n := p.node(scalarNode)
  148. n.value = stry(scalar.value)
  149. n.tag = stry(scalar.tag)
  150. n.implicit = (scalar.plain_implicit != 0)
  151. p.anchor(n, scalar.anchor)
  152. p.skip()
  153. return n
  154. }
  155. func (p *parser) sequence() *node {
  156. n := p.node(sequenceNode)
  157. p.anchor(n, C.event_sequence_start(&p.event).anchor)
  158. p.skip()
  159. for p.event._type != C.YAML_SEQUENCE_END_EVENT {
  160. n.children = append(n.children, p.parse())
  161. }
  162. p.skip()
  163. return n
  164. }
  165. func (p *parser) mapping() *node {
  166. n := p.node(mappingNode)
  167. p.anchor(n, C.event_mapping_start(&p.event).anchor)
  168. p.skip()
  169. for p.event._type != C.YAML_MAPPING_END_EVENT {
  170. n.children = append(n.children, p.parse(), p.parse())
  171. }
  172. p.skip()
  173. return n
  174. }
  175. // ----------------------------------------------------------------------------
  176. // Decoder, unmarshals a node into a provided value.
  177. type decoder struct {
  178. doc *node
  179. aliases map[string]bool
  180. }
  181. func newDecoder() *decoder {
  182. d := &decoder{}
  183. d.aliases = make(map[string]bool)
  184. return d
  185. }
  186. // d.setter deals with setters and pointer dereferencing and initialization.
  187. //
  188. // It's a slightly convoluted case to handle properly:
  189. //
  190. // - nil pointers should be initialized, unless being set to nil
  191. // - we don't know at this point yet what's the value to SetYAML() with.
  192. // - we can't separate pointer deref/init and setter checking, because
  193. // a setter may be found while going down a pointer chain.
  194. //
  195. // Thus, here is how it takes care of it:
  196. //
  197. // - out is provided as a pointer, so that it can be replaced.
  198. // - when looking at a non-setter ptr, *out=ptr.Elem(), unless tag=!!null
  199. // - when a setter is found, *out=interface{}, and a set() function is
  200. // returned to call SetYAML() with the value of *out once it's defined.
  201. //
  202. func (d *decoder) setter(tag string, out *reflect.Value, good *bool) (set func()) {
  203. again := true
  204. for again {
  205. again = false
  206. setter, _ := (*out).Interface().(Setter)
  207. if tag != "!!null" || setter != nil {
  208. if pv := (*out); pv.Kind() == reflect.Ptr {
  209. if pv.IsNil() {
  210. *out = reflect.New(pv.Type().Elem()).Elem()
  211. pv.Set((*out).Addr())
  212. } else {
  213. *out = pv.Elem()
  214. }
  215. setter, _ = pv.Interface().(Setter)
  216. again = true
  217. }
  218. }
  219. if setter != nil {
  220. var arg interface{}
  221. *out = reflect.ValueOf(&arg).Elem()
  222. return func() {
  223. *good = setter.SetYAML(tag, arg)
  224. }
  225. }
  226. }
  227. return nil
  228. }
  229. func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
  230. switch n.kind {
  231. case documentNode:
  232. good = d.document(n, out)
  233. case scalarNode:
  234. good = d.scalar(n, out)
  235. case aliasNode:
  236. good = d.alias(n, out)
  237. case mappingNode:
  238. good = d.mapping(n, out)
  239. case sequenceNode:
  240. good = d.sequence(n, out)
  241. default:
  242. panic("Internal error: unknown node kind: " + strconv.Itoa(n.kind))
  243. }
  244. return
  245. }
  246. func (d *decoder) document(n *node, out reflect.Value) (good bool) {
  247. if len(n.children) == 1 {
  248. d.doc = n
  249. d.unmarshal(n.children[0], out)
  250. return true
  251. }
  252. return false
  253. }
  254. func (d *decoder) alias(n *node, out reflect.Value) (good bool) {
  255. an, ok := d.doc.anchors[n.value]
  256. if !ok {
  257. panic("Unknown anchor '" + n.value + "' referenced")
  258. }
  259. if d.aliases[n.value] {
  260. panic("Anchor '" + n.value + "' value contains itself")
  261. }
  262. d.aliases[n.value] = true
  263. good = d.unmarshal(an, out)
  264. delete(d.aliases, n.value)
  265. return good
  266. }
  267. func (d *decoder) scalar(n *node, out reflect.Value) (good bool) {
  268. var tag string
  269. var resolved interface{}
  270. if n.tag == "" && !n.implicit {
  271. resolved = n.value
  272. } else {
  273. tag, resolved = resolve(n.tag, n.value)
  274. if set := d.setter(tag, &out, &good); set != nil {
  275. defer set()
  276. }
  277. }
  278. switch out.Kind() {
  279. case reflect.String:
  280. if resolved != nil {
  281. out.SetString(n.value)
  282. good = true
  283. }
  284. case reflect.Interface:
  285. if resolved == nil {
  286. out.Set(reflect.Zero(out.Type()))
  287. } else {
  288. out.Set(reflect.ValueOf(resolved))
  289. }
  290. good = true
  291. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  292. switch resolved := resolved.(type) {
  293. case int:
  294. if !out.OverflowInt(int64(resolved)) {
  295. out.SetInt(int64(resolved))
  296. good = true
  297. }
  298. case int64:
  299. if !out.OverflowInt(resolved) {
  300. out.SetInt(resolved)
  301. good = true
  302. }
  303. }
  304. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  305. switch resolved := resolved.(type) {
  306. case int:
  307. if resolved >= 0 {
  308. out.SetUint(uint64(resolved))
  309. good = true
  310. }
  311. case int64:
  312. if resolved >= 0 {
  313. out.SetUint(uint64(resolved))
  314. good = true
  315. }
  316. }
  317. case reflect.Bool:
  318. switch resolved := resolved.(type) {
  319. case bool:
  320. out.SetBool(resolved)
  321. good = true
  322. }
  323. case reflect.Float32, reflect.Float64:
  324. switch resolved := resolved.(type) {
  325. case float64:
  326. out.SetFloat(resolved)
  327. good = true
  328. }
  329. case reflect.Ptr:
  330. switch resolved.(type) {
  331. case nil:
  332. out.Set(reflect.Zero(out.Type()))
  333. good = true
  334. default:
  335. if out.Type().Elem() == reflect.TypeOf(resolved) {
  336. elem := reflect.New(out.Type().Elem())
  337. elem.Elem().Set(reflect.ValueOf(resolved))
  338. out.Set(elem)
  339. good = true
  340. }
  341. }
  342. }
  343. return good
  344. }
  345. func settableValueOf(i interface{}) reflect.Value {
  346. v := reflect.ValueOf(i)
  347. sv := reflect.New(v.Type()).Elem()
  348. sv.Set(v)
  349. return sv
  350. }
  351. func (d *decoder) sequence(n *node, out reflect.Value) (good bool) {
  352. if set := d.setter("!!seq", &out, &good); set != nil {
  353. defer set()
  354. }
  355. var iface reflect.Value
  356. if out.Kind() == reflect.Interface {
  357. // No type hints. Will have to use a generic sequence.
  358. iface = out
  359. out = settableValueOf(make([]interface{}, 0))
  360. }
  361. if out.Kind() != reflect.Slice {
  362. return false
  363. }
  364. et := out.Type().Elem()
  365. l := len(n.children)
  366. for i := 0; i < l; i++ {
  367. e := reflect.New(et).Elem()
  368. if ok := d.unmarshal(n.children[i], e); ok {
  369. out.Set(reflect.Append(out, e))
  370. }
  371. }
  372. if iface.IsValid() {
  373. iface.Set(out)
  374. }
  375. return true
  376. }
  377. func (d *decoder) mapping(n *node, out reflect.Value) (good bool) {
  378. if set := d.setter("!!map", &out, &good); set != nil {
  379. defer set()
  380. }
  381. if out.Kind() == reflect.Struct {
  382. return d.mappingStruct(n, out)
  383. }
  384. if out.Kind() == reflect.Interface {
  385. // No type hints. Will have to use a generic map.
  386. iface := out
  387. out = settableValueOf(make(map[interface{}]interface{}))
  388. iface.Set(out)
  389. }
  390. if out.Kind() != reflect.Map {
  391. return false
  392. }
  393. outt := out.Type()
  394. kt := outt.Key()
  395. et := outt.Elem()
  396. if out.IsNil() {
  397. out.Set(reflect.MakeMap(outt))
  398. }
  399. l := len(n.children)
  400. for i := 0; i < l; i += 2 {
  401. k := reflect.New(kt).Elem()
  402. if d.unmarshal(n.children[i], k) {
  403. e := reflect.New(et).Elem()
  404. if d.unmarshal(n.children[i+1], e) {
  405. out.SetMapIndex(k, e)
  406. }
  407. }
  408. }
  409. return true
  410. }
  411. func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) {
  412. fields, err := getStructFields(out.Type())
  413. if err != nil {
  414. panic(err)
  415. }
  416. name := settableValueOf("")
  417. fieldsMap := fields.Map
  418. l := len(n.children)
  419. for i := 0; i < l; i += 2 {
  420. if !d.unmarshal(n.children[i], name) {
  421. continue
  422. }
  423. if info, ok := fieldsMap[name.String()]; ok {
  424. d.unmarshal(n.children[i+1], out.Field(info.Num))
  425. }
  426. }
  427. return true
  428. }