decode.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. package goyaml
  2. // #cgo LDFLAGS: -lm -lpthread
  3. // #cgo windows CFLAGS: -DYAML_DECLARE_STATIC=1 -Dstrdup=_strdup
  4. // #cgo CFLAGS: -I. -DHAVE_CONFIG_H=1
  5. //
  6. // #include "helpers.h"
  7. import "C"
  8. import (
  9. "reflect"
  10. "strconv"
  11. "unsafe"
  12. )
  13. const (
  14. documentNode = 1 << iota
  15. mappingNode
  16. sequenceNode
  17. scalarNode
  18. aliasNode
  19. )
  20. type node struct {
  21. kind int
  22. line, column int
  23. tag string
  24. value string
  25. implicit bool
  26. children []*node
  27. anchors map[string]*node
  28. }
  29. func stry(s *C.yaml_char_t) string {
  30. return C.GoString((*C.char)(unsafe.Pointer(s)))
  31. }
  32. // ----------------------------------------------------------------------------
  33. // Parser, produces a node tree out of a libyaml event stream.
  34. type parser struct {
  35. parser C.yaml_parser_t
  36. event C.yaml_event_t
  37. doc *node
  38. }
  39. func newParser(b []byte) *parser {
  40. p := parser{}
  41. if C.yaml_parser_initialize(&p.parser) == 0 {
  42. panic("Failed to initialize YAML emitter")
  43. }
  44. if len(b) == 0 {
  45. b = []byte{'\n'}
  46. }
  47. // How unsafe is this really? Will this break if the GC becomes compacting?
  48. // Probably not, otherwise that would likely break &parse below as well.
  49. input := (*C.uchar)(unsafe.Pointer(&b[0]))
  50. C.yaml_parser_set_input_string(&p.parser, input, (C.size_t)(len(b)))
  51. p.skip()
  52. if p.event._type != C.YAML_STREAM_START_EVENT {
  53. panic("Expected stream start event, got " +
  54. strconv.Itoa(int(p.event._type)))
  55. }
  56. p.skip()
  57. return &p
  58. }
  59. func (p *parser) destroy() {
  60. if p.event._type != C.YAML_NO_EVENT {
  61. C.yaml_event_delete(&p.event)
  62. }
  63. C.yaml_parser_delete(&p.parser)
  64. }
  65. func (p *parser) skip() {
  66. if p.event._type != C.YAML_NO_EVENT {
  67. if p.event._type == C.YAML_STREAM_END_EVENT {
  68. panic("Attempted to go past the end of stream. Corrupted value?")
  69. }
  70. C.yaml_event_delete(&p.event)
  71. }
  72. if C.yaml_parser_parse(&p.parser, &p.event) == 0 {
  73. p.fail()
  74. }
  75. }
  76. func (p *parser) fail() {
  77. var where string
  78. var line int
  79. if p.parser.problem_mark.line != 0 {
  80. line = int(C.int(p.parser.problem_mark.line))
  81. } else if p.parser.context_mark.line != 0 {
  82. line = int(C.int(p.parser.context_mark.line))
  83. }
  84. if line != 0 {
  85. where = "line " + strconv.Itoa(line) + ": "
  86. }
  87. var msg string
  88. if p.parser.problem != nil {
  89. msg = C.GoString(p.parser.problem)
  90. } else {
  91. msg = "Unknown problem parsing YAML content"
  92. }
  93. panic(where + msg)
  94. }
  95. func (p *parser) anchor(n *node, anchor *C.yaml_char_t) {
  96. if anchor != nil {
  97. p.doc.anchors[stry(anchor)] = n
  98. }
  99. }
  100. func (p *parser) parse() *node {
  101. switch p.event._type {
  102. case C.YAML_SCALAR_EVENT:
  103. return p.scalar()
  104. case C.YAML_ALIAS_EVENT:
  105. return p.alias()
  106. case C.YAML_MAPPING_START_EVENT:
  107. return p.mapping()
  108. case C.YAML_SEQUENCE_START_EVENT:
  109. return p.sequence()
  110. case C.YAML_DOCUMENT_START_EVENT:
  111. return p.document()
  112. case C.YAML_STREAM_END_EVENT:
  113. // Happens when attempting to decode an empty buffer.
  114. return nil
  115. default:
  116. panic("Attempted to parse unknown event: " +
  117. strconv.Itoa(int(p.event._type)))
  118. }
  119. panic("Unreachable")
  120. }
  121. func (p *parser) node(kind int) *node {
  122. return &node{kind: kind,
  123. line: int(C.int(p.event.start_mark.line)),
  124. column: int(C.int(p.event.start_mark.column))}
  125. }
  126. func (p *parser) document() *node {
  127. n := p.node(documentNode)
  128. n.anchors = make(map[string]*node)
  129. p.doc = n
  130. p.skip()
  131. n.children = append(n.children, p.parse())
  132. if p.event._type != C.YAML_DOCUMENT_END_EVENT {
  133. panic("Expected end of document event but got " +
  134. strconv.Itoa(int(p.event._type)))
  135. }
  136. p.skip()
  137. return n
  138. }
  139. func (p *parser) alias() *node {
  140. alias := C.event_alias(&p.event)
  141. n := p.node(aliasNode)
  142. n.value = stry(alias.anchor)
  143. p.skip()
  144. return n
  145. }
  146. func (p *parser) scalar() *node {
  147. scalar := C.event_scalar(&p.event)
  148. n := p.node(scalarNode)
  149. n.value = stry(scalar.value)
  150. n.tag = stry(scalar.tag)
  151. n.implicit = (scalar.plain_implicit != 0)
  152. p.anchor(n, scalar.anchor)
  153. p.skip()
  154. return n
  155. }
  156. func (p *parser) sequence() *node {
  157. n := p.node(sequenceNode)
  158. p.anchor(n, C.event_sequence_start(&p.event).anchor)
  159. p.skip()
  160. for p.event._type != C.YAML_SEQUENCE_END_EVENT {
  161. n.children = append(n.children, p.parse())
  162. }
  163. p.skip()
  164. return n
  165. }
  166. func (p *parser) mapping() *node {
  167. n := p.node(mappingNode)
  168. p.anchor(n, C.event_mapping_start(&p.event).anchor)
  169. p.skip()
  170. for p.event._type != C.YAML_MAPPING_END_EVENT {
  171. n.children = append(n.children, p.parse(), p.parse())
  172. }
  173. p.skip()
  174. return n
  175. }
  176. // ----------------------------------------------------------------------------
  177. // Decoder, unmarshals a node into a provided value.
  178. type decoder struct {
  179. doc *node
  180. aliases map[string]bool
  181. }
  182. func newDecoder() *decoder {
  183. d := &decoder{}
  184. d.aliases = make(map[string]bool)
  185. return d
  186. }
  187. // d.setter deals with setters and pointer dereferencing and initialization.
  188. //
  189. // It's a slightly convoluted case to handle properly:
  190. //
  191. // - nil pointers should be initialized, unless being set to nil
  192. // - we don't know at this point yet what's the value to SetYAML() with.
  193. // - we can't separate pointer deref/init and setter checking, because
  194. // a setter may be found while going down a pointer chain.
  195. //
  196. // Thus, here is how it takes care of it:
  197. //
  198. // - out is provided as a pointer, so that it can be replaced.
  199. // - when looking at a non-setter ptr, *out=ptr.Elem(), unless tag=!!null
  200. // - when a setter is found, *out=interface{}, and a set() function is
  201. // returned to call SetYAML() with the value of *out once it's defined.
  202. //
  203. func (d *decoder) setter(tag string, out *reflect.Value, good *bool) (set func()) {
  204. again := true
  205. for again {
  206. again = false
  207. setter, _ := (*out).Interface().(Setter)
  208. if tag != "!!null" || setter != nil {
  209. if pv := (*out); pv.Kind() == reflect.Ptr {
  210. if pv.IsNil() {
  211. *out = reflect.New(pv.Type().Elem()).Elem()
  212. pv.Set((*out).Addr())
  213. } else {
  214. *out = pv.Elem()
  215. }
  216. setter, _ = pv.Interface().(Setter)
  217. again = true
  218. }
  219. }
  220. if setter != nil {
  221. var arg interface{}
  222. *out = reflect.ValueOf(&arg).Elem()
  223. return func() {
  224. *good = setter.SetYAML(tag, arg)
  225. }
  226. }
  227. }
  228. return nil
  229. }
  230. func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
  231. switch n.kind {
  232. case documentNode:
  233. good = d.document(n, out)
  234. case scalarNode:
  235. good = d.scalar(n, out)
  236. case aliasNode:
  237. good = d.alias(n, out)
  238. case mappingNode:
  239. good = d.mapping(n, out)
  240. case sequenceNode:
  241. good = d.sequence(n, out)
  242. default:
  243. panic("Internal error: unknown node kind: " + strconv.Itoa(n.kind))
  244. }
  245. return
  246. }
  247. func (d *decoder) document(n *node, out reflect.Value) (good bool) {
  248. if len(n.children) == 1 {
  249. d.doc = n
  250. d.unmarshal(n.children[0], out)
  251. return true
  252. }
  253. return false
  254. }
  255. func (d *decoder) alias(n *node, out reflect.Value) (good bool) {
  256. an, ok := d.doc.anchors[n.value]
  257. if !ok {
  258. panic("Unknown anchor '" + n.value + "' referenced")
  259. }
  260. if d.aliases[n.value] {
  261. panic("Anchor '" + n.value + "' value contains itself")
  262. }
  263. d.aliases[n.value] = true
  264. good = d.unmarshal(an, out)
  265. delete(d.aliases, n.value)
  266. return good
  267. }
  268. func (d *decoder) scalar(n *node, out reflect.Value) (good bool) {
  269. var tag string
  270. var resolved interface{}
  271. if n.tag == "" && !n.implicit {
  272. resolved = n.value
  273. } else {
  274. tag, resolved = resolve(n.tag, n.value)
  275. if set := d.setter(tag, &out, &good); set != nil {
  276. defer set()
  277. }
  278. }
  279. switch out.Kind() {
  280. case reflect.String:
  281. out.SetString(n.value)
  282. good = true
  283. case reflect.Interface:
  284. if resolved == nil {
  285. out.Set(reflect.Zero(out.Type()))
  286. } else {
  287. out.Set(reflect.ValueOf(resolved))
  288. }
  289. good = true
  290. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  291. switch resolved := resolved.(type) {
  292. case int:
  293. if !out.OverflowInt(int64(resolved)) {
  294. out.SetInt(int64(resolved))
  295. good = true
  296. }
  297. case int64:
  298. if !out.OverflowInt(resolved) {
  299. out.SetInt(resolved)
  300. good = true
  301. }
  302. }
  303. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  304. switch resolved := resolved.(type) {
  305. case int:
  306. if resolved >= 0 {
  307. out.SetUint(uint64(resolved))
  308. good = true
  309. }
  310. case int64:
  311. if resolved >= 0 {
  312. out.SetUint(uint64(resolved))
  313. good = true
  314. }
  315. }
  316. case reflect.Bool:
  317. switch resolved := resolved.(type) {
  318. case bool:
  319. out.SetBool(resolved)
  320. good = true
  321. }
  322. case reflect.Float32, reflect.Float64:
  323. switch resolved := resolved.(type) {
  324. case float64:
  325. out.SetFloat(resolved)
  326. good = true
  327. }
  328. case reflect.Ptr:
  329. switch resolved.(type) {
  330. case nil:
  331. out.Set(reflect.Zero(out.Type()))
  332. good = true
  333. }
  334. }
  335. return good
  336. }
  337. func settableValueOf(i interface{}) reflect.Value {
  338. v := reflect.ValueOf(i)
  339. sv := reflect.New(v.Type()).Elem()
  340. sv.Set(v)
  341. return sv
  342. }
  343. func (d *decoder) sequence(n *node, out reflect.Value) (good bool) {
  344. if set := d.setter("!!seq", &out, &good); set != nil {
  345. defer set()
  346. }
  347. var iface reflect.Value
  348. if out.Kind() == reflect.Interface {
  349. // No type hints. Will have to use a generic sequence.
  350. iface = out
  351. out = settableValueOf(make([]interface{}, 0))
  352. }
  353. if out.Kind() != reflect.Slice {
  354. return false
  355. }
  356. et := out.Type().Elem()
  357. l := len(n.children)
  358. for i := 0; i < l; i++ {
  359. e := reflect.New(et).Elem()
  360. if ok := d.unmarshal(n.children[i], e); ok {
  361. out.Set(reflect.Append(out, e))
  362. }
  363. }
  364. if iface.IsValid() {
  365. iface.Set(out)
  366. }
  367. return true
  368. }
  369. func (d *decoder) mapping(n *node, out reflect.Value) (good bool) {
  370. if set := d.setter("!!map", &out, &good); set != nil {
  371. defer set()
  372. }
  373. if out.Kind() == reflect.Struct {
  374. return d.mappingStruct(n, out)
  375. }
  376. if out.Kind() == reflect.Interface {
  377. // No type hints. Will have to use a generic map.
  378. iface := out
  379. out = settableValueOf(make(map[interface{}]interface{}))
  380. iface.Set(out)
  381. }
  382. if out.Kind() != reflect.Map {
  383. return false
  384. }
  385. outt := out.Type()
  386. kt := outt.Key()
  387. et := outt.Elem()
  388. if out.IsNil() {
  389. out.Set(reflect.MakeMap(outt))
  390. }
  391. l := len(n.children)
  392. for i := 0; i < l; i += 2 {
  393. k := reflect.New(kt).Elem()
  394. if d.unmarshal(n.children[i], k) {
  395. e := reflect.New(et).Elem()
  396. if d.unmarshal(n.children[i+1], e) {
  397. out.SetMapIndex(k, e)
  398. }
  399. }
  400. }
  401. return true
  402. }
  403. func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) {
  404. fields, err := getStructFields(out.Type())
  405. if err != nil {
  406. panic(err)
  407. }
  408. name := settableValueOf("")
  409. fieldsMap := fields.Map
  410. l := len(n.children)
  411. for i := 0; i < l; i += 2 {
  412. if !d.unmarshal(n.children[i], name) {
  413. continue
  414. }
  415. if info, ok := fieldsMap[name.String()]; ok {
  416. d.unmarshal(n.children[i+1], out.Field(info.Num))
  417. }
  418. }
  419. return true
  420. }