Gustavo Niemeyer 15 лет назад
Родитель
Сommit
41168bb7ed
7 измененных файлов с 395 добавлено и 11 удалено
  1. 4 2
      Makefile
  2. 172 0
      decode.go
  3. 4 0
      goyaml.go
  4. 70 5
      goyaml_test.go
  5. 8 0
      helpers.c
  6. 0 4
      parser.go
  7. 137 0
      resolve.go

+ 4 - 2
Makefile

@@ -7,11 +7,13 @@ TARG=goyaml
 
 GOFILES=\
 	goyaml.go\
+	resolve.go\
 
 CGOFILES=\
-	parser.go\
+	decode.go\
 
 CGO_LDFLAGS+=-lm -lpthread
+CGO_CFLAGS+=-I$(YAML)/include
 CGO_OFILES+=_lib/*.o
 
 
@@ -28,4 +30,4 @@ CLEANFILES=_lib
 
 include $(GOROOT)/src/Make.pkg
 
-#_cgo_defun.c: helpers.c
+_cgo_defun.c: helpers.c

+ 172 - 0
decode.go

@@ -0,0 +1,172 @@
+package goyaml
+
+/* #include "helpers.c" */
+import "C"
+
+import (
+    "unsafe"
+    "reflect"
+    "strconv"
+)
+
+
+// decoder holds the libyaml parser state used while unmarshalling one input
+// buffer, together with the most recently parsed event.
+type decoder struct {
+    parser *C.yaml_parser_t // set up in newDecoder, released in destroy
+    event *C.yaml_event_t // current event; replaced by each call to next
+}
+
+// newDecoder creates a decoder reading from b and positions it on the first
+// event that follows the stream start event.
+//
+// It panics if b is empty, if libyaml fails to initialize the parser, or if
+// the first event produced is not a stream start. The caller is expected to
+// call destroy on the returned decoder to release the libyaml resources.
+func newDecoder(b []byte) *decoder {
+    if len(b) == 0 {
+        panic("Can't handle empty buffers yet") // XXX Fix this.
+    }
+
+    d := decoder{}
+    d.event = &C.yaml_event_t{}
+    d.parser = &C.yaml_parser_t{}
+    // yaml_parser_initialize reports failure with a zero result; carrying on
+    // would hand an uninitialized parser to every later libyaml call.
+    if C.yaml_parser_initialize(d.parser) == 0 {
+        panic("Failed to initialize YAML parser")
+    }
+
+    // How unsafe is this really?  Will this break if the GC becomes compacting?
+    // Probably not, otherwise that would likely break &parse below as well.
+    input := (*C.uchar)(unsafe.Pointer(&b[0]))
+    C.yaml_parser_set_input_string(d.parser, input, (C.size_t)(len(b)))
+
+    d.next()
+    if d.event._type != C.YAML_STREAM_START_EVENT {
+        panic("Expected stream start event, got " +
+              strconv.Itoa(int(d.event._type)))
+    }
+    d.next()
+    return &d
+}
+
+// destroy releases the event currently held by the decoder, if there is one,
+// and then the libyaml parser itself.
+func (d *decoder) destroy() {
+    holdsEvent := d.event._type != C.YAML_NO_EVENT
+    if holdsEvent {
+        C.yaml_event_delete(d.event)
+    }
+    C.yaml_parser_delete(d.parser)
+}
+
+// next discards the current event, if any, and parses the following one into
+// d.event. It panics when asked to advance past the stream end event or when
+// libyaml reports a parse failure.
+func (d *decoder) next() {
+    switch d.event._type {
+    case C.YAML_NO_EVENT:
+        // No event is held yet, so there is nothing to release.
+    case C.YAML_STREAM_END_EVENT:
+        panic("Attempted to go past the end of stream. Corrupted value?")
+    default:
+        C.yaml_event_delete(d.event)
+    }
+    if status := C.yaml_parser_parse(d.parser, d.event); status == 0 {
+        panic("Parsing failed.") // XXX Need better error handling here.
+    }
+}
+
+// skip advances the decoder past the next event of the given type, leaving it
+// on the event that follows.
+//
+// Callers invoke it while positioned on a sequence or mapping start event to
+// discard that whole collection. Nested sequences and mappings are tracked so
+// that the end event of an inner collection is not mistaken for the end of
+// the collection being skipped.
+func (d *decoder) skip(_type C.yaml_event_type_t) {
+    depth := 0
+    for {
+        switch d.event._type {
+        case C.YAML_SEQUENCE_START_EVENT, C.YAML_MAPPING_START_EVENT:
+            depth++
+        case C.YAML_SEQUENCE_END_EVENT, C.YAML_MAPPING_END_EVENT:
+            depth--
+        }
+        // depth <= 0 rather than == 0: a target event seen before any start
+        // event was counted still ends the loop, as it did before nesting
+        // was tracked.
+        if d.event._type == _type && depth <= 0 {
+            break
+        }
+        d.next()
+    }
+    d.next()
+}
+
+// unmarshal decodes the node starting at the current event into out by
+// dispatching on the event type. It returns whatever the selected handler
+// returns and panics on any event type it does not know how to decode.
+func (d *decoder) unmarshal(out reflect.Value) bool {
+    kind := d.event._type
+    if kind == C.YAML_SCALAR_EVENT {
+        return d.scalar(out)
+    }
+    if kind == C.YAML_MAPPING_START_EVENT {
+        return d.mapping(out)
+    }
+    if kind == C.YAML_SEQUENCE_START_EVENT {
+        return d.sequence(out)
+    }
+    if kind == C.YAML_DOCUMENT_START_EVENT {
+        return d.document(out)
+    }
+    panic("Attempted to unmarshal unexpected event: " +
+          strconv.Itoa(int(kind)))
+    return true
+}
+
+// document steps over the document start event, decodes the document's root
+// node into out, and then consumes the document end event. It panics if the
+// root node is not followed by a document end event.
+func (d *decoder) document(out reflect.Value) bool {
+    d.next()
+    ok := d.unmarshal(out)
+    if kind := d.event._type; kind != C.YAML_DOCUMENT_END_EVENT {
+        panic("Expected end of document event but got " +
+              strconv.Itoa(int(kind)))
+    }
+    d.next()
+    return ok
+}
+
+// scalar decodes the scalar at the current event into out and advances to
+// the next event.
+//
+// String targets receive the raw scalar text, interface targets receive the
+// value chosen by resolve, and integer targets receive the resolved value
+// only when it is an integer that fits the target type. The result reports
+// whether out was set. It panics for any other kind of target.
+func (d *decoder) scalar(out reflect.Value) (ok bool) {
+    scalar := C.event_scalar(d.event)
+    str := GoYString(scalar.value)
+    resolved, _ := resolve(str)
+    switch out := out.(type) {
+    case *reflect.StringValue:
+        out.Set(str)
+        ok = true
+    case *reflect.InterfaceValue:
+        out.Set(reflect.NewValue(resolved))
+        ok = true
+    case *reflect.IntValue:
+        var intv int64
+        isInt := true
+        switch resolved := resolved.(type) {
+        case int:
+            intv = int64(resolved)
+        case int64:
+            intv = resolved
+        default:
+            isInt = false
+        }
+        // Only report success when the value was really stored; a value
+        // that does not fit the target type is left out rather than
+        // silently truncated.
+        if isInt && !out.Overflow(intv) {
+            out.Set(intv)
+            ok = true
+        }
+    default:
+        panic("Can't handle scalar type yet: " + out.Type().String())
+    }
+    d.next()
+    return ok
+}
+
+// sequence decodes the YAML sequence starting at the current event into out.
+//
+// When out is not a slice the whole sequence is skipped and false is
+// returned. Otherwise each item is decoded into a fresh zero value of the
+// slice's element type and appended when decoding succeeds; items that fail
+// to decode are left out. The sequence end event is consumed.
+func (d *decoder) sequence(out reflect.Value) bool {
+    slice, isSlice := out.(*reflect.SliceValue)
+    if !isSlice {
+        d.skip(C.YAML_SEQUENCE_END_EVENT)
+        return false
+    }
+    elemType := slice.Type().(*reflect.SliceType).Elem()
+
+    d.next()
+    for d.event._type != C.YAML_SEQUENCE_END_EVENT {
+        elem := reflect.MakeZero(elemType)
+        if d.unmarshal(elem) {
+            slice.SetValue(reflect.Append(slice, elem))
+        }
+    }
+    d.next()
+    return true
+}
+
+// mapping decodes the YAML mapping starting at the current event into out.
+//
+// When out is not a map the whole mapping is skipped and false is returned.
+// Otherwise each key and value is decoded into a fresh zero value of the
+// map's key and element types, and the pair is stored only when both decode
+// successfully. The mapping end event is consumed and true is returned, so
+// that an enclosing sequence or mapping keeps the decoded map.
+//
+// NOTE(review): a map created by reflect.MakeZero is presumably nil, which
+// may make SetElem fail for nested maps; confirm.
+func (d *decoder) mapping(out reflect.Value) bool {
+    // XXX What if it's an interface{}?
+    mv, ok := out.(*reflect.MapValue)
+    if !ok {
+        d.skip(C.YAML_MAPPING_END_EVENT)
+        return false
+    }
+    mt := mv.Type().(*reflect.MapType)
+    kt := mt.Key()
+    et := mt.Elem()
+
+    d.next()
+    for d.event._type != C.YAML_MAPPING_END_EVENT {
+        k := reflect.MakeZero(kt)
+        kok := d.unmarshal(k)
+        e := reflect.MakeZero(et)
+        eok := d.unmarshal(e)
+        if kok && eok {
+            mv.SetElem(k, e)
+        }
+    }
+    d.next()
+    return true
+}
+
+// GoYString converts a libyaml character string into a Go string.
+func GoYString(s *C.yaml_char_t) string {
+    cstr := (*C.char)(unsafe.Pointer(s))
+    return C.GoString(cstr)
+}

+ 4 - 0
goyaml.go

@@ -1,10 +1,14 @@
 package goyaml
 
 import (
+    "reflect"
     "os"
 )
 
 
 func Unmarshal(in []byte, out interface{}) os.Error {
+    d := newDecoder(in)
+    defer d.destroy()
+    d.unmarshal(reflect.NewValue(out))
     return nil
 }

+ 70 - 5
goyaml_test.go

@@ -5,17 +5,82 @@ import (
     . "gocheck"
     "testing"
     "goyaml"
+    "reflect"
+    "math"
 )
 
 func Test(t *testing.T) { TestingT(t) }
 
 type S struct{}
 
+var _ = Suite(&S{})
+
+// testItem pairs a YAML input document with the Go value expected from
+// decoding it.
+type testItem struct {
+    data string
+    value interface{}
+}
+
+
+// twoWayTests lists YAML inputs together with the value each one is expected
+// to decode to. The type of the expected value also selects the type of the
+// target handed to Unmarshal by TestHelloWorld.
+var twoWayTests = []testItem{
+    // It will encode either value as a string if asked for.
+    {"hello: world", map[string]string{"hello": "world"}},
+    {"hello: true", map[string]string{"hello": "true"}},
+
+    // And when given the option, will preserve the YAML type.
+    {"hello: world", map[string]interface{}{"hello": "world"}},
+    {"hello: true", map[string]interface{}{"hello": true}},
+    {"hello: 10", map[string]interface{}{"hello": 10}},
+    {"hello: 0b10", map[string]interface{}{"hello": 2}},
+    {"hello: 0xA", map[string]interface{}{"hello": 10}},
+    {"hello: 4294967296", map[string]interface{}{"hello": int64(4294967296)}},
+    {"hello: 0.1", map[string]interface{}{"hello": 0.1}},
+    {"hello: .1", map[string]interface{}{"hello": 0.1}},
+    {"hello: .Inf", map[string]interface{}{"hello": math.Inf(+1)}},
+    {"hello: -.Inf", map[string]interface{}{"hello": math.Inf(-1)}},
+    {"hello: -10", map[string]interface{}{"hello": -10}},
+    {"hello: -.1", map[string]interface{}{"hello": -0.1}},
+
+    // Floats from spec
+    {"canonical: 6.8523e+5", map[string]interface{}{"canonical": 6.8523e+5}},
+    {"expo: 685.230_15e+03", map[string]interface{}{"expo": 685.23015e+03}},
+    {"fixed: 685_230.15", map[string]interface{}{"fixed": 685230.15}},
+    //{"sexa: 190:20:30.15", map[string]interface{}{"sexa": 0}}, // Unsupported
+    {"neginf: -.inf", map[string]interface{}{"neginf": math.Inf(-1)}},
+    {"notanum: .NaN", map[string]interface{}{"notanum": math.NaN}}, // NOTE(review): math.NaN is not called here, unlike math.Inf above; confirm this is intended
+
+    // Bools from spec
+    {"canonical: y", map[string]interface{}{"canonical": true}},
+    {"answer: NO", map[string]interface{}{"answer": false}},
+    {"logical: True", map[string]interface{}{"logical": true}},
+    {"option: on", map[string]interface{}{"option": true}},
+
+    // Ints from spec
+    {"canonical: 685230", map[string]interface{}{"canonical": 685230}},
+    {"decimal: +685_230", map[string]interface{}{"decimal": 685230}},
+    {"octal: 02472256", map[string]interface{}{"octal": 685230}},
+    {"hexa: 0x_0A_74_AE", map[string]interface{}{"hexa": 685230}},
+    {"bin: 0b1010_0111_0100_1010_1110", map[string]interface{}{"bin": 685230}},
+    //{"sexa: 190:20:30", map[string]interface{}{"sexa": 0}}, // Unsupported
+
+    // Sequence
+    {"seq: [A,B,C]", map[string][]string{"seq": []string{"A", "B", "C"}}},
+    {"seq: [A,1,C]", map[string][]string{"seq": []string{"A", "1", "C"}}},
+    {"seq: [A,1,C]", map[string][]int{"seq": []int{1}}},
+}
+
 
+// TestHelloWorld decodes every twoWayTests entry into a fresh value of the
+// expected type (an empty map for map types, the zero value otherwise) and
+// asserts that Unmarshal returns no error and that the decoded value equals
+// the expected one.
 func (s *S) TestHelloWorld(c *C) {
-    data := []byte("hello: world")
-    value := map[string]string{}
-    err := goyaml.Unmarshal(data, value)
-    c.Assert(err, IsNil)
-    c.Assert(value["hello"], Equals, "world")
+    for _, item := range twoWayTests {
+        t := reflect.NewValue(item.value).Type()
+        var value interface{}
+        if t, ok := t.(*reflect.MapType); ok {
+            value = reflect.MakeMap(t).Interface()
+        } else {
+            zero := reflect.MakeZero(reflect.NewValue(item.value).Type())
+            value = zero.Interface()
+        }
+        err := goyaml.Unmarshal([]byte(item.data), value)
+        c.Assert(err, IsNil)
+        c.Assert(value, Equals, item.value)
+    }
 }

+ 8 - 0
helpers.c

@@ -0,0 +1,8 @@
+#include <yaml.h>
+
+
+/*
+ * event_scalar returns a pointer to the scalar member of the event's data.
+ * The return type is spelled with __typeof__, presumably because that
+ * member's type has no name of its own in yaml.h -- TODO confirm.
+ */
+__typeof__(((yaml_event_t *)0)->data.scalar) * // Sadness.
+event_scalar(yaml_event_t *event)
+{
+	return &event->data.scalar;
+}

+ 0 - 4
parser.go

@@ -1,4 +0,0 @@
-package goyaml
-
-/* */
-import "C"

+ 137 - 0
resolve.go

@@ -0,0 +1,137 @@
+package goyaml
+
+import (
+    "strconv"
+    "strings"
+    "math"
+)
+
+
+// TODO: Support merge, timestamps, and base 60 floats.
+
+
+// stdTag identifies one of the standard YAML tags that resolve can assign to
+// a scalar.
+type stdTag int
+
+// Standard tag values. The zero stdTag is not one of them.
+var StrTag = stdTag(1)
+var BoolTag = stdTag(2)
+var IntTag = stdTag(3)
+var FloatTag = stdTag(4)
+
+// String returns the full yaml.org tag URI for t. It panics when t is not
+// one of StrTag, BoolTag, IntTag or FloatTag.
+func (t stdTag) String() string {
+    if t == StrTag {
+        return "tag:yaml.org,2002:str"
+    }
+    if t == BoolTag {
+        return "tag:yaml.org,2002:bool"
+    }
+    if t == IntTag {
+        return "tag:yaml.org,2002:int"
+    }
+    if t == FloatTag {
+        return "tag:yaml.org,2002:float"
+    }
+    panic("Internal error: missing tag case")
+    return ""
+}
+
+
+// resolveMapItem is the value and tag that an exact scalar spelling
+// resolves to.
+type resolveMapItem struct {
+    value interface{}
+    tag stdTag
+}
+
+// resolveTable classifies a scalar by its first byte; a zero entry means the
+// scalar can only be a string. resolveMap maps exact scalar spellings to
+// their resolved value and tag. Both are filled in by init.
+var resolveTable = make([]byte, 256)
+var resolveMap = make(map[string]resolveMapItem)
+
+
+// init fills in resolveTable and resolveMap.
+//
+// resolveTable marks the first bytes that may start a non-string scalar:
+// 'S' for a sign, 'D' for a digit, 'M' for letters that start a spelling
+// held in resolveMap, '.' for a float, and '<' for a merge key. resolveMap
+// receives every accepted spelling of the boolean, NaN and infinity
+// literals.
+func init() {
+    t := resolveTable
+    t[int('+')] = 'S' // Sign
+    t[int('-')] = 'S'
+    for _, c := range "0123456789" {
+        t[int(c)] = 'D' // Digit
+    }
+    for _, c := range "yYnNtTfFoO" {
+        t[int(c)] = 'M' // In map
+    }
+    t[int('.')] = '.' // Float (potentially in map)
+    t[int('<')] = '<' // Merge
+
+    var resolveMapList = []struct{v interface{}; tag stdTag; l []string} {
+        {true, BoolTag, []string{"y", "Y", "yes", "Yes", "YES"}},
+        {true, BoolTag, []string{"true", "True", "TRUE"}},
+        {true, BoolTag, []string{"on", "On", "ON"}},
+        {false, BoolTag, []string{"n", "N", "no", "No", "NO"}},
+        {false, BoolTag, []string{"false", "False", "FALSE"}},
+        {false, BoolTag, []string{"off", "Off", "OFF"}},
+        // math.NaN must be called: storing the function itself would make
+        // ".nan" resolve to a func value instead of a float.
+        {math.NaN(), FloatTag, []string{".nan", ".NaN", ".NAN"}},
+        {math.Inf(+1), FloatTag, []string{".inf", ".Inf", ".INF"}},
+        {math.Inf(+1), FloatTag, []string{"+.inf", "+.Inf", "+.INF"}},
+        {math.Inf(-1), FloatTag, []string{"-.inf", "-.Inf", "-.INF"}},
+    }
+
+    m := resolveMap
+    for _, item := range resolveMapList {
+        for _, s := range item.l {
+            m[s] = resolveMapItem{item.v, item.tag}
+        }
+    }
+}
+
+// resolve picks the Go value and standard tag for the scalar text in.
+//
+// An empty string is returned unchanged with the zero tag. Text whose first
+// byte has no resolveTable entry is a string. Exact spellings held in
+// resolveMap yield their mapped value. Otherwise text starting with '.' is
+// tried as a float, and text starting with a digit or sign is tried, after
+// removing underscores, first as an integer (returned as int when it fits,
+// int64 otherwise) and then as a float. Anything that fails to parse is
+// returned as a string. It panics on a resolveTable class it does not handle.
+func resolve(in string) (out interface{}, tag stdTag) {
+    if len(in) == 0 {
+        return in, tag
+    }
+    class := resolveTable[in[0]]
+    if class == 0 {
+        // No non-string scalar starts with this byte.
+        return in, StrTag
+    }
+
+    // Literal spellings are looked up before any parsing is attempted.
+    if known, found := resolveMap[in]; found {
+        return known.value, known.tag
+    }
+
+    switch class {
+    case 'M':
+        // Only literal spellings start this way, and the lookup above missed.
+
+    case '.':
+        // Not a literal, so maybe a normal float.
+        if f, err := strconv.Atof(in); err == nil {
+            return f, FloatTag
+        }
+        // XXX Handle base 60 floats here.
+
+    case 'D', 'S':
+        // Int, float, or timestamp. Underscores are digit separators.
+        if strings.Index(in, "_") >= 0 {
+            in = strings.Replace(in, "_", "", -1)
+        }
+        if i, err := strconv.Btoi64(in, 0); err == nil {
+            if narrow := int(i); int64(narrow) == i {
+                return narrow, IntTag
+            }
+            return i, IntTag
+        }
+        if f, err := strconv.Atof(in); err == nil {
+            return f, FloatTag
+        }
+        // XXX Handle timestamps here.
+
+    case '<':
+        // XXX Handle merge (<<) here.
+
+    default:
+        panic("resolveTable item not yet handled: " +
+              string([]byte{class}) + " (with " + in +")")
+    }
+    return in, StrTag
+}