decode.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. // Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license found in the LICENSE file.
  3. package codec
  4. import (
  5. "io"
  6. "reflect"
  7. //"math"
  8. //"fmt"
  9. )
  // Tagging strings used when building decoder error messages.

  // msgTagDec is the tag handed to doPanic for errors raised by the decoder.
  var msgTagDec = "codec.decoder"

  // msgBadDesc is appended to the error raised for a value kind the decoder
  // does not handle.
  var msgBadDesc = "Unrecognized descriptor byte"
  // decodeNakedContext is the status returned by decDriver.decodeNaked
  // alongside the decoded value.
  type decodeNakedContext uint8

  const (
  	// dncHandled: decodeValue stores the returned value as-is and returns.
  	dncHandled decodeNakedContext = 0
  	// The remaining states make decodeValue continue decoding into the
  	// returned value. NOTE(review): their exact meaning is defined by the
  	// drivers, which are not part of this file section.
  	dncNil       decodeNakedContext = 1
  	dncExt       decodeNakedContext = 2
  	dncContainer decodeNakedContext = 3
  )
  // decReader abstracts the reading source, so the decoder can read either from
  // an io.Reader or directly off a byte slice with zero-copying.
  // Both implementations in this file panic when the requested bytes cannot
  // be read.
  type decReader interface {
  	// readn1 reads and returns the next single byte.
  	readn1() uint8
  	// readn reads the next n bytes and returns them.
  	readn(n int) []byte
  	// readb reads exactly len(bs) bytes into bs.
  	readb(bs []byte)
  	// readUint16, readUint32 and readUint64 read a fixed-width unsigned integer.
  	readUint16() uint16
  	readUint32() uint32
  	readUint64() uint64
  }
  32. type decDriver interface {
  33. initReadNext()
  34. currentIsNil() bool
  35. decodeBuiltinType(rt reflect.Type, rv reflect.Value) bool
  36. //decodeNaked should completely handle extensions, builtins, primitives, etc.
  37. //Numbers are decoded as int64, uint64, float64 only (no smaller sized number types).
  38. decodeNaked(h decodeHandleI) (rv reflect.Value, ctx decodeNakedContext)
  39. decodeInt(bitsize uint8) (i int64)
  40. decodeUint(bitsize uint8) (ui uint64)
  41. decodeFloat(chkOverflow32 bool) (f float64)
  42. decodeBool() (b bool)
  43. // decodeString can also decode symbols
  44. decodeString() (s string)
  45. decodeBytes(bs []byte) (bsOut []byte, changed bool)
  46. decodeExt(tag byte) []byte
  47. readMapLen() int
  48. readArrayLen() int
  49. }
  50. // A Decoder reads and decodes an object from an input stream in the codec format.
  51. type Decoder struct {
  52. r decReader
  53. d decDriver
  54. h decodeHandleI
  55. }
  // ioDecReader is a decReader that reads off an io.Reader.
  type ioDecReader struct {
  	r io.Reader
  	// x is a temp byte array re-used internally for efficiency: the
  	// fixed-width reads (1, 2, 4 or 8 bytes) go through it.
  	x [8]byte
  }
  // bytesDecReader is a decReader that reads off a byte slice with zero copying.
  type bytesDecReader struct {
  	b []byte // data being decoded
  	c int    // cursor: index in b of the next unread byte
  	a int    // available: number of bytes not yet consumed
  }
  // decExtTagFn pairs an extension tag byte with the function that decodes the
  // extension's bytes into a value.
  // The field order matters: values are built with positional literals.
  type decExtTagFn struct {
  	fn  func(reflect.Value, []byte) error
  	tag byte
  }
  71. type decExtTypeTagFn struct {
  72. rt reflect.Type
  73. decExtTagFn
  74. }
  // decodeHandleI is the part of a Handle that the decoder consults while decoding.
  type decodeHandleI interface {
  	// errorIfNoField reports whether a stream key with no matching struct
  	// field is an error.
  	errorIfNoField() bool
  	// getDecodeExt returns the tag and decode function registered for rt;
  	// fn is nil when nothing is registered.
  	getDecodeExt(rt reflect.Type) (tag byte, fn func(reflect.Value, []byte) error)
  }
  79. type decHandle struct {
  80. // put word-aligned fields first (before bools, etc)
  81. exts []decExtTypeTagFn
  82. extFuncs map[reflect.Type]decExtTagFn
  83. // if an extension for byte slice is defined, then always decode Raw as strings
  84. rawToStringOverride bool
  85. }
  // DecodeOptions holds the options that control decoding.
  type DecodeOptions struct {
  	// An instance of MapType is used during schema-less decoding of a map in the stream.
  	// If nil, we use map[interface{}]interface{}
  	MapType reflect.Type

  	// An instance of SliceType is used during schema-less decoding of an array in the stream.
  	// If nil, we use []interface{}
  	SliceType reflect.Type

  	// ErrorIfNoField controls whether an error is returned when decoding a map
  	// from a codec stream into a struct, and no matching struct field is found.
  	ErrorIfNoField bool
  }
  97. func (o *DecodeOptions) errorIfNoField() bool {
  98. return o.ErrorIfNoField
  99. }
  100. // addDecodeExt registers a function to handle decoding into a given type when an
  101. // extension type and specific tag byte is detected in the codec stream.
  102. // To remove an extension, pass fn=nil.
  103. func (o *decHandle) addDecodeExt(rt reflect.Type, tag byte, fn func(reflect.Value, []byte) error) {
  104. if o.exts == nil {
  105. o.exts = make([]decExtTypeTagFn, 0, 2)
  106. o.extFuncs = make(map[reflect.Type]decExtTagFn, 2)
  107. }
  108. if _, ok := o.extFuncs[rt]; ok {
  109. delete(o.extFuncs, rt)
  110. if rt == byteSliceTyp {
  111. o.rawToStringOverride = false
  112. }
  113. }
  114. if fn != nil {
  115. o.extFuncs[rt] = decExtTagFn{fn, tag}
  116. if rt == byteSliceTyp {
  117. o.rawToStringOverride = true
  118. }
  119. }
  120. if leno := len(o.extFuncs); leno > cap(o.exts) {
  121. o.exts = make([]decExtTypeTagFn, leno, (leno * 3 / 2))
  122. } else {
  123. o.exts = o.exts[0:leno]
  124. }
  125. var i int
  126. for k, v := range o.extFuncs {
  127. o.exts[i] = decExtTypeTagFn{k, v}
  128. i++
  129. }
  130. }
  131. func (o *decHandle) getDecodeExtForTag(tag byte) (rt reflect.Type, fn func(reflect.Value, []byte) error) {
  132. for i, l := 0, len(o.exts); i < l; i++ {
  133. if o.exts[i].tag == tag {
  134. return o.exts[i].rt, o.exts[i].fn
  135. }
  136. }
  137. return
  138. }
  139. func (o *decHandle) getDecodeExt(rt reflect.Type) (tag byte, fn func(reflect.Value, []byte) error) {
  140. if l := len(o.exts); l == 0 {
  141. return
  142. } else if l < mapAccessThreshold {
  143. for i := 0; i < l; i++ {
  144. if o.exts[i].rt == rt {
  145. x := o.exts[i].decExtTagFn
  146. return x.tag, x.fn
  147. }
  148. }
  149. } else {
  150. x := o.extFuncs[rt]
  151. return x.tag, x.fn
  152. }
  153. return
  154. }
  155. // NewDecoder returns a Decoder for decoding a stream of bytes from an io.Reader.
  156. func NewDecoder(r io.Reader, h Handle) *Decoder {
  157. z := ioDecReader{
  158. r: r,
  159. }
  160. return &Decoder{r: &z, d: h.newDecDriver(&z), h: h}
  161. }
  162. // NewDecoderBytes returns a Decoder which efficiently decodes directly
  163. // from a byte slice with zero copying.
  164. func NewDecoderBytes(in []byte, h Handle) *Decoder {
  165. z := bytesDecReader{
  166. b: in,
  167. a: len(in),
  168. }
  169. return &Decoder{r: &z, d: h.newDecDriver(&z), h: h}
  170. }
  171. // Decode decodes the stream from reader and stores the result in the
  172. // value pointed to by v. v cannot be a nil pointer. v can also be
  173. // a reflect.Value of a pointer.
  174. //
  175. // Note that a pointer to a nil interface is not a nil pointer.
  176. // If you do not know what type of stream it is, pass in a pointer to a nil interface.
  177. // We will decode and store a value in that nil interface.
  178. //
  179. // Sample usages:
  180. // // Decoding into a non-nil typed value
  181. // var f float32
  182. // err = codec.NewDecoder(r, handle).Decode(&f)
  183. //
  184. // // Decoding into nil interface
  185. // var v interface{}
  186. // dec := codec.NewDecoder(r, handle)
  187. // err = dec.Decode(&v)
  188. //
  189. // There are some special rules when decoding into containers (slice/array/map/struct).
  190. // Decode will typically use the stream contents to UPDATE the container.
  191. // - This means that for a struct or map, we just update matching fields or keys.
  192. // - For a slice/array, we just update the first n elements, where n is length of the stream.
  193. // - However, if decoding into a nil map/slice and the length of the stream is 0,
  194. // we reset the destination map/slice to be a zero-length non-nil map/slice.
  195. // - Also, if the encoded value is Nil in the stream, then we try to set
  196. // the container to its "zero" value (e.g. nil for slice/map).
  197. //
  198. func (d *Decoder) Decode(v interface{}) (err error) {
  199. defer panicToErr(&err)
  200. d.decode(v)
  201. return
  202. }
  203. func (d *Decoder) decode(iv interface{}) {
  204. d.d.initReadNext()
  205. // Fast path included for various pointer types which cannot be registered as extensions
  206. switch v := iv.(type) {
  207. case nil:
  208. decErr("Cannot decode into nil.")
  209. case reflect.Value:
  210. d.chkPtrValue(v)
  211. d.decodeValue(v)
  212. case *string:
  213. *v = d.d.decodeString()
  214. case *bool:
  215. *v = d.d.decodeBool()
  216. case *int:
  217. *v = int(d.d.decodeInt(intBitsize))
  218. case *int8:
  219. *v = int8(d.d.decodeInt(8))
  220. case *int16:
  221. *v = int16(d.d.decodeInt(16))
  222. case *int32:
  223. *v = int32(d.d.decodeInt(32))
  224. case *int64:
  225. *v = int64(d.d.decodeInt(64))
  226. case *uint:
  227. *v = uint(d.d.decodeUint(uintBitsize))
  228. case *uint8:
  229. *v = uint8(d.d.decodeUint(8))
  230. case *uint16:
  231. *v = uint16(d.d.decodeUint(16))
  232. case *uint32:
  233. *v = uint32(d.d.decodeUint(32))
  234. case *uint64:
  235. *v = uint64(d.d.decodeUint(64))
  236. case *float32:
  237. *v = float32(d.d.decodeFloat(true))
  238. case *float64:
  239. *v = d.d.decodeFloat(false)
  240. case *interface{}:
  241. d.decodeValue(reflect.ValueOf(iv).Elem())
  242. default:
  243. rv := reflect.ValueOf(iv)
  244. d.chkPtrValue(rv)
  245. d.decodeValue(rv)
  246. }
  247. }
  248. func (d *Decoder) decodeValue(rv reflect.Value) {
  249. // Note: if stream is set to nil, we set the corresponding value to its "zero" value
  250. // var ctr int (define this above the function if trying to do this run)
  251. // ctr++
  252. // log(".. [%v] enter decode: rv: %v <==> %T <==> %v", ctr, rv, rv.Interface(), rv.Interface())
  253. // defer func(ctr2 int) {
  254. // log(".... [%v] exit decode: rv: %v <==> %T <==> %v", ctr2, rv, rv.Interface(), rv.Interface())
  255. // }(ctr)
  256. dd := d.d //so we don't dereference constantly
  257. dd.initReadNext()
  258. rvOrig := rv
  259. wasNilIntf := rv.Kind() == reflect.Interface && rv.IsNil()
  260. rt := rv.Type()
  261. var ndesc decodeNakedContext
  262. //if nil interface, use some hieristics to set the nil interface to an
  263. //appropriate value based on the first byte read (byte descriptor bd)
  264. if wasNilIntf {
  265. if dd.currentIsNil() {
  266. return
  267. }
  268. //Prevent from decoding into e.g. error, io.Reader, etc if it's nil and non-nil value in stream.
  269. //We can only decode into interface{} (0 methods). Else reflect.Set fails later.
  270. if num := rt.NumMethod(); num > 0 {
  271. decErr("decodeValue: Cannot decode non-nil codec value into nil %v (%v methods)", rt, num)
  272. } else {
  273. rv, ndesc = dd.decodeNaked(d.h)
  274. if ndesc == dncHandled {
  275. rvOrig.Set(rv)
  276. return
  277. }
  278. rt = rv.Type()
  279. }
  280. } else if dd.currentIsNil() {
  281. // Note: if stream is set to nil, we set the dereferenced value to its "zero" value (if settable).
  282. for rv.Kind() == reflect.Ptr {
  283. rv = rv.Elem()
  284. }
  285. if rv.CanSet() {
  286. rv.Set(reflect.Zero(rv.Type()))
  287. }
  288. return
  289. }
  290. // An extension can be registered for any type, regardless of the Kind
  291. // (e.g. type BitSet int64, type MyStruct { / * unexported fields * / }, type X []int, etc.
  292. //
  293. // We can't check if it's an extension byte here first, because the user may have
  294. // registered a pointer or non-pointer type, meaning we may have to recurse first
  295. // before matching a mapped type, even though the extension byte is already detected.
  296. //
  297. // If we are checking for builtin or ext type here, it means we didn't go through decodeNaked,
  298. // Because decodeNaked would have handled it. It also means wasNilIntf = false.
  299. if dd.decodeBuiltinType(rt, rv) {
  300. return
  301. }
  302. if bfnTag, bfnFn := d.h.getDecodeExt(rt); bfnFn != nil {
  303. xbs := dd.decodeExt(bfnTag)
  304. if fnerr := bfnFn(rv, xbs); fnerr != nil {
  305. panic(fnerr)
  306. }
  307. return
  308. }
  309. // NOTE: if decoding into a nil interface{}, we return a non-nil
  310. // value except even if the container registers a length of 0.
  311. //
  312. // NOTE: Do not make blocks for struct, slice, map, etc individual methods.
  313. // It ends up being more expensive, because they recursively calls decodeValue
  314. //
  315. // (Mar 7, 2013. DON'T REARRANGE ... code clarity)
  316. // tried arranging in sequence of most probable ones.
  317. // string, bool, integer, float, struct, ptr, slice, array, map, interface, uint.
  318. switch rk := rv.Kind(); rk {
  319. case reflect.String:
  320. rv.SetString(dd.decodeString())
  321. case reflect.Bool:
  322. rv.SetBool(dd.decodeBool())
  323. case reflect.Int:
  324. rv.SetInt(dd.decodeInt(intBitsize))
  325. case reflect.Int64:
  326. rv.SetInt(dd.decodeInt(64))
  327. case reflect.Int32:
  328. rv.SetInt(dd.decodeInt(32))
  329. case reflect.Int8:
  330. rv.SetInt(dd.decodeInt(8))
  331. case reflect.Int16:
  332. rv.SetInt(dd.decodeInt(16))
  333. case reflect.Float32:
  334. rv.SetFloat(dd.decodeFloat(true))
  335. case reflect.Float64:
  336. rv.SetFloat(dd.decodeFloat(false))
  337. case reflect.Uint8:
  338. rv.SetUint(dd.decodeUint(8))
  339. case reflect.Uint64:
  340. rv.SetUint(dd.decodeUint(64))
  341. case reflect.Uint:
  342. rv.SetUint(dd.decodeUint(uintBitsize))
  343. case reflect.Uint32:
  344. rv.SetUint(dd.decodeUint(32))
  345. case reflect.Uint16:
  346. rv.SetUint(dd.decodeUint(16))
  347. case reflect.Ptr:
  348. if rv.IsNil() {
  349. if wasNilIntf {
  350. rv = reflect.New(rt.Elem())
  351. } else {
  352. rv.Set(reflect.New(rt.Elem()))
  353. }
  354. }
  355. d.decodeValue(rv.Elem())
  356. case reflect.Interface:
  357. d.decodeValue(rv.Elem())
  358. case reflect.Struct:
  359. containerLen := dd.readMapLen()
  360. if containerLen == 0 {
  361. break
  362. }
  363. sfi := getStructFieldInfos(rt)
  364. for j := 0; j < containerLen; j++ {
  365. // var rvkencname string
  366. // ddecode(&rvkencname)
  367. dd.initReadNext()
  368. rvkencname := dd.decodeString()
  369. // rvksi := sfi.getForEncName(rvkencname)
  370. if k := sfi.indexForEncName(rvkencname); k > -1 {
  371. sfik := sfi[k]
  372. if sfik.i > -1 {
  373. d.decodeValue(rv.Field(int(sfik.i)))
  374. } else {
  375. d.decodeValue(rv.FieldByIndex(sfik.is))
  376. }
  377. // d.decodeValue(sfi.field(k, rv))
  378. } else {
  379. if d.h.errorIfNoField() {
  380. decErr("No matching struct field found when decoding stream map with key: %v", rvkencname)
  381. } else {
  382. var nilintf0 interface{}
  383. d.decodeValue(reflect.ValueOf(&nilintf0).Elem())
  384. }
  385. }
  386. }
  387. case reflect.Slice:
  388. // Be more careful calling Set() here, because a reflect.Value from an array
  389. // may have come in here (which may not be settable).
  390. // In places where the slice got from an array could be, we should guard with CanSet() calls.
  391. if rt == byteSliceTyp { // rawbytes
  392. if bs2, changed2 := dd.decodeBytes(rv.Bytes()); changed2 {
  393. rv.SetBytes(bs2)
  394. }
  395. if wasNilIntf && rv.IsNil() {
  396. rv.SetBytes([]byte{})
  397. }
  398. break
  399. }
  400. containerLen := dd.readArrayLen()
  401. if wasNilIntf {
  402. rv = reflect.MakeSlice(rt, containerLen, containerLen)
  403. }
  404. if containerLen == 0 {
  405. if rv.IsNil() {
  406. rv.Set(reflect.MakeSlice(rt, containerLen, containerLen))
  407. }
  408. break
  409. }
  410. if rv.IsNil() {
  411. // wasNilIntf only applies if rv is nil (since that's what we did earlier)
  412. rv.Set(reflect.MakeSlice(rt, containerLen, containerLen))
  413. } else {
  414. // if we need to reset rv but it cannot be set, we should err out.
  415. // for example, if slice is got from unaddressable array, CanSet = false
  416. if rvcap, rvlen := rv.Len(), rv.Cap(); containerLen > rvcap {
  417. if rv.CanSet() {
  418. rvn := reflect.MakeSlice(rt, containerLen, containerLen)
  419. if rvlen > 0 {
  420. reflect.Copy(rvn, rv)
  421. }
  422. rv.Set(rvn)
  423. } else {
  424. decErr("Cannot reset slice with less cap: %v that stream contents: %v", rvcap, containerLen)
  425. }
  426. } else if containerLen > rvlen {
  427. rv.SetLen(containerLen)
  428. }
  429. }
  430. for j := 0; j < containerLen; j++ {
  431. d.decodeValue(rv.Index(j))
  432. }
  433. case reflect.Array:
  434. d.decodeValue(rv.Slice(0, rv.Len()))
  435. case reflect.Map:
  436. containerLen := dd.readMapLen()
  437. if rv.IsNil() {
  438. rv.Set(reflect.MakeMap(rt))
  439. }
  440. if containerLen == 0 {
  441. break
  442. }
  443. ktype, vtype := rt.Key(), rt.Elem()
  444. for j := 0; j < containerLen; j++ {
  445. rvk := reflect.New(ktype).Elem()
  446. d.decodeValue(rvk)
  447. if ktype == intfTyp {
  448. rvk = rvk.Elem()
  449. if rvk.Type() == byteSliceTyp {
  450. rvk = reflect.ValueOf(string(rvk.Bytes()))
  451. }
  452. }
  453. rvv := rv.MapIndex(rvk)
  454. if !rvv.IsValid() {
  455. rvv = reflect.New(vtype).Elem()
  456. }
  457. d.decodeValue(rvv)
  458. rv.SetMapIndex(rvk, rvv)
  459. }
  460. default:
  461. decErr("Unhandled value for kind: %v: %s", rk, msgBadDesc)
  462. }
  463. if wasNilIntf {
  464. rvOrig.Set(rv)
  465. }
  466. return
  467. }
  468. func (d *Decoder) chkPtrValue(rv reflect.Value) {
  469. // We cannot marshal into a non-pointer or a nil pointer
  470. // (at least pass a nil interface so we can marshal into it)
  471. if rv.Kind() != reflect.Ptr || rv.IsNil() {
  472. var rvi interface{} = rv
  473. if rv.IsValid() && rv.CanInterface() {
  474. rvi = rv.Interface()
  475. }
  476. decErr("Decode: Expecting valid pointer to decode into. Got: %v, %T, %v",
  477. rv.Kind(), rvi, rvi)
  478. }
  479. }
  480. // ------------------------------------
  481. func (z *ioDecReader) readn(n int) (bs []byte) {
  482. bs = make([]byte, n)
  483. if _, err := io.ReadFull(z.r, bs); err != nil {
  484. panic(err)
  485. }
  486. return
  487. }
  488. func (z *ioDecReader) readb(bs []byte) {
  489. if _, err := io.ReadFull(z.r, bs); err != nil {
  490. panic(err)
  491. }
  492. }
  493. func (z *ioDecReader) readn1() uint8 {
  494. z.readb(z.x[:1])
  495. return z.x[0]
  496. }
  497. func (z *ioDecReader) readUint16() uint16 {
  498. z.readb(z.x[:2])
  499. return bigen.Uint16(z.x[:2])
  500. }
  501. func (z *ioDecReader) readUint32() uint32 {
  502. z.readb(z.x[:4])
  503. return bigen.Uint32(z.x[:4])
  504. }
  505. func (z *ioDecReader) readUint64() uint64 {
  506. z.readb(z.x[:8])
  507. return bigen.Uint64(z.x[:8])
  508. }
  509. // ------------------------------------
  510. func (z *bytesDecReader) consume(n int) (oldcursor int) {
  511. if z.a == 0 {
  512. panic(io.EOF)
  513. }
  514. if n > z.a {
  515. doPanic(msgTagDec, "Trying to read %v bytes. Only %v available", n, z.a)
  516. }
  517. // z.checkAvailable(n)
  518. oldcursor = z.c
  519. z.c = oldcursor + n
  520. z.a = z.a - n
  521. return
  522. }
  523. func (z *bytesDecReader) readn(n int) (bs []byte) {
  524. c0 := z.consume(n)
  525. bs = z.b[c0:z.c]
  526. return
  527. }
  528. func (z *bytesDecReader) readb(bs []byte) {
  529. copy(bs, z.readn(len(bs)))
  530. }
  531. func (z *bytesDecReader) readn1() uint8 {
  532. c0 := z.consume(1)
  533. return z.b[c0]
  534. }
  535. // Use binaryEncoding helper for 4 and 8 bits, but inline it for 2 bits
  536. // creating temp slice variable and copying it to helper function is expensive
  537. // for just 2 bits.
  538. func (z *bytesDecReader) readUint16() uint16 {
  539. c0 := z.consume(2)
  540. return uint16(z.b[c0+1]) | uint16(z.b[c0])<<8
  541. }
  542. func (z *bytesDecReader) readUint32() uint32 {
  543. c0 := z.consume(4)
  544. return bigen.Uint32(z.b[c0:z.c])
  545. }
  546. func (z *bytesDecReader) readUint64() uint64 {
  547. c0 := z.consume(8)
  548. return bigen.Uint64(z.b[c0:z.c])
  549. }
  550. // ----------------------------------------
  551. func decErr(format string, params ...interface{}) {
  552. doPanic(msgTagDec, format, params...)
  553. }