encode.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621
  1. // Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license found in the LICENSE file.
  3. package codec
  4. import (
  5. //"bufio"
  6. "io"
  7. "reflect"
  8. //"fmt"
  9. )
  10. //var _ = fmt.Printf
const (
	// Some tagging information for error messages.
	msgTagEnc = "codec.encoder"
	// defEncByteBufSize is the length of the buffer NewEncoderBytes allocates
	// when the caller hands it a nil output slice.
	defEncByteBufSize = 1 << 6 // 4:16, 6:64, 8:256, 10:1024
	// maxTimeSecs32 = math.MaxInt32 / 60 / 24 / 366
)
// encWriter abstracts writing to a byte array or to an io.Writer.
//
// The methods return no error: the implementations in this file report
// failures by panicking.
type encWriter interface {
	writeUint16(uint16)
	writeUint32(uint32)
	writeUint64(uint64)
	writeb([]byte)
	writestr(string)
	writen1(byte)
	writen2(byte, byte)
	// atEndOfEncode is invoked by Encoder.Encode after the value has been written.
	atEndOfEncode()
}
// encDriver is the set of primitive encode operations the Encoder calls.
// An Encoder obtains its driver from Handle.newEncDriver, passing it the
// encWriter the output goes to.
type encDriver interface {
	// encodeBuiltinType reports whether it handled rv itself; when it returns
	// true, encodeValue does nothing further for that value.
	encodeBuiltinType(rt uintptr, rv reflect.Value) bool
	encodeNil()
	encodeInt(i int64)
	encodeUint(i uint64)
	encodeBool(b bool)
	encodeFloat32(f float32)
	encodeFloat64(f float64)
	encodeExtPreamble(xtag byte, length int)
	encodeArrayPreamble(length int)
	encodeMapPreamble(length int)
	encodeString(c charEncoding, v string)
	// encodeSymbol is used for struct field names and string map keys.
	encodeSymbol(v string)
	encodeStringBytes(c charEncoding, v []byte)
	//TODO
	//encBignum(f *big.Int)
	//encStringRunes(c charEncoding, v []rune)
}
// encodeHandleI is the part of a handle the Encoder consults while encoding.
type encodeHandleI interface {
	// getEncodeExt returns the extension tag and function registered for the
	// type id rt; fn is nil when there is none.
	getEncodeExt(rt uintptr) (tag byte, fn func(reflect.Value) ([]byte, error))
	// writeExt selects how extension bytes are written: with an extension
	// preamble when true, as raw string bytes when false.
	writeExt() bool
	// structToArray reports whether structs should be encoded as arrays.
	structToArray() bool
}
// An Encoder writes an object to an output stream in the codec format.
type Encoder struct {
	w encWriter     // destination of the encoded bytes
	e encDriver     // primitive encode operations, created by the handle
	h encodeHandleI // extension lookup and encode options
}
// ioEncWriterWriter is the set of methods ioEncWriter needs from its
// destination. NewEncoder uses the supplied io.Writer directly when it
// implements all three, and wraps it in a simpleIoEncWriterWriter otherwise.
type ioEncWriterWriter interface {
	WriteByte(c byte) error
	WriteString(s string) (n int, err error)
	Write(p []byte) (n int, err error)
}
// ioEncStringWriter is implemented by writers that can write a string directly.
type ioEncStringWriter interface {
	WriteString(s string) (n int, err error)
}
// simpleIoEncWriterWriter adapts a plain io.Writer to ioEncWriterWriter.
// bw and sw are optional: when nil, WriteByte and WriteString fall back to w.Write.
type simpleIoEncWriterWriter struct {
	w  io.Writer
	bw io.ByteWriter
	sw ioEncStringWriter
}
// ioEncWriter implements encWriter and can write to an io.Writer implementation.
type ioEncWriter struct {
	w ioEncWriterWriter
	x [8]byte // temp byte array re-used internally for efficiency
}
// bytesEncWriter implements encWriter and can write to a byte slice.
// It is used by Marshal function.
type bytesEncWriter struct {
	b   []byte  // buffer being written into; replaced by a larger one when full
	c   int     // cursor: number of bytes written so far
	out *[]byte // write out on atEndOfEncode
}
// encExtTagFn pairs an extension encode function with its tag byte.
type encExtTagFn struct {
	fn  func(reflect.Value) ([]byte, error)
	tag byte
}
// encExtTypeTagFn is an encExtTagFn together with the id of the type it
// applies to, as stored in encHandle.exts.
type encExtTypeTagFn struct {
	rt uintptr
	encExtTagFn
}
// encHandle holds the extension functions registered for encoding.
// The same registrations are kept twice: in a map keyed by type id and in a
// slice, so that getEncodeExt can pick either a map lookup or a linear scan.
type encHandle struct {
	extFuncs map[uintptr]encExtTagFn
	exts     []encExtTypeTagFn
}
// EncodeOptions contains options that influence how values are encoded.
type EncodeOptions struct {
	// Encode a struct as an array, and not as a map.
	StructToArray bool
}
  99. func (o *simpleIoEncWriterWriter) WriteByte(c byte) (err error) {
  100. if o.bw != nil {
  101. return o.bw.WriteByte(c)
  102. }
  103. _, err = o.w.Write([]byte{c})
  104. return
  105. }
  106. func (o *simpleIoEncWriterWriter) WriteString(s string) (n int, err error) {
  107. if o.sw != nil {
  108. return o.sw.WriteString(s)
  109. }
  110. return o.w.Write([]byte(s))
  111. }
  112. func (o *simpleIoEncWriterWriter) Write(p []byte) (n int, err error) {
  113. return o.w.Write(p)
  114. }
  115. // addEncodeExt registers a function to handle encoding a given type as an extension
  116. // with a specific specific tag byte.
  117. // To remove an extension, pass fn=nil.
  118. func (o *encHandle) addEncodeExt(rt uintptr, tag byte, fn func(reflect.Value) ([]byte, error)) {
  119. if o.exts == nil {
  120. o.exts = make([]encExtTypeTagFn, 0, 8)
  121. o.extFuncs = make(map[uintptr]encExtTagFn, 8)
  122. } else {
  123. if _, ok := o.extFuncs[rt]; ok {
  124. delete(o.extFuncs, rt)
  125. for i := 0; i < len(o.exts); i++ {
  126. if o.exts[i].rt == rt {
  127. o.exts = append(o.exts[:i], o.exts[i+1:]...)
  128. break
  129. }
  130. }
  131. }
  132. }
  133. if fn != nil {
  134. o.extFuncs[rt] = encExtTagFn{fn, tag}
  135. o.exts = append(o.exts, encExtTypeTagFn{rt, encExtTagFn{fn, tag}})
  136. }
  137. }
  138. func (o *encHandle) getEncodeExt(rt uintptr) (tag byte, fn func(reflect.Value) ([]byte, error)) {
  139. if l := len(o.exts); l == 0 {
  140. return
  141. } else if l < mapAccessThreshold {
  142. for i := 0; i < l; i++ {
  143. if o.exts[i].rt == rt {
  144. x := o.exts[i].encExtTagFn
  145. return x.tag, x.fn
  146. }
  147. }
  148. } else {
  149. x := o.extFuncs[rt]
  150. return x.tag, x.fn
  151. }
  152. return
  153. }
  154. func (o *EncodeOptions) structToArray() bool {
  155. return o.StructToArray
  156. }
  157. // NewEncoder returns an Encoder for encoding into an io.Writer.
  158. //
  159. // For efficiency, Users are encouraged to pass in a memory buffered writer
  160. // (eg bufio.Writer, bytes.Buffer).
  161. func NewEncoder(w io.Writer, h Handle) *Encoder {
  162. ww, ok := w.(ioEncWriterWriter)
  163. if !ok {
  164. sww := simpleIoEncWriterWriter{w: w}
  165. sww.bw, _ = w.(io.ByteWriter)
  166. sww.sw, _ = w.(ioEncStringWriter)
  167. ww = &sww
  168. //ww = bufio.NewWriterSize(w, defEncByteBufSize)
  169. }
  170. z := ioEncWriter{
  171. w: ww,
  172. }
  173. return &Encoder{w: &z, h: h, e: h.newEncDriver(&z)}
  174. }
  175. // NewEncoderBytes returns an encoder for encoding directly and efficiently
  176. // into a byte slice, using zero-copying to temporary slices.
  177. //
  178. // It will potentially replace the output byte slice pointed to.
  179. // After encoding, the out parameter contains the encoded contents.
  180. func NewEncoderBytes(out *[]byte, h Handle) *Encoder {
  181. in := *out
  182. if in == nil {
  183. in = make([]byte, defEncByteBufSize)
  184. }
  185. z := bytesEncWriter{
  186. b: in,
  187. out: out,
  188. }
  189. return &Encoder{w: &z, h: h, e: h.newEncDriver(&z)}
  190. }
// Encode writes an object into a stream in the codec format.
//
// Struct values "usually" encode as maps. Each exported struct field is encoded unless:
//    - the field's tag is "-", OR
//    - the field is empty and its tag specifies the "omitempty" option.
//
// However, struct values may encode as arrays. This happens if:
//    - StructToArray Encode option is set, OR
//    - the tag on the _struct field sets the "toarray" option
//
// The empty values are false, 0, any nil pointer or interface value,
// and any array, slice, map, or string of length zero.
//
// Anonymous fields are encoded inline if no struct tag is present.
// Else they are encoded as regular fields.
//
// The object's default key string is the struct field name but can be
// specified in the struct field's tag value.
// The "codec" key in struct field's tag value is the key name,
// followed by an optional comma and options.
//
// To set an option on all fields (e.g. omitempty on all fields), you
// can create a field called _struct, and set flags on it.
//
// Examples:
//
//      type MyStruct struct {
//          _struct bool    `codec:",omitempty"`   //set omitempty for every field
//          Field1 string   `codec:"-"`            //skip this field
//          Field2 int      `codec:"myName"`       //Use key "myName" in encode stream
//          Field3 int32    `codec:",omitempty"`   //use key "Field3". Omit if empty.
//          Field4 bool     `codec:"f4,omitempty"` //use key "f4". Omit if empty.
//          ...
//      }
//
//      type MyStruct struct {
//          _struct bool    `codec:",omitempty,toarray"`   //set omitempty for every field
//                                                         //and encode struct as an array
//      }
//
// Note:
//   - Encode will treat struct field names and keys in map[string]XXX as symbols.
//     Some formats support symbols (e.g. binc) and will properly encode the string
//     only once in the stream, and use a tag to refer to it thereafter.
//
// The encoding code signals failures by panicking; the deferred panicToErr
// call receives &err so that such a failure can be reported through the
// returned error (presumably by recovering the panic; panicToErr is defined
// elsewhere).
func (e *Encoder) Encode(v interface{}) (err error) {
	// Must remain a direct deferred call: recover only works when called
	// directly by the deferred function.
	defer panicToErr(&err)
	e.encode(v)
	// Lets the writer finalise its output (the byte-slice writer publishes
	// the written bytes to the caller's slice here).
	e.w.atEndOfEncode()
	return
}
  241. func (e *Encoder) encode(iv interface{}) {
  242. switch v := iv.(type) {
  243. case nil:
  244. e.e.encodeNil()
  245. case reflect.Value:
  246. e.encodeValue(v)
  247. case string:
  248. e.e.encodeString(c_UTF8, v)
  249. case bool:
  250. e.e.encodeBool(v)
  251. case int:
  252. e.e.encodeInt(int64(v))
  253. case int8:
  254. e.e.encodeInt(int64(v))
  255. case int16:
  256. e.e.encodeInt(int64(v))
  257. case int32:
  258. e.e.encodeInt(int64(v))
  259. case int64:
  260. e.e.encodeInt(v)
  261. case uint:
  262. e.e.encodeUint(uint64(v))
  263. case uint8:
  264. e.e.encodeUint(uint64(v))
  265. case uint16:
  266. e.e.encodeUint(uint64(v))
  267. case uint32:
  268. e.e.encodeUint(uint64(v))
  269. case uint64:
  270. e.e.encodeUint(v)
  271. case float32:
  272. e.e.encodeFloat32(v)
  273. case float64:
  274. e.e.encodeFloat64(v)
  275. case *string:
  276. e.e.encodeString(c_UTF8, *v)
  277. case *bool:
  278. e.e.encodeBool(*v)
  279. case *int:
  280. e.e.encodeInt(int64(*v))
  281. case *int8:
  282. e.e.encodeInt(int64(*v))
  283. case *int16:
  284. e.e.encodeInt(int64(*v))
  285. case *int32:
  286. e.e.encodeInt(int64(*v))
  287. case *int64:
  288. e.e.encodeInt(*v)
  289. case *uint:
  290. e.e.encodeUint(uint64(*v))
  291. case *uint8:
  292. e.e.encodeUint(uint64(*v))
  293. case *uint16:
  294. e.e.encodeUint(uint64(*v))
  295. case *uint32:
  296. e.e.encodeUint(uint64(*v))
  297. case *uint64:
  298. e.e.encodeUint(*v)
  299. case *float32:
  300. e.e.encodeFloat32(*v)
  301. case *float64:
  302. e.e.encodeFloat64(*v)
  303. default:
  304. e.encodeValue(reflect.ValueOf(iv))
  305. }
  306. }
  307. func (e *Encoder) encodeValue(rv reflect.Value) {
  308. rt := rv.Type()
  309. rtid := reflect.ValueOf(rt).Pointer()
  310. //encode based on type first, since over-rides are based on type.
  311. ee := e.e //don't dereference everytime
  312. if ee.encodeBuiltinType(rtid, rv) {
  313. return
  314. }
  315. //Note: tagFn must handle returning nil if value should be encoded as a nil.
  316. if xfTag, xfFn := e.h.getEncodeExt(rtid); xfFn != nil {
  317. bs, fnerr := xfFn(rv)
  318. if fnerr != nil {
  319. panic(fnerr)
  320. }
  321. if bs == nil {
  322. ee.encodeNil()
  323. return
  324. }
  325. if e.h.writeExt() {
  326. ee.encodeExtPreamble(xfTag, len(bs))
  327. e.w.writeb(bs)
  328. } else {
  329. ee.encodeStringBytes(c_RAW, bs)
  330. }
  331. return
  332. }
  333. // TODO: Encode if type is an encoding.BinaryMarshaler: MarshalBinary() (data []byte, err error)
  334. // There is a cost, as we need to change the rv to an interface{} first.
  335. // ensure more common cases appear early in switch.
  336. rk := rv.Kind()
  337. switch rk {
  338. case reflect.Bool:
  339. ee.encodeBool(rv.Bool())
  340. case reflect.String:
  341. ee.encodeString(c_UTF8, rv.String())
  342. case reflect.Float64:
  343. ee.encodeFloat64(rv.Float())
  344. case reflect.Float32:
  345. ee.encodeFloat32(float32(rv.Float()))
  346. case reflect.Slice:
  347. if rv.IsNil() {
  348. ee.encodeNil()
  349. break
  350. }
  351. if rt == byteSliceTyp {
  352. ee.encodeStringBytes(c_RAW, rv.Bytes())
  353. break
  354. }
  355. l := rv.Len()
  356. ee.encodeArrayPreamble(l)
  357. if l == 0 {
  358. break
  359. }
  360. for j := 0; j < l; j++ {
  361. e.encodeValue(rv.Index(j))
  362. }
  363. case reflect.Array:
  364. e.encodeValue(rv.Slice(0, rv.Len()))
  365. case reflect.Map:
  366. if rv.IsNil() {
  367. ee.encodeNil()
  368. break
  369. }
  370. l := rv.Len()
  371. ee.encodeMapPreamble(l)
  372. if l == 0 {
  373. break
  374. }
  375. keyTypeIsString := rt.Key().Kind() == reflect.String
  376. mks := rv.MapKeys()
  377. // for j, lmks := 0, len(mks); j < lmks; j++ {
  378. for j := range mks {
  379. if keyTypeIsString {
  380. ee.encodeSymbol(mks[j].String())
  381. } else {
  382. e.encodeValue(mks[j])
  383. }
  384. e.encodeValue(rv.MapIndex(mks[j]))
  385. }
  386. case reflect.Struct:
  387. sis := getStructFieldInfos(rtid, rt)
  388. e.encStruct(sis, rv)
  389. case reflect.Ptr:
  390. if rv.IsNil() {
  391. ee.encodeNil()
  392. break
  393. }
  394. e.encodeValue(rv.Elem())
  395. case reflect.Interface:
  396. if rv.IsNil() {
  397. ee.encodeNil()
  398. break
  399. }
  400. e.encodeValue(rv.Elem())
  401. case reflect.Int, reflect.Int8, reflect.Int64, reflect.Int32, reflect.Int16:
  402. ee.encodeInt(rv.Int())
  403. case reflect.Uint8, reflect.Uint64, reflect.Uint, reflect.Uint32, reflect.Uint16:
  404. ee.encodeUint(rv.Uint())
  405. case reflect.Invalid:
  406. ee.encodeNil()
  407. default:
  408. encErr("Unsupported kind: %s, for: %#v", rk, rv)
  409. }
  410. return
  411. }
  412. func (e *Encoder) encStruct(sis structFieldInfos, rv reflect.Value) {
  413. newlen := len(sis.sis)
  414. rvals := make([]reflect.Value, newlen)
  415. var encnames []string
  416. toMap := !(sis.toArray || e.h.structToArray())
  417. if toMap {
  418. encnames = make([]string, newlen)
  419. }
  420. newlen = 0
  421. // var rv0 reflect.Value
  422. // for i := 0; i < l; i++ {
  423. // si := sis[i]
  424. for _, si := range sis.sis {
  425. if si.i != -1 {
  426. rvals[newlen] = rv.Field(int(si.i))
  427. } else {
  428. rvals[newlen] = rv.FieldByIndex(si.is)
  429. }
  430. if toMap {
  431. if si.omitEmpty && isEmptyValue(rvals[newlen]) {
  432. continue
  433. }
  434. encnames[newlen] = si.encName
  435. } else {
  436. if si.omitEmpty && isEmptyValue(rvals[newlen]) {
  437. rvals[newlen] = reflect.Value{}
  438. }
  439. }
  440. newlen++
  441. }
  442. if toMap {
  443. ee := e.e //don't dereference everytime
  444. ee.encodeMapPreamble(newlen)
  445. for j := 0; j < newlen; j++ {
  446. ee.encodeSymbol(encnames[j])
  447. e.encodeValue(rvals[j])
  448. }
  449. } else {
  450. e.e.encodeArrayPreamble(newlen)
  451. for j := 0; j < newlen; j++ {
  452. e.encodeValue(rvals[j])
  453. }
  454. }
  455. }
  456. // ----------------------------------------
  457. func (z *ioEncWriter) writeUint16(v uint16) {
  458. bigen.PutUint16(z.x[:2], v)
  459. z.writeb(z.x[:2])
  460. }
  461. func (z *ioEncWriter) writeUint32(v uint32) {
  462. bigen.PutUint32(z.x[:4], v)
  463. z.writeb(z.x[:4])
  464. }
  465. func (z *ioEncWriter) writeUint64(v uint64) {
  466. bigen.PutUint64(z.x[:8], v)
  467. z.writeb(z.x[:8])
  468. }
  469. func (z *ioEncWriter) writeb(bs []byte) {
  470. n, err := z.w.Write(bs)
  471. if err != nil {
  472. panic(err)
  473. }
  474. if n != len(bs) {
  475. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(bs), n)
  476. }
  477. }
  478. func (z *ioEncWriter) writestr(s string) {
  479. n, err := z.w.WriteString(s)
  480. if err != nil {
  481. panic(err)
  482. }
  483. if n != len(s) {
  484. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(s), n)
  485. }
  486. }
  487. func (z *ioEncWriter) writen1(b byte) {
  488. if err := z.w.WriteByte(b); err != nil {
  489. panic(err)
  490. }
  491. }
  492. func (z *ioEncWriter) writen2(b1 byte, b2 byte) {
  493. z.writen1(b1)
  494. z.writen1(b2)
  495. }
// atEndOfEncode is a no-op for ioEncWriter; it exists to satisfy encWriter.
func (z *ioEncWriter) atEndOfEncode() { }
  497. // ----------------------------------------
  498. func (z *bytesEncWriter) writeUint16(v uint16) {
  499. c := z.grow(2)
  500. z.b[c] = byte(v >> 8)
  501. z.b[c+1] = byte(v)
  502. }
  503. func (z *bytesEncWriter) writeUint32(v uint32) {
  504. c := z.grow(4)
  505. z.b[c] = byte(v >> 24)
  506. z.b[c+1] = byte(v >> 16)
  507. z.b[c+2] = byte(v >> 8)
  508. z.b[c+3] = byte(v)
  509. }
  510. func (z *bytesEncWriter) writeUint64(v uint64) {
  511. c := z.grow(8)
  512. z.b[c] = byte(v >> 56)
  513. z.b[c+1] = byte(v >> 48)
  514. z.b[c+2] = byte(v >> 40)
  515. z.b[c+3] = byte(v >> 32)
  516. z.b[c+4] = byte(v >> 24)
  517. z.b[c+5] = byte(v >> 16)
  518. z.b[c+6] = byte(v >> 8)
  519. z.b[c+7] = byte(v)
  520. }
  521. func (z *bytesEncWriter) writeb(s []byte) {
  522. c := z.grow(len(s))
  523. copy(z.b[c:], s)
  524. }
  525. func (z *bytesEncWriter) writestr(s string) {
  526. c := z.grow(len(s))
  527. copy(z.b[c:], s)
  528. }
  529. func (z *bytesEncWriter) writen1(b1 byte) {
  530. c := z.grow(1)
  531. z.b[c] = b1
  532. }
  533. func (z *bytesEncWriter) writen2(b1 byte, b2 byte) {
  534. c := z.grow(2)
  535. z.b[c] = b1
  536. z.b[c+1] = b2
  537. }
  538. func (z *bytesEncWriter) atEndOfEncode() {
  539. *(z.out) = z.b[:z.c]
  540. }
  541. func (z *bytesEncWriter) grow(n int) (oldcursor int) {
  542. oldcursor = z.c
  543. z.c = oldcursor + n
  544. if z.c > cap(z.b) {
  545. // It tried using appendslice logic: (if cap < 1024, *2, else *1.25).
  546. // However, it was too expensive, causing too many iterations of copy.
  547. // Using bytes.Buffer model was much better (2*cap + n)
  548. bs := make([]byte, 2*cap(z.b)+n)
  549. copy(bs, z.b[:oldcursor])
  550. z.b = bs
  551. } else if z.c > len(z.b) {
  552. z.b = z.b[:cap(z.b)]
  553. }
  554. return
  555. }
  556. // ----------------------------------------
// encErr reports an encoding failure by forwarding the format and its
// parameters to doPanic, tagged with msgTagEnc.
func encErr(format string, params ...interface{}) {
	doPanic(msgTagEnc, format, params...)
}