encode.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624
  1. // Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license found in the LICENSE file.
  3. package codec
  4. import (
  5. //"bufio"
  6. "io"
  7. "reflect"
  8. //"fmt"
  9. )
  10. //var _ = fmt.Printf
const (
	// Some tagging information for error messages.
	msgTagEnc = "codec.encoder"

	// defEncByteBufSize is the length of the buffer NewEncoderBytes allocates
	// when the caller hands it a nil slice.
	defEncByteBufSize = 1 << 6 // 4:16, 6:64, 8:256, 10:1024
	// maxTimeSecs32 = math.MaxInt32 / 60 / 24 / 366
)
  17. // encWriter abstracting writing to a byte array or to an io.Writer.
  18. type encWriter interface {
  19. writeUint16(uint16)
  20. writeUint32(uint32)
  21. writeUint64(uint64)
  22. writeb([]byte)
  23. writestr(string)
  24. writen1(byte)
  25. writen2(byte, byte)
  26. atEndOfEncode()
  27. }
  28. type encDriver interface {
  29. encodeBuiltinType(rt uintptr, rv reflect.Value) bool
  30. encodeNil()
  31. encodeInt(i int64)
  32. encodeUint(i uint64)
  33. encodeBool(b bool)
  34. encodeFloat32(f float32)
  35. encodeFloat64(f float64)
  36. encodeExtPreamble(xtag byte, length int)
  37. encodeArrayPreamble(length int)
  38. encodeMapPreamble(length int)
  39. encodeString(c charEncoding, v string)
  40. encodeSymbol(v string)
  41. encodeStringBytes(c charEncoding, v []byte)
  42. //TODO
  43. //encBignum(f *big.Int)
  44. //encStringRunes(c charEncoding, v []rune)
  45. }
  46. type encodeHandleI interface {
  47. getEncodeExt(rt uintptr) (tag byte, fn func(reflect.Value) ([]byte, error))
  48. writeExt() bool
  49. structToArray() bool
  50. }
// An Encoder writes an object to an output stream in the codec format.
//
// Instances are created with NewEncoder or NewEncoderBytes.
type Encoder struct {
	// w is the sink that receives the encoded bytes.
	w encWriter
	// e is the format-specific driver, obtained from the handle's newEncDriver.
	e encDriver
	// h supplies extension lookup and the struct-to-array setting.
	h encodeHandleI
}
  57. type ioEncWriterWriter interface {
  58. WriteByte(c byte) error
  59. WriteString(s string) (n int, err error)
  60. Write(p []byte) (n int, err error)
  61. }
  62. type ioEncStringWriter interface {
  63. WriteString(s string) (n int, err error)
  64. }
// simpleIoEncWriterWriter adapts a plain io.Writer to ioEncWriterWriter.
// NewEncoder uses it when the supplied writer does not already provide
// all of Write, WriteByte and WriteString.
type simpleIoEncWriterWriter struct {
	// w is the wrapped writer; it is always set.
	w io.Writer
	// bw is w viewed as an io.ByteWriter, or nil if w does not implement it.
	bw io.ByteWriter
	// sw is w viewed as an ioEncStringWriter, or nil if w does not implement it.
	sw ioEncStringWriter
}
// ioEncWriter implements encWriter and can write to an io.Writer implementation
type ioEncWriter struct {
	// w is the destination for all writes.
	w ioEncWriterWriter
	x [8]byte // temp byte array re-used internally for efficiency
}
// bytesEncWriter implements encWriter and can write to a byte slice.
// It is used by Marshal function.
type bytesEncWriter struct {
	// b is the working buffer; grow replaces it when it runs out of capacity.
	b []byte
	c int // cursor
	out *[]byte // write out on atEndOfEncode
}
// encExtTagFn pairs an extension encode function with the tag byte it is
// registered under.
//
// The field order matters: addEncodeExt builds values with unkeyed literals.
type encExtTagFn struct {
	// fn converts a value into the bytes written for the extension.
	fn func(reflect.Value) ([]byte, error)
	// tag is the extension tag byte passed to the driver's ext preamble.
	tag byte
}
// encExtTypeTagFn is an encExtTagFn together with the type id it applies to.
// It is the element type of the slice scanned by getEncodeExt.
type encExtTypeTagFn struct {
	// rt is the type id the extension is registered for.
	rt uintptr
	encExtTagFn
}
// encHandle holds the registered encode extension functions.
//
// The same registrations are kept twice: in a map keyed by type id, and in a
// slice. getEncodeExt scans the slice while it is short and uses the map
// otherwise.
type encHandle struct {
	// extFuncs maps a type id to its extension function and tag.
	extFuncs map[uintptr]encExtTagFn
	// exts lists the same registrations in insertion order.
	exts []encExtTypeTagFn
}
// EncodeOptions holds options that control how values are encoded.
type EncodeOptions struct {
	// Encode a struct as an array, and not as a map.
	StructToArray bool
}
  99. func (o *simpleIoEncWriterWriter) WriteByte(c byte) (err error) {
  100. if o.bw != nil {
  101. return o.bw.WriteByte(c)
  102. }
  103. _, err = o.w.Write([]byte{c})
  104. return
  105. }
  106. func (o *simpleIoEncWriterWriter) WriteString(s string) (n int, err error) {
  107. if o.sw != nil {
  108. return o.sw.WriteString(s)
  109. }
  110. return o.w.Write([]byte(s))
  111. }
  112. func (o *simpleIoEncWriterWriter) Write(p []byte) (n int, err error) {
  113. return o.w.Write(p)
  114. }
  115. // addEncodeExt registers a function to handle encoding a given type as an extension
  116. // with a specific specific tag byte.
  117. // To remove an extension, pass fn=nil.
  118. func (o *encHandle) addEncodeExt(rt uintptr, tag byte, fn func(reflect.Value) ([]byte, error)) {
  119. if o.exts == nil {
  120. o.exts = make([]encExtTypeTagFn, 0, 8)
  121. o.extFuncs = make(map[uintptr]encExtTagFn, 8)
  122. } else {
  123. if _, ok := o.extFuncs[rt]; ok {
  124. delete(o.extFuncs, rt)
  125. for i := 0; i < len(o.exts); i++ {
  126. if o.exts[i].rt == rt {
  127. o.exts = append(o.exts[:i], o.exts[i+1:]...)
  128. break
  129. }
  130. }
  131. }
  132. }
  133. if fn != nil {
  134. o.extFuncs[rt] = encExtTagFn{fn, tag}
  135. o.exts = append(o.exts, encExtTypeTagFn{rt, encExtTagFn{fn, tag}})
  136. }
  137. }
  138. func (o *encHandle) getEncodeExt(rt uintptr) (tag byte, fn func(reflect.Value) ([]byte, error)) {
  139. if l := len(o.exts); l == 0 {
  140. return
  141. } else if l < mapAccessThreshold {
  142. for i := 0; i < l; i++ {
  143. if o.exts[i].rt == rt {
  144. x := o.exts[i].encExtTagFn
  145. return x.tag, x.fn
  146. }
  147. }
  148. } else {
  149. x := o.extFuncs[rt]
  150. return x.tag, x.fn
  151. }
  152. return
  153. }
  154. func (o *EncodeOptions) structToArray() bool {
  155. return o.StructToArray
  156. }
  157. // NewEncoder returns an Encoder for encoding into an io.Writer.
  158. //
  159. // For efficiency, Users are encouraged to pass in a memory buffered writer
  160. // (eg bufio.Writer, bytes.Buffer).
  161. func NewEncoder(w io.Writer, h Handle) *Encoder {
  162. ww, ok := w.(ioEncWriterWriter)
  163. if !ok {
  164. sww := simpleIoEncWriterWriter{w: w}
  165. sww.bw, _ = w.(io.ByteWriter)
  166. sww.sw, _ = w.(ioEncStringWriter)
  167. ww = &sww
  168. //ww = bufio.NewWriterSize(w, defEncByteBufSize)
  169. }
  170. z := ioEncWriter{
  171. w: ww,
  172. }
  173. return &Encoder{w: &z, h: h, e: h.newEncDriver(&z)}
  174. }
  175. // NewEncoderBytes returns an encoder for encoding directly and efficiently
  176. // into a byte slice, using zero-copying to temporary slices.
  177. //
  178. // It will potentially replace the output byte slice pointed to.
  179. // After encoding, the out parameter contains the encoded contents.
  180. func NewEncoderBytes(out *[]byte, h Handle) *Encoder {
  181. in := *out
  182. if in == nil {
  183. in = make([]byte, defEncByteBufSize)
  184. }
  185. z := bytesEncWriter{
  186. b: in,
  187. out: out,
  188. }
  189. return &Encoder{w: &z, h: h, e: h.newEncDriver(&z)}
  190. }
  191. // Encode writes an object into a stream in the codec format.
  192. //
  193. // Encoding can be configured via the "codec" struct tag for the fields.
  194. //
  195. // The "codec" key in struct field's tag value is the key name,
  196. // followed by an optional comma and options.
  197. //
  198. // To set an option on all fields (e.g. omitempty on all fields), you
  199. // can create a field called _struct, and set flags on it.
  200. //
  201. // Struct values "usually" encode as maps. Each exported struct field is encoded unless:
  202. // - the field's codec tag is "-", OR
  203. // - the field is empty and its codec tag specifies the "omitempty" option.
  204. //
  205. // When encoding as a map, the first string in the tag (before the comma)
  206. // is the map key string to use when encoding.
  207. //
  208. // However, struct values may encode as arrays. This happens when:
  209. // - StructToArray Encode option is set, OR
  210. // - the codec tag on the _struct field sets the "toarray" option
  211. //
  212. // The empty values (for omitempty option) are false, 0, any nil pointer
  213. // or interface value, and any array, slice, map, or string of length zero.
  214. //
  215. // Anonymous fields are encoded inline if no struct tag is present.
  216. // Else they are encoded as regular fields.
  217. //
  218. // Examples:
  219. //
  220. // type MyStruct struct {
  221. // _struct bool `codec:",omitempty"` //set omitempty for every field
  222. // Field1 string `codec:"-"` //skip this field
  223. // Field2 int `codec:"myName"` //Use key "myName" in encode stream
  224. // Field3 int32 `codec:",omitempty"` //use key "Field3". Omit if empty.
  225. // Field4 bool `codec:"f4,omitempty"` //use key "f4". Omit if empty.
  226. // ...
  227. // }
  228. //
  229. // type MyStruct struct {
  230. // _struct bool `codec:",omitempty,toarray"` //set omitempty for every field
  231. // //and encode struct as an array
  232. // }
  233. //
  234. // Note:
  235. // - Encode will treat struct field names and keys in map[string]XXX as symbols.
  236. // Some formats support symbols (e.g. binc) and will properly encode the string
  237. // only once in the stream, and use a tag to refer to it thereafter.
  238. func (e *Encoder) Encode(v interface{}) (err error) {
  239. defer panicToErr(&err)
  240. e.encode(v)
  241. e.w.atEndOfEncode()
  242. return
  243. }
  244. func (e *Encoder) encode(iv interface{}) {
  245. switch v := iv.(type) {
  246. case nil:
  247. e.e.encodeNil()
  248. case reflect.Value:
  249. e.encodeValue(v)
  250. case string:
  251. e.e.encodeString(c_UTF8, v)
  252. case bool:
  253. e.e.encodeBool(v)
  254. case int:
  255. e.e.encodeInt(int64(v))
  256. case int8:
  257. e.e.encodeInt(int64(v))
  258. case int16:
  259. e.e.encodeInt(int64(v))
  260. case int32:
  261. e.e.encodeInt(int64(v))
  262. case int64:
  263. e.e.encodeInt(v)
  264. case uint:
  265. e.e.encodeUint(uint64(v))
  266. case uint8:
  267. e.e.encodeUint(uint64(v))
  268. case uint16:
  269. e.e.encodeUint(uint64(v))
  270. case uint32:
  271. e.e.encodeUint(uint64(v))
  272. case uint64:
  273. e.e.encodeUint(v)
  274. case float32:
  275. e.e.encodeFloat32(v)
  276. case float64:
  277. e.e.encodeFloat64(v)
  278. case *string:
  279. e.e.encodeString(c_UTF8, *v)
  280. case *bool:
  281. e.e.encodeBool(*v)
  282. case *int:
  283. e.e.encodeInt(int64(*v))
  284. case *int8:
  285. e.e.encodeInt(int64(*v))
  286. case *int16:
  287. e.e.encodeInt(int64(*v))
  288. case *int32:
  289. e.e.encodeInt(int64(*v))
  290. case *int64:
  291. e.e.encodeInt(*v)
  292. case *uint:
  293. e.e.encodeUint(uint64(*v))
  294. case *uint8:
  295. e.e.encodeUint(uint64(*v))
  296. case *uint16:
  297. e.e.encodeUint(uint64(*v))
  298. case *uint32:
  299. e.e.encodeUint(uint64(*v))
  300. case *uint64:
  301. e.e.encodeUint(*v)
  302. case *float32:
  303. e.e.encodeFloat32(*v)
  304. case *float64:
  305. e.e.encodeFloat64(*v)
  306. default:
  307. e.encodeValue(reflect.ValueOf(iv))
  308. }
  309. }
  310. func (e *Encoder) encodeValue(rv reflect.Value) {
  311. rt := rv.Type()
  312. rtid := reflect.ValueOf(rt).Pointer()
  313. //encode based on type first, since over-rides are based on type.
  314. ee := e.e //don't dereference everytime
  315. if ee.encodeBuiltinType(rtid, rv) {
  316. return
  317. }
  318. //Note: tagFn must handle returning nil if value should be encoded as a nil.
  319. if xfTag, xfFn := e.h.getEncodeExt(rtid); xfFn != nil {
  320. bs, fnerr := xfFn(rv)
  321. if fnerr != nil {
  322. panic(fnerr)
  323. }
  324. if bs == nil {
  325. ee.encodeNil()
  326. return
  327. }
  328. if e.h.writeExt() {
  329. ee.encodeExtPreamble(xfTag, len(bs))
  330. e.w.writeb(bs)
  331. } else {
  332. ee.encodeStringBytes(c_RAW, bs)
  333. }
  334. return
  335. }
  336. // TODO: Encode if type is an encoding.BinaryMarshaler: MarshalBinary() (data []byte, err error)
  337. // There is a cost, as we need to change the rv to an interface{} first.
  338. // ensure more common cases appear early in switch.
  339. rk := rv.Kind()
  340. switch rk {
  341. case reflect.Bool:
  342. ee.encodeBool(rv.Bool())
  343. case reflect.String:
  344. ee.encodeString(c_UTF8, rv.String())
  345. case reflect.Float64:
  346. ee.encodeFloat64(rv.Float())
  347. case reflect.Float32:
  348. ee.encodeFloat32(float32(rv.Float()))
  349. case reflect.Slice:
  350. if rv.IsNil() {
  351. ee.encodeNil()
  352. break
  353. }
  354. if rt == byteSliceTyp {
  355. ee.encodeStringBytes(c_RAW, rv.Bytes())
  356. break
  357. }
  358. l := rv.Len()
  359. ee.encodeArrayPreamble(l)
  360. if l == 0 {
  361. break
  362. }
  363. for j := 0; j < l; j++ {
  364. e.encodeValue(rv.Index(j))
  365. }
  366. case reflect.Array:
  367. e.encodeValue(rv.Slice(0, rv.Len()))
  368. case reflect.Map:
  369. if rv.IsNil() {
  370. ee.encodeNil()
  371. break
  372. }
  373. l := rv.Len()
  374. ee.encodeMapPreamble(l)
  375. if l == 0 {
  376. break
  377. }
  378. keyTypeIsString := rt.Key().Kind() == reflect.String
  379. mks := rv.MapKeys()
  380. // for j, lmks := 0, len(mks); j < lmks; j++ {
  381. for j := range mks {
  382. if keyTypeIsString {
  383. ee.encodeSymbol(mks[j].String())
  384. } else {
  385. e.encodeValue(mks[j])
  386. }
  387. e.encodeValue(rv.MapIndex(mks[j]))
  388. }
  389. case reflect.Struct:
  390. sis := getStructFieldInfos(rtid, rt)
  391. e.encStruct(sis, rv)
  392. case reflect.Ptr:
  393. if rv.IsNil() {
  394. ee.encodeNil()
  395. break
  396. }
  397. e.encodeValue(rv.Elem())
  398. case reflect.Interface:
  399. if rv.IsNil() {
  400. ee.encodeNil()
  401. break
  402. }
  403. e.encodeValue(rv.Elem())
  404. case reflect.Int, reflect.Int8, reflect.Int64, reflect.Int32, reflect.Int16:
  405. ee.encodeInt(rv.Int())
  406. case reflect.Uint8, reflect.Uint64, reflect.Uint, reflect.Uint32, reflect.Uint16:
  407. ee.encodeUint(rv.Uint())
  408. case reflect.Invalid:
  409. ee.encodeNil()
  410. default:
  411. encErr("Unsupported kind: %s, for: %#v", rk, rv)
  412. }
  413. return
  414. }
  415. func (e *Encoder) encStruct(sis *structFieldInfos, rv reflect.Value) {
  416. newlen := len(sis.sis)
  417. rvals := make([]reflect.Value, newlen)
  418. var encnames []string
  419. toMap := !(sis.toArray || e.h.structToArray())
  420. sissis := sis.sisp
  421. // if toMap, use the sorted array. If toArray, use unsorted array (to match sequence in struct)
  422. if toMap {
  423. sissis = sis.sis
  424. encnames = make([]string, newlen)
  425. }
  426. newlen = 0
  427. for _, si := range sissis {
  428. if si.i != -1 {
  429. rvals[newlen] = rv.Field(int(si.i))
  430. } else {
  431. rvals[newlen] = rv.FieldByIndex(si.is)
  432. }
  433. if toMap {
  434. if si.omitEmpty && isEmptyValue(rvals[newlen]) {
  435. continue
  436. }
  437. encnames[newlen] = si.encName
  438. } else {
  439. if si.omitEmpty && isEmptyValue(rvals[newlen]) {
  440. rvals[newlen] = reflect.Value{} //encode as nil
  441. }
  442. }
  443. newlen++
  444. }
  445. if toMap {
  446. ee := e.e //don't dereference everytime
  447. ee.encodeMapPreamble(newlen)
  448. for j := 0; j < newlen; j++ {
  449. ee.encodeSymbol(encnames[j])
  450. e.encodeValue(rvals[j])
  451. }
  452. } else {
  453. e.e.encodeArrayPreamble(newlen)
  454. for j := 0; j < newlen; j++ {
  455. e.encodeValue(rvals[j])
  456. }
  457. }
  458. }
  459. // ----------------------------------------
  460. func (z *ioEncWriter) writeUint16(v uint16) {
  461. bigen.PutUint16(z.x[:2], v)
  462. z.writeb(z.x[:2])
  463. }
  464. func (z *ioEncWriter) writeUint32(v uint32) {
  465. bigen.PutUint32(z.x[:4], v)
  466. z.writeb(z.x[:4])
  467. }
  468. func (z *ioEncWriter) writeUint64(v uint64) {
  469. bigen.PutUint64(z.x[:8], v)
  470. z.writeb(z.x[:8])
  471. }
  472. func (z *ioEncWriter) writeb(bs []byte) {
  473. n, err := z.w.Write(bs)
  474. if err != nil {
  475. panic(err)
  476. }
  477. if n != len(bs) {
  478. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(bs), n)
  479. }
  480. }
  481. func (z *ioEncWriter) writestr(s string) {
  482. n, err := z.w.WriteString(s)
  483. if err != nil {
  484. panic(err)
  485. }
  486. if n != len(s) {
  487. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(s), n)
  488. }
  489. }
  490. func (z *ioEncWriter) writen1(b byte) {
  491. if err := z.w.WriteByte(b); err != nil {
  492. panic(err)
  493. }
  494. }
  495. func (z *ioEncWriter) writen2(b1 byte, b2 byte) {
  496. z.writen1(b1)
  497. z.writen1(b2)
  498. }
  499. func (z *ioEncWriter) atEndOfEncode() { }
  500. // ----------------------------------------
  501. func (z *bytesEncWriter) writeUint16(v uint16) {
  502. c := z.grow(2)
  503. z.b[c] = byte(v >> 8)
  504. z.b[c+1] = byte(v)
  505. }
  506. func (z *bytesEncWriter) writeUint32(v uint32) {
  507. c := z.grow(4)
  508. z.b[c] = byte(v >> 24)
  509. z.b[c+1] = byte(v >> 16)
  510. z.b[c+2] = byte(v >> 8)
  511. z.b[c+3] = byte(v)
  512. }
  513. func (z *bytesEncWriter) writeUint64(v uint64) {
  514. c := z.grow(8)
  515. z.b[c] = byte(v >> 56)
  516. z.b[c+1] = byte(v >> 48)
  517. z.b[c+2] = byte(v >> 40)
  518. z.b[c+3] = byte(v >> 32)
  519. z.b[c+4] = byte(v >> 24)
  520. z.b[c+5] = byte(v >> 16)
  521. z.b[c+6] = byte(v >> 8)
  522. z.b[c+7] = byte(v)
  523. }
  524. func (z *bytesEncWriter) writeb(s []byte) {
  525. c := z.grow(len(s))
  526. copy(z.b[c:], s)
  527. }
  528. func (z *bytesEncWriter) writestr(s string) {
  529. c := z.grow(len(s))
  530. copy(z.b[c:], s)
  531. }
  532. func (z *bytesEncWriter) writen1(b1 byte) {
  533. c := z.grow(1)
  534. z.b[c] = b1
  535. }
  536. func (z *bytesEncWriter) writen2(b1 byte, b2 byte) {
  537. c := z.grow(2)
  538. z.b[c] = b1
  539. z.b[c+1] = b2
  540. }
  541. func (z *bytesEncWriter) atEndOfEncode() {
  542. *(z.out) = z.b[:z.c]
  543. }
  544. func (z *bytesEncWriter) grow(n int) (oldcursor int) {
  545. oldcursor = z.c
  546. z.c = oldcursor + n
  547. if z.c > cap(z.b) {
  548. // It tried using appendslice logic: (if cap < 1024, *2, else *1.25).
  549. // However, it was too expensive, causing too many iterations of copy.
  550. // Using bytes.Buffer model was much better (2*cap + n)
  551. bs := make([]byte, 2*cap(z.b)+n)
  552. copy(bs, z.b[:oldcursor])
  553. z.b = bs
  554. } else if z.c > len(z.b) {
  555. z.b = z.b[:cap(z.b)]
  556. }
  557. return
  558. }
  559. // ----------------------------------------
  560. func encErr(format string, params ...interface{}) {
  561. doPanic(msgTagEnc, format, params...)
  562. }