encode.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. // Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license found in the LICENSE file.
  3. package codec
  4. import (
  5. //"bufio"
  6. "io"
  7. "reflect"
  8. //"fmt"
  9. )
  10. //var _ = fmt.Printf
const (
	// msgTagEnc is the tag handed to doPanic so that error messages
	// identify the encoder as their origin.
	msgTagEnc = "codec.encoder"

	// defEncByteBufSize is the length (1<<6 = 64 bytes) of the buffer that
	// NewEncoderBytes allocates when the caller's slice is nil.
	defEncByteBufSize = 1 << 6 // 4:16, 6:64, 8:256, 10:1024

	// maxTimeSecs32 = math.MaxInt32 / 60 / 24 / 366
)
// encWriter abstracts writing encoded output either to a byte slice
// (bytesEncWriter) or to an io.Writer (ioEncWriter).
//
// None of the methods return an error; the ioEncWriter implementation panics
// when the underlying writer fails.
type encWriter interface {
	// writeUint16, writeUint32 and writeUint64 write a fixed-width integer.
	// bytesEncWriter emits the most significant byte first.
	writeUint16(uint16)
	writeUint32(uint32)
	writeUint64(uint64)
	// writeb and writestr append the bytes of a slice / string.
	writeb([]byte)
	writestr(string)
	// writen1 and writen2 append one / two single bytes.
	writen1(byte)
	writen2(byte, byte)
	// atEndOfEncode is called by Encoder.Encode after a value has been
	// encoded without panicking.
	atEndOfEncode()
}
// encDriver emits individual values and container preambles.
// An Encoder obtains its driver from Handle.newEncDriver and calls it for
// every primitive, string, byte slice and array/map header it encodes.
type encDriver interface {
	// encodeBuiltinType is tried first for every reflected value; when it
	// returns true, encodeValue treats the value as fully handled.
	encodeBuiltinType(rt uintptr, rv reflect.Value) bool
	encodeNil()
	encodeInt(i int64)
	encodeUint(i uint64)
	encodeBool(b bool)
	encodeFloat32(f float32)
	encodeFloat64(f float64)
	// encodeExtPreamble precedes the raw bytes of an extension value;
	// length is the number of bytes that follow.
	encodeExtPreamble(xtag byte, length int)
	// encodeArrayPreamble / encodeMapPreamble precede the elements of an
	// array or the key/value pairs of a map; length is the element or
	// entry count.
	encodeArrayPreamble(length int)
	encodeMapPreamble(length int)
	encodeString(c charEncoding, v string)
	// encodeSymbol is used for struct field names and string map keys.
	encodeSymbol(v string)
	encodeStringBytes(c charEncoding, v []byte)
	//TODO
	//encBignum(f *big.Int)
	//encStringRunes(c charEncoding, v []rune)
}
// encodeHandleI is the part of a handle that the Encoder consults while
// encoding.
type encodeHandleI interface {
	// getEncodeExt returns the extension tag and encode function registered
	// for the type id rt. Callers treat a nil fn as "no extension".
	getEncodeExt(rt uintptr) (tag byte, fn func(reflect.Value) ([]byte, error))
	// writeExt selects how extension bytes are written: with an extension
	// preamble when true, as a plain raw byte string when false.
	writeExt() bool
}
// An Encoder writes an object to an output stream in the codec format.
type Encoder struct {
	w encWriter     // destination of the encoded bytes
	e encDriver     // emits individual values; created by the handle for w
	h encodeHandleI // supplies registered extensions
}
// ioEncWriterWriter is the set of write methods ioEncWriter needs from its
// destination. NewEncoder uses the caller's io.Writer directly when it
// already provides all three, and wraps it in a simpleIoEncWriterWriter
// otherwise.
type ioEncWriterWriter interface {
	WriteByte(c byte) error
	WriteString(s string) (n int, err error)
	Write(p []byte) (n int, err error)
}
// ioEncStringWriter is implemented by writers that can write a string
// directly; simpleIoEncWriterWriter uses it when available.
type ioEncStringWriter interface {
	WriteString(s string) (n int, err error)
}
// simpleIoEncWriterWriter adapts a plain io.Writer to ioEncWriterWriter.
// bw and sw are optional: NewEncoder sets them when the wrapped writer also
// implements io.ByteWriter or ioEncStringWriter, and the methods fall back to
// w.Write when they are nil.
type simpleIoEncWriterWriter struct {
	w  io.Writer
	bw io.ByteWriter
	sw ioEncStringWriter
}
// ioEncWriter implements encWriter and can write to an io.Writer implementation.
// Write failures and short writes are reported by panicking.
type ioEncWriter struct {
	w ioEncWriterWriter
	x [8]byte // temp byte array re-used internally for efficiency (scratch for writeUint16/32/64)
}
// bytesEncWriter implements encWriter and can write to a byte slice.
// It is used by Marshal function.
// It is the writer that NewEncoderBytes installs.
type bytesEncWriter struct {
	b   []byte  // buffer being written into; replaced by a larger one when grow runs out of capacity
	c   int     // cursor: index in b at which the next write starts
	out *[]byte // write out on atEndOfEncode: *out is set to b[:c]
}
// encExtTagFn pairs an extension's encode function with the tag byte that is
// written for it.
type encExtTagFn struct {
	fn  func(reflect.Value) ([]byte, error)
	tag byte
}
// encExtTypeTagFn is an encExtTagFn together with the type id (rt) it is
// registered for; it is the element type of encHandle.exts.
type encExtTypeTagFn struct {
	rt uintptr
	encExtTagFn
}
// encHandle holds the encode extensions registered through addEncodeExt.
//
// Every registration is stored twice: in extFuncs, keyed by type id, and in
// exts, a slice in registration order. getEncodeExt scans exts while it holds
// fewer than mapAccessThreshold entries and uses the map otherwise.
type encHandle struct {
	extFuncs map[uintptr]encExtTagFn
	exts     []encExtTypeTagFn
}
  94. func (o *simpleIoEncWriterWriter) WriteByte(c byte) (err error) {
  95. if o.bw != nil {
  96. return o.bw.WriteByte(c)
  97. }
  98. _, err = o.w.Write([]byte{c})
  99. return
  100. }
  101. func (o *simpleIoEncWriterWriter) WriteString(s string) (n int, err error) {
  102. if o.sw != nil {
  103. return o.sw.WriteString(s)
  104. }
  105. return o.w.Write([]byte(s))
  106. }
  107. func (o *simpleIoEncWriterWriter) Write(p []byte) (n int, err error) {
  108. return o.w.Write(p)
  109. }
  110. // addEncodeExt registers a function to handle encoding a given type as an extension
  111. // with a specific specific tag byte.
  112. // To remove an extension, pass fn=nil.
  113. func (o *encHandle) addEncodeExt(rt uintptr, tag byte, fn func(reflect.Value) ([]byte, error)) {
  114. if o.exts == nil {
  115. o.exts = make([]encExtTypeTagFn, 0, 8)
  116. o.extFuncs = make(map[uintptr]encExtTagFn, 8)
  117. } else {
  118. if _, ok := o.extFuncs[rt]; ok {
  119. delete(o.extFuncs, rt)
  120. for i := 0; i < len(o.exts); i++ {
  121. if o.exts[i].rt == rt {
  122. o.exts = append(o.exts[:i], o.exts[i+1:]...)
  123. break
  124. }
  125. }
  126. }
  127. }
  128. if fn != nil {
  129. o.extFuncs[rt] = encExtTagFn{fn, tag}
  130. o.exts = append(o.exts, encExtTypeTagFn{rt, encExtTagFn{fn, tag}})
  131. }
  132. }
  133. func (o *encHandle) getEncodeExt(rt uintptr) (tag byte, fn func(reflect.Value) ([]byte, error)) {
  134. if l := len(o.exts); l == 0 {
  135. return
  136. } else if l < mapAccessThreshold {
  137. for i := 0; i < l; i++ {
  138. if o.exts[i].rt == rt {
  139. x := o.exts[i].encExtTagFn
  140. return x.tag, x.fn
  141. }
  142. }
  143. } else {
  144. x := o.extFuncs[rt]
  145. return x.tag, x.fn
  146. }
  147. return
  148. }
  149. // NewEncoder returns an Encoder for encoding into an io.Writer.
  150. //
  151. // For efficiency, Users are encouraged to pass in a memory buffered writer
  152. // (eg bufio.Writer, bytes.Buffer).
  153. func NewEncoder(w io.Writer, h Handle) *Encoder {
  154. ww, ok := w.(ioEncWriterWriter)
  155. if !ok {
  156. sww := simpleIoEncWriterWriter{w: w}
  157. sww.bw, _ = w.(io.ByteWriter)
  158. sww.sw, _ = w.(ioEncStringWriter)
  159. ww = &sww
  160. //ww = bufio.NewWriterSize(w, defEncByteBufSize)
  161. }
  162. z := ioEncWriter{
  163. w: ww,
  164. }
  165. return &Encoder{w: &z, h: h, e: h.newEncDriver(&z)}
  166. }
  167. // NewEncoderBytes returns an encoder for encoding directly and efficiently
  168. // into a byte slice, using zero-copying to temporary slices.
  169. //
  170. // It will potentially replace the output byte slice pointed to.
  171. // After encoding, the out parameter contains the encoded contents.
  172. func NewEncoderBytes(out *[]byte, h Handle) *Encoder {
  173. in := *out
  174. if in == nil {
  175. in = make([]byte, defEncByteBufSize)
  176. }
  177. z := bytesEncWriter{
  178. b: in,
  179. out: out,
  180. }
  181. return &Encoder{w: &z, h: h, e: h.newEncDriver(&z)}
  182. }
  183. // Encode writes an object into a stream in the codec format.
  184. //
  185. // Struct values encode as maps. Each exported struct field is encoded unless:
  186. // - the field's tag is "-", or
  187. // - the field is empty and its tag specifies the "omitempty" option.
  188. //
  189. // The empty values are false, 0, any nil pointer or interface value,
  190. // and any array, slice, map, or string of length zero.
  191. //
  192. // Anonymous fields are encoded inline if no struct tag is present.
  193. // Else they are encoded as regular fields.
  194. //
  195. // The object's default key string is the struct field name but can be
  196. // specified in the struct field's tag value.
  197. // The "codec" key in struct field's tag value is the key name,
  198. // followed by an optional comma and options.
  199. //
  200. // To set an option on all fields (e.g. omitempty on all fields), you
  201. // can create a field called _struct, and set flags on it.
  202. //
  203. // Examples:
  204. //
  205. // type MyStruct struct {
  206. // _struct bool `codec:",omitempty"` //set omitempty for every field
  207. // Field1 string `codec:"-"` //skip this field
  208. // Field2 int `codec:"myName"` //Use key "myName" in encode stream
  209. // Field3 int32 `codec:",omitempty"` //use key "Field3". Omit if empty.
  210. // Field4 bool `codec:"f4,omitempty"` //use key "f4". Omit if empty.
  211. // ...
  212. // }
  213. //
  214. // Note:
  215. // - Encode will treat struct field names and keys in map[string]XXX as symbols.
  216. // Some formats support symbols (e.g. binc) and will properly encode the string
  217. // only once in the stream, and use a tag to refer to it thereafter.
  218. func (e *Encoder) Encode(v interface{}) (err error) {
  219. defer panicToErr(&err)
  220. e.encode(v)
  221. e.w.atEndOfEncode()
  222. return
  223. }
  224. func (e *Encoder) encode(iv interface{}) {
  225. switch v := iv.(type) {
  226. case nil:
  227. e.e.encodeNil()
  228. case reflect.Value:
  229. e.encodeValue(v)
  230. case string:
  231. e.e.encodeString(c_UTF8, v)
  232. case bool:
  233. e.e.encodeBool(v)
  234. case int:
  235. e.e.encodeInt(int64(v))
  236. case int8:
  237. e.e.encodeInt(int64(v))
  238. case int16:
  239. e.e.encodeInt(int64(v))
  240. case int32:
  241. e.e.encodeInt(int64(v))
  242. case int64:
  243. e.e.encodeInt(v)
  244. case uint:
  245. e.e.encodeUint(uint64(v))
  246. case uint8:
  247. e.e.encodeUint(uint64(v))
  248. case uint16:
  249. e.e.encodeUint(uint64(v))
  250. case uint32:
  251. e.e.encodeUint(uint64(v))
  252. case uint64:
  253. e.e.encodeUint(v)
  254. case float32:
  255. e.e.encodeFloat32(v)
  256. case float64:
  257. e.e.encodeFloat64(v)
  258. case *string:
  259. e.e.encodeString(c_UTF8, *v)
  260. case *bool:
  261. e.e.encodeBool(*v)
  262. case *int:
  263. e.e.encodeInt(int64(*v))
  264. case *int8:
  265. e.e.encodeInt(int64(*v))
  266. case *int16:
  267. e.e.encodeInt(int64(*v))
  268. case *int32:
  269. e.e.encodeInt(int64(*v))
  270. case *int64:
  271. e.e.encodeInt(*v)
  272. case *uint:
  273. e.e.encodeUint(uint64(*v))
  274. case *uint8:
  275. e.e.encodeUint(uint64(*v))
  276. case *uint16:
  277. e.e.encodeUint(uint64(*v))
  278. case *uint32:
  279. e.e.encodeUint(uint64(*v))
  280. case *uint64:
  281. e.e.encodeUint(*v)
  282. case *float32:
  283. e.e.encodeFloat32(*v)
  284. case *float64:
  285. e.e.encodeFloat64(*v)
  286. default:
  287. e.encodeValue(reflect.ValueOf(iv))
  288. }
  289. }
  290. func (e *Encoder) encodeValue(rv reflect.Value) {
  291. rt := rv.Type()
  292. rtid := reflect.ValueOf(rt).Pointer()
  293. //encode based on type first, since over-rides are based on type.
  294. ee := e.e //don't dereference everytime
  295. if ee.encodeBuiltinType(rtid, rv) {
  296. return
  297. }
  298. //Note: tagFn must handle returning nil if value should be encoded as a nil.
  299. if xfTag, xfFn := e.h.getEncodeExt(rtid); xfFn != nil {
  300. bs, fnerr := xfFn(rv)
  301. if fnerr != nil {
  302. panic(fnerr)
  303. }
  304. if bs == nil {
  305. ee.encodeNil()
  306. return
  307. }
  308. if e.h.writeExt() {
  309. ee.encodeExtPreamble(xfTag, len(bs))
  310. e.w.writeb(bs)
  311. } else {
  312. ee.encodeStringBytes(c_RAW, bs)
  313. }
  314. return
  315. }
  316. // TODO: Encode if type is an encoding.BinaryMarshaler: MarshalBinary() (data []byte, err error)
  317. // There is a cost, as we need to change the rv to an interface{} first.
  318. // ensure more common cases appear early in switch.
  319. rk := rv.Kind()
  320. switch rk {
  321. case reflect.Bool:
  322. ee.encodeBool(rv.Bool())
  323. case reflect.String:
  324. ee.encodeString(c_UTF8, rv.String())
  325. case reflect.Float64:
  326. ee.encodeFloat64(rv.Float())
  327. case reflect.Float32:
  328. ee.encodeFloat32(float32(rv.Float()))
  329. case reflect.Slice:
  330. if rv.IsNil() {
  331. ee.encodeNil()
  332. break
  333. }
  334. if rt == byteSliceTyp {
  335. ee.encodeStringBytes(c_RAW, rv.Bytes())
  336. break
  337. }
  338. l := rv.Len()
  339. ee.encodeArrayPreamble(l)
  340. if l == 0 {
  341. break
  342. }
  343. for j := 0; j < l; j++ {
  344. e.encodeValue(rv.Index(j))
  345. }
  346. case reflect.Array:
  347. e.encodeValue(rv.Slice(0, rv.Len()))
  348. case reflect.Map:
  349. if rv.IsNil() {
  350. ee.encodeNil()
  351. break
  352. }
  353. l := rv.Len()
  354. ee.encodeMapPreamble(l)
  355. if l == 0 {
  356. break
  357. }
  358. keyTypeIsString := rt.Key().Kind() == reflect.String
  359. mks := rv.MapKeys()
  360. // for j, lmks := 0, len(mks); j < lmks; j++ {
  361. for j := range mks {
  362. if keyTypeIsString {
  363. ee.encodeSymbol(mks[j].String())
  364. } else {
  365. e.encodeValue(mks[j])
  366. }
  367. e.encodeValue(rv.MapIndex(mks[j]))
  368. }
  369. case reflect.Struct:
  370. sis := getStructFieldInfos(rtid, rt)
  371. e.encStruct(sis, rv)
  372. case reflect.Ptr:
  373. if rv.IsNil() {
  374. ee.encodeNil()
  375. break
  376. }
  377. e.encodeValue(rv.Elem())
  378. case reflect.Interface:
  379. if rv.IsNil() {
  380. ee.encodeNil()
  381. break
  382. }
  383. e.encodeValue(rv.Elem())
  384. case reflect.Int, reflect.Int8, reflect.Int64, reflect.Int32, reflect.Int16:
  385. ee.encodeInt(rv.Int())
  386. case reflect.Uint8, reflect.Uint64, reflect.Uint, reflect.Uint32, reflect.Uint16:
  387. ee.encodeUint(rv.Uint())
  388. case reflect.Invalid:
  389. ee.encodeNil()
  390. default:
  391. encErr("Unsupported kind: %s, for: %#v", rk, rv)
  392. }
  393. return
  394. }
  395. func (e *Encoder) encStruct(sis structFieldInfos, rv reflect.Value) {
  396. newlen := len(sis)
  397. rvals := make([]reflect.Value, newlen)
  398. encnames := make([]string, newlen)
  399. newlen = 0
  400. // var rv0 reflect.Value
  401. // for i := 0; i < l; i++ {
  402. // si := sis[i]
  403. for _, si := range sis {
  404. if si.i > -1 {
  405. rvals[newlen] = rv.Field(int(si.i))
  406. } else {
  407. rvals[newlen] = rv.FieldByIndex(si.is)
  408. }
  409. if si.omitEmpty && isEmptyValue(rvals[newlen]) {
  410. continue
  411. }
  412. // sivals[newlen] = i
  413. encnames[newlen] = si.encName
  414. newlen++
  415. }
  416. ee := e.e //don't dereference everytime
  417. ee.encodeMapPreamble(newlen)
  418. for j := 0; j < newlen; j++ {
  419. //e.encString(sis[sivals[j]].encName)
  420. ee.encodeSymbol(encnames[j])
  421. e.encodeValue(rvals[j])
  422. }
  423. }
  424. // ----------------------------------------
  425. func (z *ioEncWriter) writeUint16(v uint16) {
  426. bigen.PutUint16(z.x[:2], v)
  427. z.writeb(z.x[:2])
  428. }
  429. func (z *ioEncWriter) writeUint32(v uint32) {
  430. bigen.PutUint32(z.x[:4], v)
  431. z.writeb(z.x[:4])
  432. }
  433. func (z *ioEncWriter) writeUint64(v uint64) {
  434. bigen.PutUint64(z.x[:8], v)
  435. z.writeb(z.x[:8])
  436. }
  437. func (z *ioEncWriter) writeb(bs []byte) {
  438. n, err := z.w.Write(bs)
  439. if err != nil {
  440. panic(err)
  441. }
  442. if n != len(bs) {
  443. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(bs), n)
  444. }
  445. }
  446. func (z *ioEncWriter) writestr(s string) {
  447. n, err := z.w.WriteString(s)
  448. if err != nil {
  449. panic(err)
  450. }
  451. if n != len(s) {
  452. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(s), n)
  453. }
  454. }
  455. func (z *ioEncWriter) writen1(b byte) {
  456. if err := z.w.WriteByte(b); err != nil {
  457. panic(err)
  458. }
  459. }
  460. func (z *ioEncWriter) writen2(b1 byte, b2 byte) {
  461. z.writen1(b1)
  462. z.writen1(b2)
  463. }
// atEndOfEncode is a no-op for ioEncWriter: every write has already been
// passed to the underlying writer, and nothing is flushed here.
func (z *ioEncWriter) atEndOfEncode() { }
  465. // ----------------------------------------
  466. func (z *bytesEncWriter) writeUint16(v uint16) {
  467. c := z.grow(2)
  468. z.b[c] = byte(v >> 8)
  469. z.b[c+1] = byte(v)
  470. }
  471. func (z *bytesEncWriter) writeUint32(v uint32) {
  472. c := z.grow(4)
  473. z.b[c] = byte(v >> 24)
  474. z.b[c+1] = byte(v >> 16)
  475. z.b[c+2] = byte(v >> 8)
  476. z.b[c+3] = byte(v)
  477. }
  478. func (z *bytesEncWriter) writeUint64(v uint64) {
  479. c := z.grow(8)
  480. z.b[c] = byte(v >> 56)
  481. z.b[c+1] = byte(v >> 48)
  482. z.b[c+2] = byte(v >> 40)
  483. z.b[c+3] = byte(v >> 32)
  484. z.b[c+4] = byte(v >> 24)
  485. z.b[c+5] = byte(v >> 16)
  486. z.b[c+6] = byte(v >> 8)
  487. z.b[c+7] = byte(v)
  488. }
  489. func (z *bytesEncWriter) writeb(s []byte) {
  490. c := z.grow(len(s))
  491. copy(z.b[c:], s)
  492. }
  493. func (z *bytesEncWriter) writestr(s string) {
  494. c := z.grow(len(s))
  495. copy(z.b[c:], s)
  496. }
  497. func (z *bytesEncWriter) writen1(b1 byte) {
  498. c := z.grow(1)
  499. z.b[c] = b1
  500. }
  501. func (z *bytesEncWriter) writen2(b1 byte, b2 byte) {
  502. c := z.grow(2)
  503. z.b[c] = b1
  504. z.b[c+1] = b2
  505. }
  506. func (z *bytesEncWriter) atEndOfEncode() {
  507. *(z.out) = z.b[:z.c]
  508. }
  509. func (z *bytesEncWriter) grow(n int) (oldcursor int) {
  510. oldcursor = z.c
  511. z.c = oldcursor + n
  512. if z.c > cap(z.b) {
  513. // It tried using appendslice logic: (if cap < 1024, *2, else *1.25).
  514. // However, it was too expensive, causing too many iterations of copy.
  515. // Using bytes.Buffer model was much better (2*cap + n)
  516. bs := make([]byte, 2*cap(z.b)+n)
  517. copy(bs, z.b[:oldcursor])
  518. z.b = bs
  519. } else if z.c > len(z.b) {
  520. z.b = z.b[:cap(z.b)]
  521. }
  522. return
  523. }
  524. // ----------------------------------------
// encErr reports an encoding error by calling doPanic with the encoder's
// message tag (msgTagEnc) and the given printf-style format and arguments.
func encErr(format string, params ...interface{}) {
	doPanic(msgTagEnc, format, params...)
}