encode.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. // Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license found in the LICENSE file.
  3. package codec
  4. import (
  5. "bufio"
  6. "io"
  7. "reflect"
  8. //"fmt"
  9. )
  10. //var _ = fmt.Printf
  // Constants shared by the encoder implementation.
  const (
  	// defEncByteBufSize is the default size, in bytes, of the buffers the
  	// encoder allocates itself (the bufio wrapper and the initial byte slice).
  	defEncByteBufSize = 1 << 6 // 64; for reference 1<<4 = 16, 1<<8 = 256, 1<<10 = 1024

  	// msgTagEnc is the tag attached to panics/errors raised by the encoder.
  	msgTagEnc = "codec.encoder"

  	// maxTimeSecs32 = math.MaxInt32 / 60 / 24 / 366
  )
  // encWriter abstracts the output sink of an Encoder, so the same encoding
  // logic can write either into a byte slice or into an io.Writer.
  type encWriter interface {
  	// Single bytes and short fixed-size byte groups.
  	writen1(byte)
  	writen2(byte, byte)
  	writen3(byte, byte, byte)
  	writen4(byte, byte, byte, byte)

  	// Fixed-width unsigned integers.
  	writeUint16(uint16)
  	writeUint32(uint32)
  	writeUint64(uint64)

  	// Variable-length payloads.
  	writeb([]byte)
  	writestr(string)

  	// flush is called once an Encode call has written everything.
  	flush()
  }
  30. type encDriver interface {
  31. encodeBuiltinType(rt reflect.Type, rv reflect.Value) bool
  32. encodeNil()
  33. encodeInt(i int64)
  34. encodeUint(i uint64)
  35. encodeBool(b bool)
  36. encodeFloat32(f float32)
  37. encodeFloat64(f float64)
  38. encodeExtPreamble(xtag byte, length int)
  39. encodeArrayPreamble(length int)
  40. encodeMapPreamble(length int)
  41. encodeString(c charEncoding, v string)
  42. encodeSymbol(v string)
  43. encodeStringBytes(c charEncoding, v []byte)
  44. //TODO
  45. //encBignum(f *big.Int)
  46. //encStringRunes(c charEncoding, v []rune)
  47. }
  // encodeHandleI is the part of a handle that the Encoder consults while
  // encoding a value.
  type encodeHandleI interface {
  	// writeExt reports whether extension output is written with an extension
  	// preamble (true) or as a plain raw byte string (false).
  	writeExt() bool

  	// getEncodeExt returns the tag and encode function registered for rt.
  	// A nil fn means no extension applies to rt.
  	getEncodeExt(rt reflect.Type) (tag byte, fn func(reflect.Value) ([]byte, error))
  }
  52. // An Encoder writes an object to an output stream in the codec format.
  53. type Encoder struct {
  54. w encWriter
  55. e encDriver
  56. h encodeHandleI
  57. }
  // ioEncWriterWriter is the set of write methods ioEncWriter needs from its
  // underlying writer. NewEncoder wraps an io.Writer that does not provide all
  // of them in a bufio.Writer.
  type ioEncWriterWriter interface {
  	Write(p []byte) (n int, err error)
  	WriteString(s string) (n int, err error)
  	WriteByte(c byte) error
  }
  // ioEncWriterFlusher is implemented by writers that buffer their output
  // (for example bufio.Writer); ioEncWriter.flush calls Flush on them.
  type ioEncWriterFlusher interface {
  	Flush() error
  }
  66. // ioEncWriter implements encWriter and can write to an io.Writer implementation
  67. type ioEncWriter struct {
  68. w ioEncWriterWriter
  69. x [8]byte // temp byte array re-used internally for efficiency
  70. }
  // bytesEncWriter is the encWriter implementation that writes into a byte
  // slice. It backs the Encoder returned by NewEncoderBytes.
  type bytesEncWriter struct {
  	b   []byte  // working buffer; only b[:c] holds encoded output
  	c   int     // cursor: number of bytes written so far
  	out *[]byte // receives b[:c] when flush is called
  }
  // encExtTagFn pairs an extension encode function with the tag byte it was
  // registered under.
  //
  // The field order matters: values are built with positional literals.
  type encExtTagFn struct {
  	fn  func(reflect.Value) ([]byte, error) // converts a value into its extension bytes
  	tag byte                                // tag identifying the extension
  }
  82. type encExtTypeTagFn struct {
  83. rt reflect.Type
  84. encExtTagFn
  85. }
  86. // EncoderOptions contain options for the encoder, e.g. registered extension functions.
  87. type encHandle struct {
  88. extFuncs map[reflect.Type]encExtTagFn
  89. exts []encExtTypeTagFn
  90. }
  91. // addEncodeExt registers a function to handle encoding a given type as an extension
  92. // with a specific specific tag byte.
  93. // To remove an extension, pass fn=nil.
  94. func (o *encHandle) addEncodeExt(rt reflect.Type, tag byte, fn func(reflect.Value) ([]byte, error)) {
  95. if o.exts == nil {
  96. o.exts = make([]encExtTypeTagFn, 0, 8)
  97. o.extFuncs = make(map[reflect.Type]encExtTagFn, 8)
  98. }
  99. delete(o.extFuncs, rt)
  100. if fn != nil {
  101. o.extFuncs[rt] = encExtTagFn{fn, tag}
  102. }
  103. if leno := len(o.extFuncs); leno > cap(o.exts) {
  104. o.exts = make([]encExtTypeTagFn, leno, (leno * 3 / 2))
  105. } else {
  106. o.exts = o.exts[0:leno]
  107. }
  108. var i int
  109. for k, v := range o.extFuncs {
  110. o.exts[i] = encExtTypeTagFn{k, v}
  111. i++
  112. }
  113. }
  114. func (o *encHandle) getEncodeExt(rt reflect.Type) (tag byte, fn func(reflect.Value) ([]byte, error)) {
  115. // For >= 5 elements, map constant cost less than iteration cost.
  116. // This is because reflect.Type equality cost is pretty high
  117. if l := len(o.exts); l == 0 {
  118. return
  119. } else if l < mapAccessThreshold {
  120. for i := 0; i < l; i++ {
  121. if o.exts[i].rt == rt {
  122. x := o.exts[i].encExtTagFn
  123. return x.tag, x.fn
  124. }
  125. }
  126. } else {
  127. x := o.extFuncs[rt]
  128. return x.tag, x.fn
  129. }
  130. return
  131. }
  132. // NewEncoder returns an Encoder for encoding into an io.Writer.
  133. // For efficiency, Users are encouraged to pass in a memory buffered writer
  134. // (eg bufio.Writer, bytes.Buffer). This implementation *may* use one internally.
  135. func NewEncoder(w io.Writer, h Handle) *Encoder {
  136. ww, ok := w.(ioEncWriterWriter)
  137. if !ok {
  138. ww = bufio.NewWriterSize(w, defEncByteBufSize)
  139. }
  140. z := ioEncWriter{
  141. w: ww,
  142. }
  143. return &Encoder{w: &z, h: h, e: h.newEncDriver(&z)}
  144. }
  145. // NewEncoderBytes returns an encoder for encoding directly and efficiently
  146. // into a byte slice, using zero-copying to temporary slices.
  147. //
  148. // It will potentially replace the output byte slice pointed to.
  149. // After encoding, the out parameter contains the encoded contents.
  150. func NewEncoderBytes(out *[]byte, h Handle) *Encoder {
  151. in := *out
  152. if in == nil {
  153. in = make([]byte, defEncByteBufSize)
  154. }
  155. z := bytesEncWriter{
  156. b: in,
  157. out: out,
  158. }
  159. return &Encoder{w: &z, h: h, e: h.newEncDriver(&z)}
  160. }
  161. // Encode writes an object into a stream in the codec format.
  162. //
  163. // Struct values encode as maps. Each exported struct field is encoded unless:
  164. // - the field's tag is "-", or
  165. // - the field is empty and its tag specifies the "omitempty" option.
  166. //
  167. // The empty values are false, 0, any nil pointer or interface value,
  168. // and any array, slice, map, or string of length zero.
  169. //
  170. // Anonymous fields are encoded inline if no struct tag is present.
  171. // Else they are encoded as regular fields.
  172. //
  173. // The object's default key string is the struct field name but can be
  174. // specified in the struct field's tag value.
  175. // The "codec" key in struct field's tag value is the key name,
  176. // followed by an optional comma and options.
  177. //
  178. // To set an option on all fields (e.g. omitempty on all fields), you
  179. // can create a field called _struct, and set flags on it.
  180. //
  181. // Examples:
  182. //
  183. // type MyStruct struct {
  184. // _struct bool `codec:",omitempty"` //set omitempty for every field
  185. // Field1 string `codec:"-"` //skip this field
  186. // Field2 int `codec:"myName"` //Use key "myName" in encode stream
  187. // Field3 int32 `codec:",omitempty"` //use key "Field3". Omit if empty.
  188. // Field4 bool `codec:"f4,omitempty"` //use key "f4". Omit if empty.
  189. // ...
  190. // }
  191. //
  192. // Note:
  193. // - Encode will treat struct field names and keys in map[string]XXX as symbols.
  194. // Some formats support symbols (e.g. binc) and will properly encode the string
  195. // only once in the stream, and use a tag to refer to it thereafter.
  196. func (e *Encoder) Encode(v interface{}) (err error) {
  197. defer panicToErr(&err)
  198. e.encode(v)
  199. e.w.flush()
  200. return
  201. }
  202. func (e *Encoder) encode(iv interface{}) {
  203. switch v := iv.(type) {
  204. case nil:
  205. e.e.encodeNil()
  206. case reflect.Value:
  207. e.encodeValue(v)
  208. case string:
  209. e.e.encodeString(c_UTF8, v)
  210. case bool:
  211. e.e.encodeBool(v)
  212. case int:
  213. e.e.encodeInt(int64(v))
  214. case int8:
  215. e.e.encodeInt(int64(v))
  216. case int16:
  217. e.e.encodeInt(int64(v))
  218. case int32:
  219. e.e.encodeInt(int64(v))
  220. case int64:
  221. e.e.encodeInt(v)
  222. case uint:
  223. e.e.encodeUint(uint64(v))
  224. case uint8:
  225. e.e.encodeUint(uint64(v))
  226. case uint16:
  227. e.e.encodeUint(uint64(v))
  228. case uint32:
  229. e.e.encodeUint(uint64(v))
  230. case uint64:
  231. e.e.encodeUint(v)
  232. case float32:
  233. e.e.encodeFloat32(v)
  234. case float64:
  235. e.e.encodeFloat64(v)
  236. case *string:
  237. e.e.encodeString(c_UTF8, *v)
  238. case *bool:
  239. e.e.encodeBool(*v)
  240. case *int:
  241. e.e.encodeInt(int64(*v))
  242. case *int8:
  243. e.e.encodeInt(int64(*v))
  244. case *int16:
  245. e.e.encodeInt(int64(*v))
  246. case *int32:
  247. e.e.encodeInt(int64(*v))
  248. case *int64:
  249. e.e.encodeInt(*v)
  250. case *uint:
  251. e.e.encodeUint(uint64(*v))
  252. case *uint8:
  253. e.e.encodeUint(uint64(*v))
  254. case *uint16:
  255. e.e.encodeUint(uint64(*v))
  256. case *uint32:
  257. e.e.encodeUint(uint64(*v))
  258. case *uint64:
  259. e.e.encodeUint(*v)
  260. case *float32:
  261. e.e.encodeFloat32(*v)
  262. case *float64:
  263. e.e.encodeFloat64(*v)
  264. default:
  265. e.encodeValue(reflect.ValueOf(iv))
  266. }
  267. }
  268. func (e *Encoder) encodeValue(rv reflect.Value) {
  269. rt := rv.Type()
  270. //encode based on type first, since over-rides are based on type.
  271. ee := e.e //don't dereference everytime
  272. if ee.encodeBuiltinType(rt, rv) {
  273. return
  274. }
  275. //Note: tagFn must handle returning nil if value should be encoded as a nil.
  276. if xfTag, xfFn := e.h.getEncodeExt(rt); xfFn != nil {
  277. bs, fnerr := xfFn(rv)
  278. if fnerr != nil {
  279. panic(fnerr)
  280. }
  281. if bs == nil {
  282. ee.encodeNil()
  283. return
  284. }
  285. if e.h.writeExt() {
  286. ee.encodeExtPreamble(xfTag, len(bs))
  287. e.w.writeb(bs)
  288. } else {
  289. ee.encodeStringBytes(c_RAW, bs)
  290. }
  291. return
  292. }
  293. // ensure more common cases appear early in switch.
  294. rk := rv.Kind()
  295. switch rk {
  296. case reflect.Bool:
  297. ee.encodeBool(rv.Bool())
  298. case reflect.String:
  299. ee.encodeString(c_UTF8, rv.String())
  300. case reflect.Float64:
  301. ee.encodeFloat64(rv.Float())
  302. case reflect.Float32:
  303. ee.encodeFloat32(float32(rv.Float()))
  304. case reflect.Slice:
  305. if rv.IsNil() {
  306. ee.encodeNil()
  307. break
  308. }
  309. if rt == byteSliceTyp {
  310. ee.encodeStringBytes(c_RAW, rv.Bytes())
  311. break
  312. }
  313. l := rv.Len()
  314. ee.encodeArrayPreamble(l)
  315. if l == 0 {
  316. break
  317. }
  318. for j := 0; j < l; j++ {
  319. e.encodeValue(rv.Index(j))
  320. }
  321. case reflect.Array:
  322. e.encodeValue(rv.Slice(0, rv.Len()))
  323. case reflect.Map:
  324. if rv.IsNil() {
  325. ee.encodeNil()
  326. break
  327. }
  328. l := rv.Len()
  329. ee.encodeMapPreamble(l)
  330. if l == 0 {
  331. break
  332. }
  333. keyTypeIsString := rt.Key().Kind() == reflect.String
  334. mks := rv.MapKeys()
  335. // for j, lmks := 0, len(mks); j < lmks; j++ {
  336. for j := range mks {
  337. if keyTypeIsString {
  338. ee.encodeSymbol(mks[j].String())
  339. } else {
  340. e.encodeValue(mks[j])
  341. }
  342. e.encodeValue(rv.MapIndex(mks[j]))
  343. }
  344. case reflect.Struct:
  345. e.encStruct(rt, rv)
  346. case reflect.Ptr:
  347. if rv.IsNil() {
  348. ee.encodeNil()
  349. break
  350. }
  351. e.encodeValue(rv.Elem())
  352. case reflect.Interface:
  353. if rv.IsNil() {
  354. ee.encodeNil()
  355. break
  356. }
  357. e.encodeValue(rv.Elem())
  358. case reflect.Int, reflect.Int8, reflect.Int64, reflect.Int32, reflect.Int16:
  359. ee.encodeInt(rv.Int())
  360. case reflect.Uint8, reflect.Uint64, reflect.Uint, reflect.Uint32, reflect.Uint16:
  361. ee.encodeUint(rv.Uint())
  362. case reflect.Invalid:
  363. ee.encodeNil()
  364. default:
  365. encErr("Unsupported kind: %s, for: %#v", rk, rv)
  366. }
  367. return
  368. }
  369. func (e *Encoder) encStruct(rt reflect.Type, rv reflect.Value) {
  370. sis := getStructFieldInfos(rt)
  371. newlen := len(sis)
  372. rvals := make([]reflect.Value, newlen)
  373. encnames := make([]string, newlen)
  374. newlen = 0
  375. // var rv0 reflect.Value
  376. // for i := 0; i < l; i++ {
  377. // si := sis[i]
  378. for _, si := range sis {
  379. if si.i > -1 {
  380. rvals[newlen] = rv.Field(int(si.i))
  381. } else {
  382. rvals[newlen] = rv.FieldByIndex(si.is)
  383. }
  384. if si.omitEmpty && isEmptyValue(rvals[newlen]) {
  385. continue
  386. }
  387. // sivals[newlen] = i
  388. encnames[newlen] = si.encName
  389. newlen++
  390. }
  391. ee := e.e //don't dereference everytime
  392. ee.encodeMapPreamble(newlen)
  393. for j := 0; j < newlen; j++ {
  394. //e.encString(sis[sivals[j]].encName)
  395. ee.encodeSymbol(encnames[j])
  396. e.encodeValue(rvals[j])
  397. }
  398. }
  399. // ----------------------------------------
  400. func (z *ioEncWriter) writeUint16(v uint16) {
  401. bigen.PutUint16(z.x[:2], v)
  402. z.writeb(z.x[:2])
  403. }
  404. func (z *ioEncWriter) writeUint32(v uint32) {
  405. bigen.PutUint32(z.x[:4], v)
  406. z.writeb(z.x[:4])
  407. }
  408. func (z *ioEncWriter) writeUint64(v uint64) {
  409. bigen.PutUint64(z.x[:8], v)
  410. z.writeb(z.x[:8])
  411. }
  412. func (z *ioEncWriter) writeb(bs []byte) {
  413. n, err := z.w.Write(bs)
  414. if err != nil {
  415. panic(err)
  416. }
  417. if n != len(bs) {
  418. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(bs), n)
  419. }
  420. }
  421. func (z *ioEncWriter) writestr(s string) {
  422. n, err := z.w.WriteString(s)
  423. if err != nil {
  424. panic(err)
  425. }
  426. if n != len(s) {
  427. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(s), n)
  428. }
  429. }
  430. func (z *ioEncWriter) writen1(b byte) {
  431. if err := z.w.WriteByte(b); err != nil {
  432. panic(err)
  433. }
  434. }
  435. func (z *ioEncWriter) writen2(b1 byte, b2 byte) {
  436. z.writen1(b1)
  437. z.writen1(b2)
  438. }
  439. func (z *ioEncWriter) writen3(b1, b2, b3 byte) {
  440. z.writen1(b1)
  441. z.writen1(b2)
  442. z.writen1(b3)
  443. }
  444. func (z *ioEncWriter) writen4(b1, b2, b3, b4 byte) {
  445. z.writen1(b1)
  446. z.writen1(b2)
  447. z.writen1(b3)
  448. z.writen1(b4)
  449. }
  450. func (z *ioEncWriter) flush() {
  451. if f, ok := z.w.(ioEncWriterFlusher); ok {
  452. if err := f.Flush(); err != nil {
  453. panic(err)
  454. }
  455. }
  456. }
  457. // ----------------------------------------
  458. func (z *bytesEncWriter) writeUint16(v uint16) {
  459. c := z.grow(2)
  460. z.b[c] = byte(v >> 8)
  461. z.b[c+1] = byte(v)
  462. }
  463. func (z *bytesEncWriter) writeUint32(v uint32) {
  464. c := z.grow(4)
  465. z.b[c] = byte(v >> 24)
  466. z.b[c+1] = byte(v >> 16)
  467. z.b[c+2] = byte(v >> 8)
  468. z.b[c+3] = byte(v)
  469. }
  470. func (z *bytesEncWriter) writeUint64(v uint64) {
  471. c := z.grow(8)
  472. z.b[c] = byte(v >> 56)
  473. z.b[c+1] = byte(v >> 48)
  474. z.b[c+2] = byte(v >> 40)
  475. z.b[c+3] = byte(v >> 32)
  476. z.b[c+4] = byte(v >> 24)
  477. z.b[c+5] = byte(v >> 16)
  478. z.b[c+6] = byte(v >> 8)
  479. z.b[c+7] = byte(v)
  480. }
  481. func (z *bytesEncWriter) writeb(s []byte) {
  482. c := z.grow(len(s))
  483. copy(z.b[c:], s)
  484. }
  485. func (z *bytesEncWriter) writestr(s string) {
  486. c := z.grow(len(s))
  487. copy(z.b[c:], s)
  488. }
  489. func (z *bytesEncWriter) writen1(b1 byte) {
  490. c := z.grow(1)
  491. z.b[c] = b1
  492. }
  493. func (z *bytesEncWriter) writen2(b1 byte, b2 byte) {
  494. c := z.grow(2)
  495. z.b[c] = b1
  496. z.b[c+1] = b2
  497. }
  498. func (z *bytesEncWriter) writen3(b1 byte, b2 byte, b3 byte) {
  499. c := z.grow(3)
  500. z.b[c] = b1
  501. z.b[c+1] = b2
  502. z.b[c+2] = b3
  503. }
  504. func (z *bytesEncWriter) writen4(b1 byte, b2 byte, b3 byte, b4 byte) {
  505. c := z.grow(4)
  506. z.b[c] = b1
  507. z.b[c+1] = b2
  508. z.b[c+2] = b3
  509. z.b[c+3] = b4
  510. }
  511. func (z *bytesEncWriter) flush() {
  512. *(z.out) = z.b[:z.c]
  513. }
  514. func (z *bytesEncWriter) grow(n int) (oldcursor int) {
  515. oldcursor = z.c
  516. z.c = oldcursor + n
  517. if z.c > cap(z.b) {
  518. // It tried using appendslice logic: (if cap < 1024, *2, else *1.25).
  519. // However, it was too expensive, causing too many iterations of copy.
  520. // Using bytes.Buffer model was much better (2*cap + n)
  521. bs := make([]byte, 2*cap(z.b)+n)
  522. copy(bs, z.b[:oldcursor])
  523. z.b = bs
  524. } else if z.c > len(z.b) {
  525. z.b = z.b[:cap(z.b)]
  526. }
  527. return
  528. }
  529. // ----------------------------------------
  530. func encErr(format string, params ...interface{}) {
  531. doPanic(msgTagEnc, format, params...)
  532. }