encode.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. // Copyright (c) 2012, 2013 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license found in the LICENSE file.
  3. package codec
  4. import (
  5. "io"
  6. "bufio"
  7. "reflect"
  8. "math"
  9. "time"
  10. //"fmt"
  11. )
  12. //var _ = fmt.Printf
const (
	// msgTagEnc is the tag passed to doPanic so that encoder failures are
	// labelled as coming from the encoder.
	msgTagEnc = "codec.encoder"

	// defEncByteBufSize is the initial buffer size in bytes: NewEncoder uses it
	// for the bufio.Writer it may create, and NewEncoderBytes uses it for the
	// byte slice it allocates when the caller supplies a nil slice.
	defEncByteBufSize = 1 << 6 // 4:16, 6:64, 8:256, 10:1024
	// maxTimeSecs32 = math.MaxInt32 / 60 / 24 / 366
)
// encWriter abstracts writing encoded output either to a byte slice or to an io.Writer.
//
// None of the methods return an error; the io.Writer-backed implementation in
// this file (ioEncWriter) reports write failures by panicking.
type encWriter interface {
	// writeUint16, writeUint32 and writeUint64 write v most-significant byte first.
	writeUint16(uint16)
	writeUint32(uint32)
	writeUint64(uint64)
	// writeb and writestr write the given bytes / string contents verbatim.
	writeb([]byte)
	writestr(string)
	// writen1 .. writen4 write one to four individual bytes, in argument order.
	writen1(byte)
	writen2(byte, byte)
	writen3(byte, byte, byte)
	writen4(byte, byte, byte, byte)
	// flush is called by Encoder.Encode once a value has been fully encoded.
	flush()
}
// encoder is the set of format-specific encoding primitives driven by an Encoder.
// An implementation is obtained from a handle's newEncoder method.
type encoder interface {
	// encodeBuiltinType gives the format first refusal on a value: when it
	// returns true, Encoder.encodeValue does no further work for that value.
	encodeBuiltinType(rt reflect.Type, rv reflect.Value) bool
	encodeNil()
	encodeInt(i int64)
	encodeUint(i uint64)
	encodeBool(b bool)
	encodeFloat32(f float32)
	encodeFloat64(f float64)
	// encodeExtPreamble receives an extension tag and the payload length;
	// Encoder.encodeValue writes the payload bytes itself right afterwards.
	encodeExtPreamble(xtag byte, length int)
	// encodeArrayPreamble and encodeMapPreamble receive the number of elements
	// (or key/value pairs) that the caller encodes immediately afterwards.
	encodeArrayPreamble(length int)
	encodeMapPreamble(length int)
	encodeString(c charEncoding, v string)
	// encodeSymbol is used by the Encoder for struct field names and for map
	// keys whose kind is string.
	encodeSymbol(v string)
	encodeStringBytes(c charEncoding, v []byte)
	//TODO
	//encBignum(f *big.Int)
	//encStringRunes(c charEncoding, v []rune)
}
// newEncoderFunc is the signature of a constructor that builds a format-specific
// encoder writing to w.
type newEncoderFunc func(w encWriter) encoder
// encodeHandleI is the handle behavior that an Encoder relies on.
type encodeHandleI interface {
	// getEncodeExt returns the extension tag and encode function for rt.
	// Encoder.encodeValue treats a nil fn as "no extension for this type".
	getEncodeExt(rt reflect.Type) (tag byte, fn func(reflect.Value) ([]byte, error))
	// newEncoder returns the format-specific encoder that writes to w.
	newEncoder(w encWriter) encoder
	// writeExt selects how extension output is emitted by Encoder.encodeValue:
	// true writes an extension preamble followed by the bytes, false writes the
	// bytes as a raw byte string.
	writeExt() bool
}
// An Encoder writes an object to an output stream in the codec format.
type Encoder struct {
	w encWriter     // destination of the encoded bytes
	e encoder       // format-specific encoder, built from h.newEncoder(w) by the constructors
	h encodeHandleI // handle consulted for extensions and the writeExt setting
}
// ioEncWriterWriter is the set of write methods ioEncWriter calls on its
// underlying writer. NewEncoder uses the supplied io.Writer directly when it
// implements this interface, and otherwise wraps it in a bufio.Writer.
type ioEncWriterWriter interface {
	WriteByte(c byte) error
	WriteString(s string) (n int, err error)
	Write(p []byte) (n int, err error)
}
// ioEncWriterFlusher is the optional interface ioEncWriter.flush looks for on
// its underlying writer; when present, Flush is called and a returned error
// is raised as a panic.
type ioEncWriterFlusher interface {
	Flush() error
}
// ioEncWriter implements encWriter and can write to an io.Writer implementation.
type ioEncWriter struct {
	w ioEncWriterWriter // underlying writer that receives every byte
	x [8]byte           // temp byte array re-used internally for efficiency (scratch for writeUintNN)
}
// bytesEncWriter implements encWriter and can write to a byte slice.
// It is the writer created by NewEncoderBytes.
type bytesEncWriter struct {
	b   []byte  // working buffer; replaced by a larger one in grow when capacity runs out
	c   int     // cursor: number of bytes written so far
	out *[]byte // write out on flush: receives b[:c]
}
// encExtTagFn pairs an extension encode function with the tag byte it was
// registered under.
//
// NOTE: addEncodeExt builds this struct with a positional literal, so the field
// order is significant.
type encExtTagFn struct {
	fn  func(reflect.Value) ([]byte, error)
	tag byte
}
// encExtTypeTagFn associates a reflect.Type with its registered encExtTagFn.
// It is the element type of encHandle.exts, which getEncodeExt scans linearly.
//
// NOTE: addEncodeExt builds this struct with a positional literal, so the field
// order is significant.
type encExtTypeTagFn struct {
	rt reflect.Type
	encExtTagFn
}
// encHandle holds the encode extension functions registered via addEncodeExt.
//
// The same registrations are kept in two forms so that getEncodeExt can pick
// the cheaper lookup: extFuncs for map access and exts for a linear scan.
type encHandle struct {
	extFuncs map[reflect.Type]encExtTagFn
	exts     []encExtTypeTagFn
}
  95. // addEncodeExt registers a function to handle encoding a given type as an extension
  96. // with a specific specific tag byte.
  97. // To remove an extension, pass fn=nil.
  98. func (o *encHandle) addEncodeExt(rt reflect.Type, tag byte, fn func(reflect.Value) ([]byte, error)) {
  99. if o.exts == nil {
  100. o.exts = make([]encExtTypeTagFn, 0, 8)
  101. o.extFuncs = make(map[reflect.Type] encExtTagFn, 8)
  102. }
  103. delete(o.extFuncs, rt)
  104. if fn != nil {
  105. o.extFuncs[rt] = encExtTagFn{fn, tag}
  106. }
  107. if leno := len(o.extFuncs); leno > cap(o.exts) {
  108. o.exts = make([]encExtTypeTagFn, leno, (leno * 3 / 2))
  109. } else {
  110. o.exts = o.exts[0:leno]
  111. }
  112. var i int
  113. for k, v := range o.extFuncs {
  114. o.exts[i] = encExtTypeTagFn {k, v}
  115. i++
  116. }
  117. }
  118. func (o *encHandle) getEncodeExt(rt reflect.Type) (tag byte, fn func(reflect.Value) ([]byte, error)) {
  119. // For >= 5 elements, map constant cost less than iteration cost.
  120. // This is because reflect.Type equality cost is pretty high
  121. if l := len(o.exts); l == 0 {
  122. return
  123. } else if l < mapAccessThreshold {
  124. for i := 0; i < l; i++ {
  125. if o.exts[i].rt == rt {
  126. x := o.exts[i].encExtTagFn
  127. return x.tag, x.fn
  128. }
  129. }
  130. } else {
  131. x := o.extFuncs[rt]
  132. return x.tag, x.fn
  133. }
  134. return
  135. }
  136. // NewEncoder returns an Encoder for encoding into an io.Writer.
  137. // For efficiency, Users are encouraged to pass in a memory buffered writer
  138. // (eg bufio.Writer, bytes.Buffer). This implementation *may* use one internally.
  139. func NewEncoder(w io.Writer, h Handle) (*Encoder) {
  140. ww, ok := w.(ioEncWriterWriter)
  141. if !ok {
  142. ww = bufio.NewWriterSize(w, defEncByteBufSize)
  143. }
  144. z := ioEncWriter {
  145. w: ww,
  146. }
  147. return &Encoder { w: &z, h: h, e: h.newEncoder(&z) }
  148. }
  149. // NewEncoderBytes returns an encoder for encoding directly and efficiently
  150. // into a byte slice, using zero-copying to temporary slices.
  151. //
  152. // It will potentially replace the output byte slice pointed to.
  153. // After encoding, the out parameter contains the encoded contents.
  154. func NewEncoderBytes(out *[]byte, h Handle) (*Encoder) {
  155. in := *out
  156. if in == nil {
  157. in = make([]byte, defEncByteBufSize)
  158. }
  159. z := bytesEncWriter {
  160. b: in,
  161. out: out,
  162. }
  163. return &Encoder { w: &z, h: h, e: h.newEncoder(&z) }
  164. }
  165. // Encode writes an object into a stream in the codec format.
  166. //
  167. // Struct values encode as maps. Each exported struct field is encoded unless:
  168. // - the field's tag is "-", or
  169. // - the field is empty and its tag specifies the "omitempty" option.
  170. //
  171. // The empty values are false, 0, any nil pointer or interface value,
  172. // and any array, slice, map, or string of length zero.
  173. //
  174. // Anonymous fields are encoded inline if no struct tag is present.
  175. // Else they are encoded as regular fields.
  176. //
  177. // The object's default key string is the struct field name but can be
  178. // specified in the struct field's tag value.
  179. // The "codec" key in struct field's tag value is the key name,
  180. // followed by an optional comma and options.
  181. //
  182. // To set an option on all fields (e.g. omitempty on all fields), you
  183. // can create a field called _struct, and set flags on it.
  184. //
  185. // Examples:
  186. //
  187. // type MyStruct struct {
  188. // _struct bool `codec:",omitempty"` //set omitempty for every field
  189. // Field1 string `codec:"-"` //skip this field
  190. // Field2 int `codec:"myName"` //Use key "myName" in encode stream
  191. // Field3 int32 `codec:",omitempty"` //use key "Field3". Omit if empty.
  192. // Field4 bool `codec:"f4,omitempty"` //use key "f4". Omit if empty.
  193. // ...
  194. // }
  195. //
  196. // Note:
  197. // - Encode will treat struct field names and keys in map[string]XXX as symbols.
  198. // Some formats support symbols (e.g. binc) and will properly encode the string
  199. // only once in the stream, and use a tag to refer to it thereafter.
  200. func (e *Encoder) Encode(v interface{}) (err error) {
  201. defer panicToErr(&err)
  202. e.encode(v)
  203. e.w.flush()
  204. return
  205. }
  206. func (e *Encoder) encode(iv interface{}) {
  207. switch v := iv.(type) {
  208. case nil:
  209. e.e.encodeNil()
  210. case reflect.Value:
  211. e.encodeValue(v)
  212. case string:
  213. e.e.encodeString(c_UTF8, v)
  214. case bool:
  215. e.e.encodeBool(v)
  216. case int:
  217. e.e.encodeInt(int64(v))
  218. case int8:
  219. e.e.encodeInt(int64(v))
  220. case int16:
  221. e.e.encodeInt(int64(v))
  222. case int32:
  223. e.e.encodeInt(int64(v))
  224. case int64:
  225. e.e.encodeInt(v)
  226. case uint:
  227. e.e.encodeUint(uint64(v))
  228. case uint8:
  229. e.e.encodeUint(uint64(v))
  230. case uint16:
  231. e.e.encodeUint(uint64(v))
  232. case uint32:
  233. e.e.encodeUint(uint64(v))
  234. case uint64:
  235. e.e.encodeUint(v)
  236. case float32:
  237. e.e.encodeFloat32(v)
  238. case float64:
  239. e.e.encodeFloat64(v)
  240. case *string:
  241. e.e.encodeString(c_UTF8, *v)
  242. case *bool:
  243. e.e.encodeBool(*v)
  244. case *int:
  245. e.e.encodeInt(int64(*v))
  246. case *int8:
  247. e.e.encodeInt(int64(*v))
  248. case *int16:
  249. e.e.encodeInt(int64(*v))
  250. case *int32:
  251. e.e.encodeInt(int64(*v))
  252. case *int64:
  253. e.e.encodeInt(*v)
  254. case *uint:
  255. e.e.encodeUint(uint64(*v))
  256. case *uint8:
  257. e.e.encodeUint(uint64(*v))
  258. case *uint16:
  259. e.e.encodeUint(uint64(*v))
  260. case *uint32:
  261. e.e.encodeUint(uint64(*v))
  262. case *uint64:
  263. e.e.encodeUint(*v)
  264. case *float32:
  265. e.e.encodeFloat32(*v)
  266. case *float64:
  267. e.e.encodeFloat64(*v)
  268. default:
  269. e.encodeValue(reflect.ValueOf(iv))
  270. }
  271. }
  272. func (e *Encoder) encodeValue(rv reflect.Value) {
  273. rt := rv.Type()
  274. //encode based on type first, since over-rides are based on type.
  275. ee := e.e //don't dereference everytime
  276. if ee.encodeBuiltinType(rt, rv) {
  277. return
  278. }
  279. //Note: tagFn must handle returning nil if value should be encoded as a nil.
  280. if xfTag, xfFn := e.h.getEncodeExt(rt); xfFn != nil {
  281. bs, fnerr := xfFn(rv)
  282. if fnerr != nil {
  283. panic(fnerr)
  284. }
  285. if bs == nil {
  286. ee.encodeNil()
  287. return
  288. }
  289. if e.h.writeExt() {
  290. ee.encodeExtPreamble(xfTag, len(bs))
  291. e.w.writeb(bs)
  292. } else {
  293. ee.encodeStringBytes(c_RAW, bs)
  294. }
  295. return
  296. }
  297. // ensure more common cases appear early in switch.
  298. rk := rv.Kind()
  299. switch rk {
  300. case reflect.Bool:
  301. ee.encodeBool(rv.Bool())
  302. case reflect.String:
  303. ee.encodeString(c_UTF8, rv.String())
  304. case reflect.Float64:
  305. ee.encodeFloat64(rv.Float())
  306. case reflect.Float32:
  307. ee.encodeFloat32(float32(rv.Float()))
  308. case reflect.Slice:
  309. if rv.IsNil() {
  310. ee.encodeNil()
  311. break
  312. }
  313. if rt == byteSliceTyp {
  314. ee.encodeStringBytes(c_RAW, rv.Bytes())
  315. break
  316. }
  317. l := rv.Len()
  318. ee.encodeArrayPreamble(l)
  319. if l == 0 {
  320. break
  321. }
  322. for j := 0; j < l; j++ {
  323. e.encodeValue(rv.Index(j))
  324. }
  325. case reflect.Array:
  326. e.encodeValue(rv.Slice(0, rv.Len()))
  327. case reflect.Map:
  328. if rv.IsNil() {
  329. ee.encodeNil()
  330. break
  331. }
  332. l := rv.Len()
  333. ee.encodeMapPreamble(l)
  334. if l == 0 {
  335. break
  336. }
  337. keyTypeIsString := rt.Key().Kind() == reflect.String
  338. mks := rv.MapKeys()
  339. // for j, lmks := 0, len(mks); j < lmks; j++ {
  340. for j := range mks {
  341. if keyTypeIsString {
  342. ee.encodeSymbol(mks[j].String())
  343. } else {
  344. e.encodeValue(mks[j])
  345. }
  346. e.encodeValue(rv.MapIndex(mks[j]))
  347. }
  348. case reflect.Struct:
  349. e.encStruct(rt, rv)
  350. case reflect.Ptr:
  351. if rv.IsNil() {
  352. ee.encodeNil()
  353. break
  354. }
  355. e.encodeValue(rv.Elem())
  356. case reflect.Interface:
  357. if rv.IsNil() {
  358. ee.encodeNil()
  359. break
  360. }
  361. e.encodeValue(rv.Elem())
  362. case reflect.Int, reflect.Int8, reflect.Int64, reflect.Int32, reflect.Int16:
  363. ee.encodeInt(rv.Int())
  364. case reflect.Uint8, reflect.Uint64, reflect.Uint, reflect.Uint32, reflect.Uint16:
  365. ee.encodeUint(rv.Uint())
  366. case reflect.Invalid:
  367. ee.encodeNil()
  368. default:
  369. encErr("Unsupported kind: %s, for: %#v", rk, rv)
  370. }
  371. return
  372. }
  373. func (e *Encoder) encStruct(rt reflect.Type, rv reflect.Value) {
  374. sis := getStructFieldInfos(rt)
  375. newlen := len(sis)
  376. rvals := make([]reflect.Value, newlen)
  377. encnames := make([]string, newlen)
  378. newlen = 0
  379. // var rv0 reflect.Value
  380. // for i := 0; i < l; i++ {
  381. // si := sis[i]
  382. for _, si := range sis {
  383. if si.i > -1 {
  384. rvals[newlen] = rv.Field(int(si.i))
  385. } else {
  386. rvals[newlen] = rv.FieldByIndex(si.is)
  387. }
  388. if si.omitEmpty && isEmptyValue(rvals[newlen]) {
  389. continue
  390. }
  391. // sivals[newlen] = i
  392. encnames[newlen] = si.encName
  393. newlen++
  394. }
  395. ee := e.e //don't dereference everytime
  396. ee.encodeMapPreamble(newlen)
  397. for j := 0; j < newlen; j++ {
  398. //e.encString(sis[sivals[j]].encName)
  399. ee.encodeSymbol(encnames[j])
  400. e.encodeValue(rvals[j])
  401. }
  402. }
  403. // ----------------------------------------
  404. func (z *ioEncWriter) writeUint16(v uint16) {
  405. bigen.PutUint16(z.x[:2], v)
  406. z.writeb(z.x[:2])
  407. }
  408. func (z *ioEncWriter) writeUint32(v uint32) {
  409. bigen.PutUint32(z.x[:4], v)
  410. z.writeb(z.x[:4])
  411. }
  412. func (z *ioEncWriter) writeUint64(v uint64) {
  413. bigen.PutUint64(z.x[:8], v)
  414. z.writeb(z.x[:8])
  415. }
  416. func (z *ioEncWriter) writeb(bs []byte) {
  417. n, err := z.w.Write(bs)
  418. if err != nil {
  419. panic(err)
  420. }
  421. if n != len(bs) {
  422. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(bs), n)
  423. }
  424. }
  425. func (z *ioEncWriter) writestr(s string) {
  426. n, err := z.w.WriteString(s)
  427. if err != nil {
  428. panic(err)
  429. }
  430. if n != len(s) {
  431. doPanic(msgTagEnc, "write: Incorrect num bytes written. Expecting: %v, Wrote: %v", len(s), n)
  432. }
  433. }
  434. func (z *ioEncWriter) writen1(b byte) {
  435. if err := z.w.WriteByte(b); err != nil {
  436. panic(err)
  437. }
  438. }
  439. func (z *ioEncWriter) writen2(b1 byte, b2 byte) {
  440. z.writen1(b1)
  441. z.writen1(b2)
  442. }
  443. func (z *ioEncWriter) writen3(b1, b2, b3 byte) {
  444. z.writen1(b1)
  445. z.writen1(b2)
  446. z.writen1(b3)
  447. }
  448. func (z *ioEncWriter) writen4(b1, b2, b3, b4 byte) {
  449. z.writen1(b1)
  450. z.writen1(b2)
  451. z.writen1(b3)
  452. z.writen1(b4)
  453. }
  454. func (z *ioEncWriter) flush() {
  455. if f, ok := z.w.(ioEncWriterFlusher); ok {
  456. if err := f.Flush(); err != nil {
  457. panic(err)
  458. }
  459. }
  460. }
  461. // ----------------------------------------
  462. func (z *bytesEncWriter) writeUint16(v uint16) {
  463. c := z.grow(2)
  464. z.b[c] = byte(v >> 8)
  465. z.b[c + 1] = byte(v)
  466. }
  467. func (z *bytesEncWriter) writeUint32(v uint32) {
  468. c := z.grow(4)
  469. z.b[c] = byte(v >> 24)
  470. z.b[c + 1] = byte(v >> 16)
  471. z.b[c + 2] = byte(v >> 8)
  472. z.b[c + 3] = byte(v)
  473. }
  474. func (z *bytesEncWriter) writeUint64(v uint64) {
  475. c := z.grow(8)
  476. z.b[c] = byte(v >> 56)
  477. z.b[c + 1] = byte(v >> 48)
  478. z.b[c + 2] = byte(v >> 40)
  479. z.b[c + 3] = byte(v >> 32)
  480. z.b[c + 4] = byte(v >> 24)
  481. z.b[c + 5] = byte(v >> 16)
  482. z.b[c + 6] = byte(v >> 8)
  483. z.b[c + 7] = byte(v)
  484. }
  485. func (z *bytesEncWriter) writeb(s []byte) {
  486. c := z.grow(len(s))
  487. copy(z.b[c:], s)
  488. }
  489. func (z *bytesEncWriter) writestr(s string) {
  490. c := z.grow(len(s))
  491. copy(z.b[c:], s)
  492. }
  493. func (z *bytesEncWriter) writen1(b1 byte) {
  494. c := z.grow(1)
  495. z.b[c] = b1
  496. }
  497. func (z *bytesEncWriter) writen2(b1 byte, b2 byte) {
  498. c := z.grow(2)
  499. z.b[c] = b1
  500. z.b[c + 1] = b2
  501. }
  502. func (z *bytesEncWriter) writen3(b1 byte, b2 byte, b3 byte) {
  503. c := z.grow(3)
  504. z.b[c] = b1
  505. z.b[c + 1] = b2
  506. z.b[c + 2] = b3
  507. }
  508. func (z *bytesEncWriter) writen4(b1 byte, b2 byte, b3 byte, b4 byte) {
  509. c := z.grow(4)
  510. z.b[c] = b1
  511. z.b[c + 1] = b2
  512. z.b[c + 2] = b3
  513. z.b[c + 3] = b4
  514. }
  515. func (z *bytesEncWriter) flush() {
  516. *(z.out) = z.b[:z.c]
  517. }
  518. func (z *bytesEncWriter) grow(n int) (oldcursor int) {
  519. oldcursor = z.c
  520. z.c = oldcursor + n
  521. if z.c > cap(z.b) {
  522. // It tried using appendslice logic: (if cap < 1024, *2, else *1.25).
  523. // However, it was too expensive, causing too many iterations of copy.
  524. // Using bytes.Buffer model was much better (2*cap + n)
  525. bs := make([]byte, 2*cap(z.b)+n)
  526. copy(bs, z.b[:oldcursor])
  527. z.b = bs
  528. } else if z.c > len(z.b) {
  529. z.b = z.b[:cap(z.b)]
  530. }
  531. return
  532. }
  533. // ----------------------------------------
// encErr reports an encoder failure by forwarding format and params to
// doPanic, tagged with msgTagEnc.
func encErr(format string, params ...interface{}) {
	doPanic(msgTagEnc, format, params...)
}
  537. // EncodeTimeExt encodes a time.Time as a []byte, including
  538. // information on the instant in time and UTC offset.
  539. func encodeTime(t time.Time) ([]byte) {
  540. //t := rv.Interface().(time.Time)
  541. tsecs, tnsecs := t.Unix(), t.Nanosecond()
  542. var padzero bool
  543. var bs [14]byte
  544. var i int
  545. l := t.Location()
  546. if l == time.UTC {
  547. l = nil
  548. }
  549. if tsecs > math.MinInt32 && tsecs < math.MaxInt32 {
  550. bigen.PutUint32(bs[i:], uint32(int32(tsecs)))
  551. i = i + 4
  552. } else {
  553. bigen.PutUint64(bs[i:], uint64(tsecs))
  554. i = i + 8
  555. padzero = (tnsecs == 0)
  556. }
  557. if tnsecs != 0 {
  558. bigen.PutUint32(bs[i:], uint32(tnsecs))
  559. i = i + 4
  560. }
  561. if l != nil {
  562. // Note that Go Libs do not give access to dst flag.
  563. _, zoneOffset := t.Zone()
  564. //zoneName, zoneOffset := t.Zone()
  565. //fmt.Printf(">>>>>> ENC: zone: %s, %v\n", zoneName, zoneOffset)
  566. zoneOffset /= 60
  567. isNeg := zoneOffset < 0
  568. if isNeg {
  569. zoneOffset = -zoneOffset
  570. }
  571. var z uint16 = uint16(zoneOffset)
  572. if isNeg {
  573. z |= 1 << 15 //set sign bit
  574. }
  575. //fmt.Printf(">>>>>> ENC: z: %b\n", z)
  576. bigen.PutUint16(bs[i:], z)
  577. i = i + 2
  578. }
  579. if padzero {
  580. i = i + 1
  581. }
  582. //fmt.Printf(">>>> EncodeTimeExt: t: %v, len: %v, v: %v\n", t, i, bs[0:i])
  583. return bs[0:i]
  584. }