Jelajahi Sumber

codec: refactoring to set us up for mid-stack inlining performance improvements in 1.10

Ugorji Nwoke 8 tahun lalu
induk
melakukan
7a1de86227
5 mengubah file dengan 262 tambahan dan 89 penghapusan
  1. 8 0
      codec/0doc.go
  2. 91 13
      codec/decode.go
  3. 151 66
      codec/encode.go
  4. 6 6
      codec/json.go
  5. 6 4
      codec/z_all_test.go

+ 8 - 0
codec/0doc.go

@@ -218,3 +218,11 @@ with some caveats. See Encode documentation.
 
 */
 package codec
+
+// TODO:
+//   - In Go 1.10, when mid-stack inlining is enabled,
+//     we should use committed functions for writeXXX and readXXX calls.
+//     This involves uncommenting the methods for decReaderSwitch and encWriterSwitch
+//     and using those (decReaderSwitch and encWriterSwitch) in all handles
+//     instead of encWriter and decReader.
+//

+ 91 - 13
codec/decode.go

@@ -1682,6 +1682,87 @@ type rtid2rv struct {
 	rv   reflect.Value
 }
 
+// --------------
+
+type decReaderSwitch struct {
+	rb bytesDecReader
+	// ---- cpu cache line boundary?
+	ri       ioDecReader
+	mtr, str bool // whether maptype or slicetype are known types
+
+	be    bool // is binary encoding
+	bytes bool // is bytes reader
+	js    bool // is json handle
+	jsms  bool // is json handle, and MapKeyAsString
+	esep  bool // has elem separators
+}
+
+// TODO: Uncomment after mid-stack inlining enabled in go 1.10
+//
+// func (z *decReaderSwitch) unreadn1() {
+// 	if z.bytes {
+// 		z.rb.unreadn1()
+// 	} else {
+// 		z.ri.unreadn1()
+// 	}
+// }
+// func (z *decReaderSwitch) readx(n int) []byte {
+// 	if z.bytes {
+// 		return z.rb.readx(n)
+// 	}
+// 	return z.ri.readx(n)
+// }
+// func (z *decReaderSwitch) readb(s []byte) {
+// 	if z.bytes {
+// 		z.rb.readb(s)
+// 	} else {
+// 		z.ri.readb(s)
+// 	}
+// }
+// func (z *decReaderSwitch) readn1() uint8 {
+// 	if z.bytes {
+// 		return z.rb.readn1()
+// 	}
+// 	return z.ri.readn1()
+// }
+// func (z *decReaderSwitch) numread() int {
+// 	if z.bytes {
+// 		return z.rb.numread()
+// 	}
+// 	return z.ri.numread()
+// }
+// func (z *decReaderSwitch) track() {
+// 	if z.bytes {
+// 		z.rb.track()
+// 	} else {
+// 		z.ri.track()
+// 	}
+// }
+// func (z *decReaderSwitch) stopTrack() []byte {
+// 	if z.bytes {
+// 		return z.rb.stopTrack()
+// 	}
+// 	return z.ri.stopTrack()
+// }
+// func (z *decReaderSwitch) skip(accept *bitset256) (token byte) {
+// 	if z.bytes {
+// 		return z.rb.skip(accept)
+// 	}
+// 	return z.ri.skip(accept)
+// }
+// func (z *decReaderSwitch) readTo(in []byte, accept *bitset256) (out []byte) {
+// 	if z.bytes {
+// 		return z.rb.readTo(in, accept)
+// 	}
+// 	return z.ri.readTo(in, accept)
+// }
+// func (z *decReaderSwitch) readUntil(in []byte, stop byte) (out []byte) {
+// 	if z.bytes {
+// 		return z.rb.readUntil(in, stop)
+// 	}
+// 	return z.ri.readUntil(in, stop)
+// }
+
 // A Decoder reads and decodes an object from an input stream in the codec format.
 type Decoder struct {
 	// hopefully, reduce dereferencing cost by laying the decReader inside the Decoder.
@@ -1694,18 +1775,8 @@ type Decoder struct {
 	hh Handle
 	h  *BasicHandle
 
-	mtr, str bool // whether maptype or slicetype are known types
-
-	be    bool // is binary encoding
-	bytes bool // is bytes reader
-	js    bool // is json handle
-	jsms  bool // is json handle, and MapKeyAsString
-	esep  bool // has elem separators
-
-	// ---- cpu cache line boundary?
-	rb bytesDecReader
 	// ---- cpu cache line boundary?
-	ri ioDecReader
+	decReaderSwitch
 	// ---- cpu cache line boundary?
 	bi bufioDecReader
 	// ---- cpu cache line boundary?
@@ -1751,8 +1822,8 @@ func NewDecoderBytes(in []byte, h Handle) *Decoder {
 var defaultDecNaked decNaked
 
 func newDecoder(h Handle) *Decoder {
-	d := &Decoder{hh: h, h: h.getBasicHandle(), be: h.isBinary()}
-
+	d := &Decoder{hh: h, h: h.getBasicHandle()}
+	d.be = h.isBinary()
 	// NOTE: do not initialize d.n here. It is lazily initialized in d.naked()
 	var jh *JsonHandle
 	jh, d.js = h.(*JsonHandle)
@@ -1808,6 +1879,10 @@ func (d *Decoder) resetCommon() {
 // Reset the Decoder with a new Reader to decode from,
 // clearing all state from last run(s).
 func (d *Decoder) Reset(r io.Reader) {
+	if r == nil {
+		return
+	}
+	d.bytes = false
 	if d.h.ReaderBufferSize > 0 {
 		d.bi.buf = make([]byte, 0, d.h.ReaderBufferSize)
 		d.bi.reset(r)
@@ -1824,6 +1899,9 @@ func (d *Decoder) Reset(r io.Reader) {
 // ResetBytes resets the Decoder with a new []byte to decode from,
 // clearing all state from last run(s).
 func (d *Decoder) ResetBytes(in []byte) {
+	if in == nil {
+		return
+	}
 	d.bytes = true
 	d.rb.reset(in)
 	d.r = &d.rb

+ 151 - 66
codec/encode.go

@@ -6,6 +6,7 @@ package codec
 import (
 	"bufio"
 	"encoding"
+	"errors"
 	"fmt"
 	"io"
 	"reflect"
@@ -16,6 +17,8 @@ import (
 
 const defEncByteBufSize = 1 << 6 // 4:16, 6:64, 8:256, 10:1024
 
+var errEncoderNotInitialized = errors.New("Encoder not initialized")
+
 // AsSymbolFlag defines what should be encoded as symbols.
 type AsSymbolFlag uint8
 
@@ -268,76 +271,110 @@ func (z *ioEncWriter) atEndOfEncode() {
 	}
 }
 
-// ----------------------------------------
+// // ----------------------------------------
 
-// bytesEncWriter implements encWriter and can write to an byte slice.
-// It is used by Marshal function.
-type bytesEncWriter struct {
-	b   []byte
-	c   int     // cursor
-	out *[]byte // write out on atEndOfEncode
-}
+// // bytesEncWriter implements encWriter and can write to an byte slice.
+// // It is used by Marshal function.
+// type bytesEncWriter struct {
+// 	b   []byte
+// 	c   int     // cursor
+// 	out *[]byte // write out on atEndOfEncode
+// }
 
-func (z *bytesEncWriter) writeb(s []byte) {
-	oc, a := z.growNoAlloc(len(s))
-	if a {
-		z.growAlloc(len(s), oc)
-	}
-	copy(z.b[oc:], s)
-}
+// func (z *bytesEncWriter) writeb(s []byte) {
+// 	oc, a := z.growNoAlloc(len(s))
+// 	if a {
+// 		z.growAlloc(len(s), oc)
+// 	}
+// 	copy(z.b[oc:], s)
+// }
 
-func (z *bytesEncWriter) writestr(s string) {
-	oc, a := z.growNoAlloc(len(s))
-	if a {
-		z.growAlloc(len(s), oc)
-	}
-	copy(z.b[oc:], s)
-}
+// func (z *bytesEncWriter) writestr(s string) {
+// 	oc, a := z.growNoAlloc(len(s))
+// 	if a {
+// 		z.growAlloc(len(s), oc)
+// 	}
+// 	copy(z.b[oc:], s)
+// }
 
-func (z *bytesEncWriter) writen1(b1 byte) {
-	oc, a := z.growNoAlloc(1)
-	if a {
-		z.growAlloc(1, oc)
-	}
-	z.b[oc] = b1
-}
+// func (z *bytesEncWriter) writen1(b1 byte) {
+// 	oc, a := z.growNoAlloc(1)
+// 	if a {
+// 		z.growAlloc(1, oc)
+// 	}
+// 	z.b[oc] = b1
+// }
 
-func (z *bytesEncWriter) writen2(b1, b2 byte) {
-	oc, a := z.growNoAlloc(2)
-	if a {
-		z.growAlloc(2, oc)
-	}
-	z.b[oc+1] = b2
-	z.b[oc] = b1
-}
+// func (z *bytesEncWriter) writen2(b1, b2 byte) {
+// 	oc, a := z.growNoAlloc(2)
+// 	if a {
+// 		z.growAlloc(2, oc)
+// 	}
+// 	z.b[oc+1] = b2
+// 	z.b[oc] = b1
+// }
 
-func (z *bytesEncWriter) atEndOfEncode() {
-	*(z.out) = z.b[:z.c]
-}
+// func (z *bytesEncWriter) atEndOfEncode() {
+// 	*(z.out) = z.b[:z.c]
+// }
+
+// // have a growNoalloc(n int), which can be inlined.
+// // if allocation is needed, then call growAlloc(n int)
+
+// func (z *bytesEncWriter) growNoAlloc(n int) (oldcursor int, allocNeeded bool) {
+// 	oldcursor = z.c
+// 	z.c = z.c + n
+// 	if z.c > len(z.b) {
+// 		if z.c > cap(z.b) {
+// 			allocNeeded = true
+// 		} else {
+// 			z.b = z.b[:cap(z.b)]
+// 		}
+// 	}
+// 	return
+// }
 
-// have a growNoalloc(n int), which can be inlined.
-// if allocation is needed, then call growAlloc(n int)
+// func (z *bytesEncWriter) growAlloc(n int, oldcursor int) {
+// 	// appendslice logic (if cap < 1024, *2, else *1.25): more expensive. many copy calls.
+// 	// bytes.Buffer model (2*cap + n): much better
+// 	// bs := make([]byte, 2*cap(z.b)+n)
+// 	bs := make([]byte, growCap(cap(z.b), 1, n))
+// 	copy(bs, z.b[:oldcursor])
+// 	z.b = bs
+// }
 
-func (z *bytesEncWriter) growNoAlloc(n int) (oldcursor int, allocNeeded bool) {
-	oldcursor = z.c
-	z.c = z.c + n
-	if z.c > len(z.b) {
-		if z.c > cap(z.b) {
-			allocNeeded = true
-		} else {
-			z.b = z.b[:cap(z.b)]
-		}
-	}
-	return
+// func (z *bytesEncWriter) reset(in []byte, out *[]byte) {
+// 	z.out = out
+// 	z.b = in
+// 	z.c = 0
+// }
+
+// ---------------------------------------------
+
+// bytesEncAppender implements encWriter and can write to a byte slice.
+type bytesEncAppender struct {
+	b   []byte
+	out *[]byte
 }
 
-func (z *bytesEncWriter) growAlloc(n int, oldcursor int) {
-	// appendslice logic (if cap < 1024, *2, else *1.25): more expensive. many copy calls.
-	// bytes.Buffer model (2*cap + n): much better
-	// bs := make([]byte, 2*cap(z.b)+n)
-	bs := make([]byte, growCap(cap(z.b), 1, n))
-	copy(bs, z.b[:oldcursor])
-	z.b = bs
+func (z *bytesEncAppender) writeb(s []byte) {
+	z.b = append(z.b, s...)
+}
+func (z *bytesEncAppender) writestr(s string) {
+	z.b = append(z.b, s...)
+}
+func (z *bytesEncAppender) writen1(b1 byte) {
+	z.b = append(z.b, b1)
+}
+func (z *bytesEncAppender) writen2(b1, b2 byte) {
+	z.b = append(z.b, b1, b2)
+}
+func (z *bytesEncAppender) atEndOfEncode() {
+	*(z.out) = z.b
+}
+func (z *bytesEncAppender) reset(in []byte, out *[]byte) {
+	z.b = in[:0]
+	z.out = out
 }
 
 // ---------------------------------------------
@@ -975,6 +1012,45 @@ func (e *Encoder) kMapCanonical(rtkey reflect.Type, rv reflect.Value, mks []refl
 
 // // --------------------------------------------------
 
+type encWriterSwitch struct {
+	wi ioEncWriter
+	// ---- cpu cache line boundary?
+	// wb bytesEncWriter
+	wb bytesEncAppender
+	wx bool // if bytes, wx=true
+}
+
+// TODO: Uncomment after mid-stack inlining enabled in go 1.10
+//
+// func (z *encWriterSwitch) writeb(s []byte) {
+// 	if z.wx {
+// 		z.wb.writeb(s)
+// 	} else {
+// 		z.wi.writeb(s)
+// 	}
+// }
+// func (z *encWriterSwitch) writestr(s string) {
+// 	if z.wx {
+// 		z.wb.writestr(s)
+// 	} else {
+// 		z.wi.writestr(s)
+// 	}
+// }
+// func (z *encWriterSwitch) writen1(b1 byte) {
+// 	if z.wx {
+// 		z.wb.writen1(b1)
+// 	} else {
+// 		z.wi.writen1(b1)
+// 	}
+// }
+// func (z *encWriterSwitch) writen2(b1, b2 byte) {
+// 	if z.wx {
+// 		z.wb.writen2(b1, b2)
+// 	} else {
+// 		z.wi.writen2(b1, b2)
+// 	}
+// }
+
 // An Encoder writes an object to an output stream in the codec format.
 type Encoder struct {
 	// hopefully, reduce dereferencing cost by laying the encWriter inside the Encoder
@@ -994,9 +1070,7 @@ type Encoder struct {
 	ci set
 
 	// ---- cpu cache line boundary?
-	wi ioEncWriter
-	// ---- cpu cache line boundary?
-	wb bytesEncWriter
+	encWriterSwitch
 
 	// ---- cpu cache line boundary?
 	bw bufio.Writer
@@ -1033,7 +1107,7 @@ func NewEncoderBytes(out *[]byte, h Handle) *Encoder {
 }
 
 func newEncoder(h Handle) *Encoder {
-	e := &Encoder{hh: h, h: h.getBasicHandle()}
+	e := &Encoder{hh: h, h: h.getBasicHandle(), err: errEncoderNotInitialized}
 	e.e = h.newEncDriver(e)
 	e.as, _ = e.e.(encDriverAsis)
 	e.esep = e.hh.hasElemSeparators()
@@ -1046,7 +1120,11 @@ func newEncoder(h Handle) *Encoder {
 // This accommodates using the state of the Encoder,
 // where it has "cached" information about sub-engines.
 func (e *Encoder) Reset(w io.Writer) {
+	if w == nil {
+		return
+	}
 	var ok bool
+	e.wx = false
 	e.wi.w = w
 	if e.h.WriterBufferSize > 0 {
 		bw := bufio.NewWriterSize(w, e.h.WriterBufferSize)
@@ -1073,11 +1151,18 @@ func (e *Encoder) Reset(w io.Writer) {
 
 // ResetBytes resets the Encoder with a new destination output []byte.
 func (e *Encoder) ResetBytes(out *[]byte) {
-	in := *out
+	if out == nil {
+		return
+	}
+	var in []byte
+	if out != nil {
+		in = *out
+	}
 	if in == nil {
 		in = make([]byte, defEncByteBufSize)
 	}
-	e.wb.b, e.wb.out, e.wb.c = in, out, 0
+	e.wx = true
+	e.wb.reset(in, out)
 	e.w = &e.wb
 	e.e.reset()
 	e.cf.reset(e.hh)

+ 6 - 6
codec/json.go

@@ -163,7 +163,7 @@ type jsonEncDriver struct {
 	noBuiltInTypes
 	e  *Encoder
 	h  *JsonHandle
-	w  encWriter
+	w  encWriter // encWriter // *encWriterSwitch
 	se setExtWrapper
 	// ---- cpu cache line boundary?
 	ds string // indent string
@@ -513,7 +513,7 @@ type jsonDecDriver struct {
 	noBuiltInTypes
 	d  *Decoder
 	h  *JsonHandle
-	r  decReader
+	r  decReader // *decReaderSwitch // decReader
 	se setExtWrapper
 
 	// ---- writable fields during execution --- *try* to keep in sep cache line
@@ -666,8 +666,8 @@ func (d *jsonDecDriver) TryDecodeAsNil() bool {
 	if d.tok == 0 {
 		d.tok = d.r.skip(&jsonCharWhitespaceSet)
 	}
-	// TODO: we shouldn't try to see if "null" was here, right?
-	// only "null" denotes a nil
+	// we shouldn't try to see if "null" was here, right?
+	// only the plain string: `null` denotes a nil (ie not quotes)
 	if d.tok == 'n' {
 		d.readLit(3, jsonLitNull+1) // (n)ull
 		return true
@@ -1174,7 +1174,7 @@ func (h *JsonHandle) newDecDriver(d *Decoder) decDriver {
 }
 
 func (e *jsonEncDriver) reset() {
-	e.w = e.e.w
+	e.w = e.e.w // e.e.w // &e.e.encWriterSwitch
 	e.se.i = e.h.RawBytesExt
 	if e.bs != nil {
 		e.bs = e.bs[:0]
@@ -1192,7 +1192,7 @@ func (e *jsonEncDriver) reset() {
 }
 
 func (d *jsonDecDriver) reset() {
-	d.r = d.d.r
+	d.r = d.d.r // &d.d.decReaderSwitch // d.d.r
 	d.se.i = d.h.RawBytesExt
 	if d.bs != nil {
 		d.bs = d.bs[:0]

+ 6 - 4
codec/z_all_test.go

@@ -44,6 +44,7 @@ func testGroupResetFlags() {
 	testEncodeOptions.RecursiveEmptyCheck = false
 	testDecodeOptions.MapValueReset = false
 	testUseIoEncDec = -1
+	testDepth = 0
 }
 
 func testSuite(t *testing.T, f func(t *testing.T)) {
@@ -72,6 +73,11 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 	testReinit()
 	t.Run("optionsTrue", f)
 
+	testDepth = 6
+	testReinit()
+	t.Run("optionsTrue-deepstruct", f)
+	testDepth = 0
+
 	testEncodeOptions.AsSymbols = AsSymbolAll
 	testUseIoWrapper = true
 	testReinit()
@@ -79,10 +85,6 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 
 	testUseIoEncDec = -1
 
-	testDepth = 6
-	testReinit()
-	t.Run("optionsTrue-deepstruct", f)
-
 	// make buffer small enough so that we have to re-fill multiple times.
 	testSkipRPCTests = true
 	testUseIoEncDec = 128