@@ -15,6 +15,8 @@
 package etcdserver
 
 import (
+	"bytes"
+	"encoding/binary"
 	"strconv"
 	"strings"
 	"time"
@@ -26,6 +28,9 @@ import (
 	"github.com/coreos/etcd/lease/leasehttp"
 	"github.com/coreos/etcd/lease/leasepb"
 	"github.com/coreos/etcd/mvcc"
+	"github.com/coreos/etcd/raft"
+
+	"github.com/coreos/go-semver/semver"
 	"golang.org/x/net/context"
 	"google.golang.org/grpc/metadata"
 )
@@ -44,6 +49,10 @@ const (
 	maxGapBetweenApplyAndCommitIndex = 1000
 )
 
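+// newRangeClusterVersion is the minimum cluster version required for the
+// ReadIndex-based Range path; older clusters are served by legacyRange.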
+var (
+	newRangeClusterVersion = *semver.Must(semver.NewVersion("3.1.0"))
+)
+
 type RaftKV interface {
 	Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
 	Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
@@ -86,6 +95,31 @@ type Authenticator interface {
 }
 
 func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
+	// TODO: remove this check when we release etcd 3.2
+	if s.ClusterVersion() == nil || s.ClusterVersion().LessThan(newRangeClusterVersion) {
+		return s.legacyRange(ctx, r)
+	}
+
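+	// a linearizable read must wait until this member's applied state has
+	// caught up with the leader's commit index as of this request; the read
+	// loop confirms that via raft ReadIndex before notifying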
+	if !r.Serializable {
+		err := s.linearizableReadNotify(ctx)
+		if err != nil {
+			return nil, err
+		}
+	}
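+	// serve the read from the local applied state; doSerialize runs the
+	// auth check (chk) and, if it passes, evaluates the range (get)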
+	var resp *pb.RangeResponse
+	var err error
+	chk := func(ai *auth.AuthInfo) error {
+		return s.authStore.IsRangePermitted(ai, r.Key, r.RangeEnd)
+	}
+	get := func() { resp, err = s.applyV3Base.Range(noTxn, r) }
+	if serr := s.doSerialize(ctx, chk, get); serr != nil {
+		return nil, serr
+	}
+	return resp, err
+}
+
+// TODO: remove this func when we release etcd 3.2
+func (s *EtcdServer) legacyRange(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
 	if r.Serializable {
 		var resp *pb.RangeResponse
 		var err error
@@ -143,6 +177,7 @@ func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse
 		}
 		return resp, err
 	}
+	// TODO: a read-only Txn does not need to go through raft
 	result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Txn: r})
 	if err != nil {
 		return nil, err
@@ -641,3 +676,95 @@ func (s *EtcdServer) processInternalRaftRequest(ctx context.Context, r pb.Intern
 
 // Watchable returns a watchable interface attached to the etcdserver.
 func (s *EtcdServer) Watchable() mvcc.WatchableKV { return s.KV() }
+
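+// linearizableReadLoop issues a raft ReadIndex request each time a reader
+// signals readwaitc, then releases all readers registered with the current
+// notifier once the applied index reaches the returned read index.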
+func (s *EtcdServer) linearizableReadLoop() {
+	var rs raft.ReadState
+	internalTimeout := time.Second
+
+	for {
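+		// tag this ReadIndex request with a unique 8-byte id so its
+		// response can be told apart from stale ones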
+		ctx := make([]byte, 8)
+		binary.BigEndian.PutUint64(ctx, s.reqIDGen.Next())
+
+		select {
+		case <-s.readwaitc:
+		case <-s.stopping:
+			return
+		}
+
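+		// swap in a fresh notifier: readers that registered before this
+		// point are released together when this round completes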
+		nextnr := newNotifier()
+
+		s.readMu.Lock()
+		nr := s.readNotifier
+		s.readNotifier = nextnr
+		s.readMu.Unlock()
+
+		cctx, cancel := context.WithTimeout(context.Background(), internalTimeout)
+		if err := s.r.ReadIndex(cctx, ctx); err != nil {
+			cancel()
+			if err == raft.ErrStopped {
+				return
+			}
+			plog.Errorf("failed to get read index from raft: %v", err)
+			nr.notify(err)
+			continue
+		}
+		cancel()
+
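+		// wait for the ReadIndex response that matches this request's id;
+		// responses from earlier, timed-out rounds are skipped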
+		var (
+			timeout bool
+			done    bool
+		)
+		for !timeout && !done {
+			select {
+			case rs = <-s.r.readStateC:
+				done = bytes.Equal(rs.RequestCtx, ctx)
+				if !done {
+					// a previous request might have timed out; ignore its
+					// response and keep waiting for the response to the
+					// current request
+					plog.Warningf("ignored out-of-date read index response (want %v, got %v)", ctx, rs.RequestCtx)
+				}
+			case <-time.After(internalTimeout):
+				plog.Warningf("timed out waiting for read index response")
+				nr.notify(ErrTimeout)
+				timeout = true
+			case <-s.stopping:
+				return
+			}
+		}
+		if !done {
+			continue
+		}
+
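+		// make sure the local state machine has applied everything up to
+		// the read index before unblocking readers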
+		if ai := s.getAppliedIndex(); ai < rs.Index {
+			select {
+			case <-s.applyWait.Wait(rs.Index):
+			case <-s.stopping:
+				return
+			}
+		}
+		// unblock all linearizable reads requested at indices before rs.Index
+		nr.notify(nil)
+	}
+}
+
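+// linearizableReadNotify registers with the current notifier, kicks the read
+// loop if needed, and blocks until the loop reports that local state has
+// caught up, the request is cancelled, or the server stops.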
+func (s *EtcdServer) linearizableReadNotify(ctx context.Context) error {
+	s.readMu.RLock()
+	nc := s.readNotifier
+	s.readMu.RUnlock()
+
+	// signal the linearizable read loop for the current notifier if it
+	// hasn't been signaled already
+	select {
+	case s.readwaitc <- struct{}{}:
+	default:
+	}
+
+	// wait for the read state notification
+	select {
+	case <-nc.c:
+		return nc.err
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-s.done:
+		return ErrStopped
+	}
+}