Browse Source

Merge pull request #7023 from heyitsanthony/lease-freeze

clientv3: fix lease "freezing" on unhealthy cluster
Anthony Romano 9 years ago
parent
commit
531c3061c1

+ 56 - 0
clientv3/integration/lease_test.go

@@ -17,10 +17,12 @@ package integration
 import (
 	"reflect"
 	"sort"
+	"sync"
 	"testing"
 	"time"
 
 	"github.com/coreos/etcd/clientv3"
+	"github.com/coreos/etcd/clientv3/concurrency"
 	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
 	"github.com/coreos/etcd/integration"
 	"github.com/coreos/etcd/pkg/testutil"
@@ -574,3 +576,57 @@ func TestLeaseKeepAliveLoopExit(t *testing.T) {
 		t.Fatalf("expected %T, got %v(%T)", clientv3.ErrKeepAliveHalted{}, err, err)
 	}
 }
+
+// TestV3LeaseFailureOverlap issues Grant and Keepalive requests to a cluster
+// before, during, and after quorum loss to confirm Grant/Keepalive tolerates
+// transient cluster failure.
+func TestV3LeaseFailureOverlap(t *testing.T) {
+	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 2})
+	defer clus.Terminate(t)
+
+	numReqs := 5
+	cli := clus.Client(0)
+
+	// bring up a session, tear it down
+	updown := func(i int) error {
+		sess, err := concurrency.NewSession(cli)
+		if err != nil {
+			return err
+		}
+		ch := make(chan struct{})
+		go func() {
+			defer close(ch)
+			sess.Close()
+		}()
+		select {
+		case <-ch:
+		case <-time.After(time.Minute / 4):
+			t.Fatalf("timeout %d", i)
+		}
+		return nil
+	}
+
+	var wg sync.WaitGroup
+	mkReqs := func(n int) {
+		wg.Add(numReqs)
+		for i := 0; i < numReqs; i++ {
+			go func() {
+				defer wg.Done()
+				err := updown(n)
+				if err == nil || err == rpctypes.ErrTimeoutDueToConnectionLost {
+					return
+				}
+				t.Fatal(err)
+			}()
+		}
+	}
+
+	mkReqs(1)
+	clus.Members[1].Stop(t)
+	mkReqs(2)
+	time.Sleep(time.Second)
+	mkReqs(3)
+	clus.Members[1].Restart(t)
+	mkReqs(4)
+	wg.Wait()
+}

+ 0 - 3
clientv3/lease.go

@@ -177,9 +177,6 @@ func (l *lessor) Grant(ctx context.Context, ttl int64) (*LeaseGrantResponse, err
 		if isHaltErr(cctx, err) {
 			return nil, toErr(cctx, err)
 		}
-		if nerr := l.newStream(); nerr != nil {
-			return nil, nerr
-		}
 	}
 }
 

+ 21 - 18
etcdserver/api/v3rpc/rpctypes/error.go

@@ -52,12 +52,13 @@ var (
 	ErrGRPCPermissionNotGranted = grpc.Errorf(codes.FailedPrecondition, "etcdserver: permission is not granted to the role")
 	ErrGRPCAuthNotEnabled       = grpc.Errorf(codes.FailedPrecondition, "etcdserver: authentication is not enabled")
 
-	ErrGRPCNoLeader               = grpc.Errorf(codes.Unavailable, "etcdserver: no leader")
-	ErrGRPCNotCapable             = grpc.Errorf(codes.Unavailable, "etcdserver: not capable")
-	ErrGRPCStopped                = grpc.Errorf(codes.Unavailable, "etcdserver: server stopped")
-	ErrGRPCTimeout                = grpc.Errorf(codes.Unavailable, "etcdserver: request timed out")
-	ErrGRPCTimeoutDueToLeaderFail = grpc.Errorf(codes.Unavailable, "etcdserver: request timed out, possibly due to previous leader failure")
-	ErrGRPCUnhealthy              = grpc.Errorf(codes.Unavailable, "etcdserver: unhealthy cluster")
+	ErrGRPCNoLeader                   = grpc.Errorf(codes.Unavailable, "etcdserver: no leader")
+	ErrGRPCNotCapable                 = grpc.Errorf(codes.Unavailable, "etcdserver: not capable")
+	ErrGRPCStopped                    = grpc.Errorf(codes.Unavailable, "etcdserver: server stopped")
+	ErrGRPCTimeout                    = grpc.Errorf(codes.Unavailable, "etcdserver: request timed out")
+	ErrGRPCTimeoutDueToLeaderFail     = grpc.Errorf(codes.Unavailable, "etcdserver: request timed out, possibly due to previous leader failure")
+	ErrGRPCTimeoutDueToConnectionLost = grpc.Errorf(codes.Unavailable, "etcdserver: request timed out, possibly due to connection lost")
+	ErrGRPCUnhealthy                  = grpc.Errorf(codes.Unavailable, "etcdserver: unhealthy cluster")
 
 	errStringToError = map[string]error{
 		grpc.ErrorDesc(ErrGRPCEmptyKey):     ErrGRPCEmptyKey,
@@ -91,12 +92,13 @@ var (
 		grpc.ErrorDesc(ErrGRPCPermissionNotGranted): ErrGRPCPermissionNotGranted,
 		grpc.ErrorDesc(ErrGRPCAuthNotEnabled):       ErrGRPCAuthNotEnabled,
 
-		grpc.ErrorDesc(ErrGRPCNoLeader):               ErrGRPCNoLeader,
-		grpc.ErrorDesc(ErrGRPCNotCapable):             ErrGRPCNotCapable,
-		grpc.ErrorDesc(ErrGRPCStopped):                ErrGRPCStopped,
-		grpc.ErrorDesc(ErrGRPCTimeout):                ErrGRPCTimeout,
-		grpc.ErrorDesc(ErrGRPCTimeoutDueToLeaderFail): ErrGRPCTimeoutDueToLeaderFail,
-		grpc.ErrorDesc(ErrGRPCUnhealthy):              ErrGRPCUnhealthy,
+		grpc.ErrorDesc(ErrGRPCNoLeader):                   ErrGRPCNoLeader,
+		grpc.ErrorDesc(ErrGRPCNotCapable):                 ErrGRPCNotCapable,
+		grpc.ErrorDesc(ErrGRPCStopped):                    ErrGRPCStopped,
+		grpc.ErrorDesc(ErrGRPCTimeout):                    ErrGRPCTimeout,
+		grpc.ErrorDesc(ErrGRPCTimeoutDueToLeaderFail):     ErrGRPCTimeoutDueToLeaderFail,
+		grpc.ErrorDesc(ErrGRPCTimeoutDueToConnectionLost): ErrGRPCTimeoutDueToConnectionLost,
+		grpc.ErrorDesc(ErrGRPCUnhealthy):                  ErrGRPCUnhealthy,
 	}
 
 	// client-side error
@@ -131,12 +133,13 @@ var (
 	ErrPermissionNotGranted = Error(ErrGRPCPermissionNotGranted)
 	ErrAuthNotEnabled       = Error(ErrGRPCAuthNotEnabled)
 
-	ErrNoLeader               = Error(ErrGRPCNoLeader)
-	ErrNotCapable             = Error(ErrGRPCNotCapable)
-	ErrStopped                = Error(ErrGRPCStopped)
-	ErrTimeout                = Error(ErrGRPCTimeout)
-	ErrTimeoutDueToLeaderFail = Error(ErrGRPCTimeoutDueToLeaderFail)
-	ErrUnhealthy              = Error(ErrGRPCUnhealthy)
+	ErrNoLeader                   = Error(ErrGRPCNoLeader)
+	ErrNotCapable                 = Error(ErrGRPCNotCapable)
+	ErrStopped                    = Error(ErrGRPCStopped)
+	ErrTimeout                    = Error(ErrGRPCTimeout)
+	ErrTimeoutDueToLeaderFail     = Error(ErrGRPCTimeoutDueToLeaderFail)
+	ErrTimeoutDueToConnectionLost = Error(ErrGRPCTimeoutDueToConnectionLost)
+	ErrUnhealthy                  = Error(ErrGRPCUnhealthy)
 )
 
 // EtcdError defines gRPC server errors.

+ 2 - 0
etcdserver/api/v3rpc/util.go

@@ -57,6 +57,8 @@ func togRPCError(err error) error {
 		return rpctypes.ErrGRPCTimeout
 	case etcdserver.ErrTimeoutDueToLeaderFail:
 		return rpctypes.ErrGRPCTimeoutDueToLeaderFail
+	case etcdserver.ErrTimeoutDueToConnectionLost:
+		return rpctypes.ErrGRPCTimeoutDueToConnectionLost
 	case etcdserver.ErrUnhealthy:
 		return rpctypes.ErrGRPCUnhealthy