Browse Source

etcdserver: exclude learner from leader transfer

1. Maintenance API MoveLeader() returns ErrBadLeaderTransferee if the
transferee does not exist or is a raft learner.

2. etcdserver TransferLeadership() only chooses a voting member as the
transferee.
Jingyi Hu 6 years ago
parent
commit
44d935e90a

+ 22 - 0
etcdserver/api/membership/cluster.go

@@ -754,3 +754,25 @@ func (c *RaftCluster) IsLearner() bool {
 	}
 	}
 	return localMember.IsLearner
 	return localMember.IsLearner
 }
 }
+
+// IsMemberExist reports whether a member with the given id is present in the cluster's member map; the lookup is done while holding the cluster lock.
+func (c *RaftCluster) IsMemberExist(id types.ID) bool {
+	c.Lock()
+	defer c.Unlock()
+	_, ok := c.members[id]
+	return ok
+}
+
+// VotingMemberIDs returns the IDs of all cluster members whose IsLearner flag is false, sorted with sort.Sort over types.IDSlice; the member map is scanned while holding the cluster lock, and the result is nil when no member qualifies.
+func (c *RaftCluster) VotingMemberIDs() []types.ID {
+	c.Lock()
+	defer c.Unlock()
+	var ids []types.ID
+	for _, m := range c.members {
+		if !m.IsLearner {
+			ids = append(ids, m.ID)
+		}
+	}
+	sort.Sort(types.IDSlice(ids))
+	return ids
+}

+ 3 - 0
etcdserver/api/v3rpc/rpctypes/error.go

@@ -72,6 +72,7 @@ var (
 	ErrGRPCUnhealthy                  = status.New(codes.Unavailable, "etcdserver: unhealthy cluster").Err()
 	ErrGRPCUnhealthy                  = status.New(codes.Unavailable, "etcdserver: unhealthy cluster").Err()
 	ErrGRPCCorrupt                    = status.New(codes.DataLoss, "etcdserver: corrupt cluster").Err()
 	ErrGRPCCorrupt                    = status.New(codes.DataLoss, "etcdserver: corrupt cluster").Err()
 	ErrGPRCNotSupportedForLearner     = status.New(codes.FailedPrecondition, "etcdserver: rpc not supported for learner").Err()
 	ErrGPRCNotSupportedForLearner     = status.New(codes.FailedPrecondition, "etcdserver: rpc not supported for learner").Err()
+	ErrGRPCBadLeaderTransferee        = status.New(codes.FailedPrecondition, "etcdserver: bad leader transferee").Err()
 
 
 	errStringToError = map[string]error{
 	errStringToError = map[string]error{
 		ErrorDesc(ErrGRPCEmptyKey):      ErrGRPCEmptyKey,
 		ErrorDesc(ErrGRPCEmptyKey):      ErrGRPCEmptyKey,
@@ -123,6 +124,7 @@ var (
 		ErrorDesc(ErrGRPCTimeoutDueToConnectionLost): ErrGRPCTimeoutDueToConnectionLost,
 		ErrorDesc(ErrGRPCTimeoutDueToConnectionLost): ErrGRPCTimeoutDueToConnectionLost,
 		ErrorDesc(ErrGRPCUnhealthy):                  ErrGRPCUnhealthy,
 		ErrorDesc(ErrGRPCUnhealthy):                  ErrGRPCUnhealthy,
 		ErrorDesc(ErrGRPCCorrupt):                    ErrGRPCCorrupt,
 		ErrorDesc(ErrGRPCCorrupt):                    ErrGRPCCorrupt,
+		ErrorDesc(ErrGRPCBadLeaderTransferee):        ErrGRPCBadLeaderTransferee,
 	}
 	}
 )
 )
 
 
@@ -176,6 +178,7 @@ var (
 	ErrTimeoutDueToConnectionLost = Error(ErrGRPCTimeoutDueToConnectionLost)
 	ErrTimeoutDueToConnectionLost = Error(ErrGRPCTimeoutDueToConnectionLost)
 	ErrUnhealthy                  = Error(ErrGRPCUnhealthy)
 	ErrUnhealthy                  = Error(ErrGRPCUnhealthy)
 	ErrCorrupt                    = Error(ErrGRPCCorrupt)
 	ErrCorrupt                    = Error(ErrGRPCCorrupt)
+	ErrBadLeaderTransferee        = Error(ErrGRPCBadLeaderTransferee)
 )
 )
 
 
 // EtcdError defines gRPC server errors.
 // EtcdError defines gRPC server errors.

+ 1 - 0
etcdserver/api/v3rpc/util.go

@@ -55,6 +55,7 @@ var toGRPCErrorMap = map[error]error{
 	etcdserver.ErrUnhealthy:                  rpctypes.ErrGRPCUnhealthy,
 	etcdserver.ErrUnhealthy:                  rpctypes.ErrGRPCUnhealthy,
 	etcdserver.ErrKeyNotFound:                rpctypes.ErrGRPCKeyNotFound,
 	etcdserver.ErrKeyNotFound:                rpctypes.ErrGRPCKeyNotFound,
 	etcdserver.ErrCorrupt:                    rpctypes.ErrGRPCCorrupt,
 	etcdserver.ErrCorrupt:                    rpctypes.ErrGRPCCorrupt,
+	etcdserver.ErrBadLeaderTransferee:        rpctypes.ErrGRPCBadLeaderTransferee,
 
 
 	lease.ErrLeaseNotFound:    rpctypes.ErrGRPCLeaseNotFound,
 	lease.ErrLeaseNotFound:    rpctypes.ErrGRPCLeaseNotFound,
 	lease.ErrLeaseExists:      rpctypes.ErrGRPCLeaseExist,
 	lease.ErrLeaseExists:      rpctypes.ErrGRPCLeaseExist,

+ 1 - 0
etcdserver/errors.go

@@ -37,6 +37,7 @@ var (
 	ErrUnhealthy                  = errors.New("etcdserver: unhealthy cluster")
 	ErrUnhealthy                  = errors.New("etcdserver: unhealthy cluster")
 	ErrKeyNotFound                = errors.New("etcdserver: key not found")
 	ErrKeyNotFound                = errors.New("etcdserver: key not found")
 	ErrCorrupt                    = errors.New("etcdserver: corrupt cluster")
 	ErrCorrupt                    = errors.New("etcdserver: corrupt cluster")
+	ErrBadLeaderTransferee        = errors.New("etcdserver: bad leader transferee")
 )
 )
 
 
 type DiscoveryError struct {
 type DiscoveryError struct {

+ 8 - 8
etcdserver/server.go

@@ -1377,16 +1377,16 @@ func (s *EtcdServer) triggerSnapshot(ep *etcdProgress) {
 	ep.snapi = ep.appliedi
 	ep.snapi = ep.appliedi
 }
 }
 
 
-func (s *EtcdServer) isMultiNode() bool {
-	return s.cluster != nil && len(s.cluster.MemberIDs()) > 1
-}
-
 func (s *EtcdServer) isLeader() bool {
 func (s *EtcdServer) isLeader() bool {
 	return uint64(s.ID()) == s.Lead()
 	return uint64(s.ID()) == s.Lead()
 }
 }
 
 
 // MoveLeader transfers the leader to the given transferee.
 // MoveLeader transfers the leader to the given transferee.
 func (s *EtcdServer) MoveLeader(ctx context.Context, lead, transferee uint64) error {
 func (s *EtcdServer) MoveLeader(ctx context.Context, lead, transferee uint64) error {
+	if !s.cluster.IsMemberExist(types.ID(transferee)) || s.cluster.Member(types.ID(transferee)).IsLearner {
+		return ErrBadLeaderTransferee
+	}
+
 	now := time.Now()
 	now := time.Now()
 	interval := time.Duration(s.Cfg.TickMs) * time.Millisecond
 	interval := time.Duration(s.Cfg.TickMs) * time.Millisecond
 
 
@@ -1440,20 +1440,20 @@ func (s *EtcdServer) TransferLeadership() error {
 		return nil
 		return nil
 	}
 	}
 
 
-	if !s.isMultiNode() {
+	if s.cluster == nil || len(s.cluster.VotingMemberIDs()) <= 1 {
 		if lg := s.getLogger(); lg != nil {
 		if lg := s.getLogger(); lg != nil {
 			lg.Info(
 			lg.Info(
-				"skipped leadership transfer; it's a single-node cluster",
+				"skipped leadership transfer for single voting member cluster",
 				zap.String("local-member-id", s.ID().String()),
 				zap.String("local-member-id", s.ID().String()),
 				zap.String("current-leader-member-id", types.ID(s.Lead()).String()),
 				zap.String("current-leader-member-id", types.ID(s.Lead()).String()),
 			)
 			)
 		} else {
 		} else {
-			plog.Printf("skipped leadership transfer for single member cluster")
+			plog.Printf("skipped leadership transfer for single voting member cluster")
 		}
 		}
 		return nil
 		return nil
 	}
 	}
 
 
-	transferee, ok := longestConnected(s.r.transport, s.cluster.MemberIDs())
+	transferee, ok := longestConnected(s.r.transport, s.cluster.VotingMemberIDs())
 	if !ok {
 	if !ok {
 		return ErrUnhealthy
 		return ErrUnhealthy
 	}
 	}