فهرست منبع

etcdserver: exclude learner from leader transfer

1. Maintenance API MoveLeader() returns ErrBadLeaderTransferee if
the transferee does not exist or is a raft learner.

2. etcdserver TransferLeadership() only chooses a voting member as
the transferee.
Jingyi Hu 6 سال پیش
والد
کامیت
44d935e90a
5 فایلهای تغییر یافته به همراه 35 افزوده شده و 8 حذف شده
  1. 22 0
      etcdserver/api/membership/cluster.go
  2. 3 0
      etcdserver/api/v3rpc/rpctypes/error.go
  3. 1 0
      etcdserver/api/v3rpc/util.go
  4. 1 0
      etcdserver/errors.go
  5. 8 8
      etcdserver/server.go

+ 22 - 0
etcdserver/api/membership/cluster.go

@@ -754,3 +754,25 @@ func (c *RaftCluster) IsLearner() bool {
 	}
 	return localMember.IsLearner
 }
+
+// IsMemberExist reports whether c.members has an entry for the given id; the lookup is done while holding the cluster lock.
+func (c *RaftCluster) IsMemberExist(id types.ID) bool {
+	c.Lock()
+	defer c.Unlock()
+	_, ok := c.members[id]
+	return ok
+}
+
+// VotingMemberIDs returns the IDs of all members whose IsLearner flag is false, sorted as a types.IDSlice; the result is nil when there are none.
+func (c *RaftCluster) VotingMemberIDs() []types.ID {
+	c.Lock()
+	defer c.Unlock()
+	var ids []types.ID
+	for _, m := range c.members {
+		if !m.IsLearner {
+			ids = append(ids, m.ID)
+		}
+	}
+	sort.Sort(types.IDSlice(ids))
+	return ids
+}

+ 3 - 0
etcdserver/api/v3rpc/rpctypes/error.go

@@ -72,6 +72,7 @@ var (
 	ErrGRPCUnhealthy                  = status.New(codes.Unavailable, "etcdserver: unhealthy cluster").Err()
 	ErrGRPCCorrupt                    = status.New(codes.DataLoss, "etcdserver: corrupt cluster").Err()
 	ErrGPRCNotSupportedForLearner     = status.New(codes.FailedPrecondition, "etcdserver: rpc not supported for learner").Err()
+	ErrGRPCBadLeaderTransferee        = status.New(codes.FailedPrecondition, "etcdserver: bad leader transferee").Err()
 
 	errStringToError = map[string]error{
 		ErrorDesc(ErrGRPCEmptyKey):      ErrGRPCEmptyKey,
@@ -123,6 +124,7 @@ var (
 		ErrorDesc(ErrGRPCTimeoutDueToConnectionLost): ErrGRPCTimeoutDueToConnectionLost,
 		ErrorDesc(ErrGRPCUnhealthy):                  ErrGRPCUnhealthy,
 		ErrorDesc(ErrGRPCCorrupt):                    ErrGRPCCorrupt,
+		ErrorDesc(ErrGRPCBadLeaderTransferee):        ErrGRPCBadLeaderTransferee,
 	}
 )
 
@@ -176,6 +178,7 @@ var (
 	ErrTimeoutDueToConnectionLost = Error(ErrGRPCTimeoutDueToConnectionLost)
 	ErrUnhealthy                  = Error(ErrGRPCUnhealthy)
 	ErrCorrupt                    = Error(ErrGRPCCorrupt)
+	ErrBadLeaderTransferee        = Error(ErrGRPCBadLeaderTransferee)
 )
 
 // EtcdError defines gRPC server errors.

+ 1 - 0
etcdserver/api/v3rpc/util.go

@@ -55,6 +55,7 @@ var toGRPCErrorMap = map[error]error{
 	etcdserver.ErrUnhealthy:                  rpctypes.ErrGRPCUnhealthy,
 	etcdserver.ErrKeyNotFound:                rpctypes.ErrGRPCKeyNotFound,
 	etcdserver.ErrCorrupt:                    rpctypes.ErrGRPCCorrupt,
+	etcdserver.ErrBadLeaderTransferee:        rpctypes.ErrGRPCBadLeaderTransferee,
 
 	lease.ErrLeaseNotFound:    rpctypes.ErrGRPCLeaseNotFound,
 	lease.ErrLeaseExists:      rpctypes.ErrGRPCLeaseExist,

+ 1 - 0
etcdserver/errors.go

@@ -37,6 +37,7 @@ var (
 	ErrUnhealthy                  = errors.New("etcdserver: unhealthy cluster")
 	ErrKeyNotFound                = errors.New("etcdserver: key not found")
 	ErrCorrupt                    = errors.New("etcdserver: corrupt cluster")
+	ErrBadLeaderTransferee        = errors.New("etcdserver: bad leader transferee")
 )
 
 type DiscoveryError struct {

+ 8 - 8
etcdserver/server.go

@@ -1377,16 +1377,16 @@ func (s *EtcdServer) triggerSnapshot(ep *etcdProgress) {
 	ep.snapi = ep.appliedi
 }
 
-func (s *EtcdServer) isMultiNode() bool {
-	return s.cluster != nil && len(s.cluster.MemberIDs()) > 1
-}
-
 func (s *EtcdServer) isLeader() bool {
 	return uint64(s.ID()) == s.Lead()
 }
 
 // MoveLeader transfers the leader to the given transferee.
 func (s *EtcdServer) MoveLeader(ctx context.Context, lead, transferee uint64) error {
+	if !s.cluster.IsMemberExist(types.ID(transferee)) || s.cluster.Member(types.ID(transferee)).IsLearner {
+		return ErrBadLeaderTransferee
+	}
+
 	now := time.Now()
 	interval := time.Duration(s.Cfg.TickMs) * time.Millisecond
 
@@ -1440,20 +1440,20 @@ func (s *EtcdServer) TransferLeadership() error {
 		return nil
 	}
 
-	if !s.isMultiNode() {
+	if s.cluster == nil || len(s.cluster.VotingMemberIDs()) <= 1 {
 		if lg := s.getLogger(); lg != nil {
 			lg.Info(
-				"skipped leadership transfer; it's a single-node cluster",
+				"skipped leadership transfer for single voting member cluster",
 				zap.String("local-member-id", s.ID().String()),
 				zap.String("current-leader-member-id", types.ID(s.Lead()).String()),
 			)
 		} else {
-			plog.Printf("skipped leadership transfer for single member cluster")
+			plog.Printf("skipped leadership transfer for single voting member cluster")
 		}
 		return nil
 	}
 
-	transferee, ok := longestConnected(s.r.transport, s.cluster.MemberIDs())
+	transferee, ok := longestConnected(s.r.transport, s.cluster.VotingMemberIDs())
 	if !ok {
 		return ErrUnhealthy
 	}