
Merge pull request #10730 from jingyih/learner_part3

*: support raft learner in etcd - part 3
Xiang Li 6 years ago
commit 77e1c37787

+ 70 - 0
Documentation/op-guide/runtime-configuration.md

@@ -123,6 +123,46 @@ The new member will run as a part of the cluster and immediately begin catching
 
 If adding multiple members, the best practice is to configure a single member at a time and verify it starts correctly before adding more new members. If adding a new member to a 1-node cluster, the cluster cannot make progress before the new member starts, because it needs two members as a majority to agree on consensus. This behavior only happens between the time `etcdctl member add` informs the cluster about the new member and the time the new member successfully establishes a connection to the existing one.
 
+#### Add a new member as learner
+
+Starting from v3.4, etcd supports adding a new member as a learner, i.e. a non-voting member.
+The motivation and design can be found in the [design doc](https://etcd.readthedocs.io/en/latest/server-learner.html).
+To make the process of adding a new member safer, and to reduce cluster downtime while the new member catches up,
+it is recommended that the new member joins the cluster as a learner until it is in sync.
+This can be described as a three-step process (sketched in Go after the list):
+
+ * Add the new member as a learner via the [gRPC members API][member-api-grpc] or the `etcdctl member add --learner` command.
+
+ * Start the new member with the new cluster configuration, including a list of the updated members (existing members + the new member).
+ This step is exactly the same as before.
+
+ * Promote the newly added learner to a voting member via the [gRPC members API][member-api-grpc] or the `etcdctl member promote` command.
+ The etcd server validates the promote request to ensure operational safety:
+ a learner can be promoted to a voting member only after its raft log has caught up to the leader's.
+ If the learner has not caught up, the member promote request will fail
+ (see the [error cases when promoting a member] section for more details).
+ In this case, the user should wait and retry later.
+
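+The same three steps can be driven programmatically with the Go client. Below is a minimal, illustrative
+sketch using the `clientv3` API introduced in this release (`MemberAddAsLearner` and `MemberPromote`);
+the endpoint and peer URLs are placeholders, and starting the new etcd process (step 2) happens out of band:
+
+```go
+package main
+
+import (
+	"context"
+	"log"
+	"strings"
+	"time"
+
+	"go.etcd.io/etcd/clientv3"
+)
+
+func main() {
+	// connect to any existing member of the cluster
+	cli, err := clientv3.New(clientv3.Config{
+		Endpoints:   []string{"http://10.0.1.10:2379"},
+		DialTimeout: 5 * time.Second,
+	})
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer cli.Close()
+
+	// step 1: add the new member as a learner
+	addResp, err := cli.MemberAddAsLearner(context.Background(), []string{"http://10.0.1.13:2380"})
+	if err != nil {
+		log.Fatal(err)
+	}
+	learnerID := addResp.Member.ID
+
+	// step 2 (out of band): start the new etcd process with
+	// ETCD_INITIAL_CLUSTER_STATE=existing, as printed by `etcdctl member add`.
+
+	// step 3: promote the learner; retry while it is still catching up
+	for {
+		if _, err := cli.MemberPromote(context.Background(), learnerID); err == nil {
+			break // the learner is now a voting member
+		} else if !strings.Contains(err.Error(), "learner member which is in sync with leader") {
+			log.Fatalf("member promote failed: %v", err)
+		}
+		time.Sleep(500 * time.Millisecond)
+	}
+}
+```
+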
+In v3.4, the etcd server limits the number of learners a cluster can have to one. The main consideration is to limit
+the extra workload on the leader caused by propagating data from the leader to the learner.
+
+Use `etcdctl member add` with the `--learner` flag to add a new member to the cluster as a learner.
+
+```sh
+$ etcdctl member add infra3 --peer-urls=http://10.0.1.13:2380 --learner
+Member 9bf1b35fc7761a23 added to cluster a7ef944b95711739
+
+ETCD_NAME="infra3"
+ETCD_INITIAL_CLUSTER="infra0=http://10.0.1.10:2380,infra1=http://10.0.1.11:2380,infra2=http://10.0.1.12:2380,infra3=http://10.0.1.13:2380"
+ETCD_INITIAL_CLUSTER_STATE=existing
+```
+
+After the new etcd process is started for the newly added learner member, use `etcdctl member promote` to promote the learner to a voting member.
+```
+$ etcdctl member promote 9bf1b35fc7761a23
+Member 9bf1b35fc7761a23 promoted in cluster a7ef944b95711739
+```
+
 #### Error cases when adding members
 
 In the following case a new host is not included in the list of enumerated nodes. If this is a new cluster, the node must be added to the list of initial cluster members.
@@ -153,6 +193,35 @@ etcd: this member has been permanently removed from the cluster. Exiting.
 exit 1
 ```
 
+#### Error cases when adding a learner member
+
+A learner cannot be added to the cluster if the cluster already has 1 learner (v3.4).
+```
+$ etcdctl member add infra4 --peer-urls=http://10.0.1.14:2380 --learner
+Error: etcdserver: too many learner members in cluster
+```
+
+#### Error cases when promoting a learner member
+
+A learner can only be promoted to a voting member if it is in sync with the leader.
+```
+$ etcdctl member promote 9bf1b35fc7761a23
+Error: etcdserver: can only promote a learner member which is in sync with leader
+```
+
+Promoting a member that is not a learner will fail.
+```
+$ etcdctl member promote 9bf1b35fc7761a23
+Error: etcdserver: can only promote a learner member
+```
+
+Promoting a member that does not exist in the cluster will fail.
+```
+$ etcdctl member promote 12345abcde
+Error: etcdserver: member not found
+```
+
 ### Strict reconfiguration check mode (`-strict-reconfig-check`)
 
 As described above, the best practice for adding new members is to configure a single member at a time and verify it starts correctly before adding more new members. This step-by-step approach is very important because if a newly added member is not configured correctly (for example, the peer URLs are incorrect), the cluster can lose quorum. The quorum loss happens since the newly added member is counted in the quorum even if that member is not reachable from the other existing members. Quorum loss might also happen if there is a connectivity issue or there are operational issues.
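+
+For illustration, with strict reconfiguration check enabled (the default), a member add request that would
+leave the cluster without a started-member quorum is rejected; the member name and URL below are placeholders:
+```
+$ etcdctl member add infra4 --peer-urls=http://10.0.1.14:2380
+Error: etcdserver: re-configuration failed due to not enough started members
+```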
@@ -173,3 +242,4 @@ It is enabled by default.
 [member migration]: ../v2/admin_guide.md#member-migration
 [remove member]: #remove-a-member
 [runtime-reconf]: runtime-reconf-design.md
+[error cases when promoting a member]: #error-cases-when-promoting-a-learner-member

+ 83 - 11
clientv3/integration/cluster_test.go

@@ -19,6 +19,7 @@ import (
 	"reflect"
 	"strings"
 	"testing"
+	"time"
 
 	"go.etcd.io/etcd/integration"
 	"go.etcd.io/etcd/pkg/testutil"
@@ -214,14 +215,19 @@ func TestMemberAddForLearner(t *testing.T) {
 	}
 }
 
-func TestMemberPromoteForLearner(t *testing.T) {
-	// TODO test not ready learner promotion.
+func TestMemberPromote(t *testing.T) {
 	defer testutil.AfterTest(t)
 
 	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
 	defer clus.Terminate(t)
-	// TODO change the random client to client that talk to leader directly.
-	capi := clus.RandClient()
+
+	// a member promote request can be sent to any server in the cluster;
+	// it is auto-forwarded to the leader on the server side.
+	// This test explicitly exercises the server-side forwarding by
+	// sending the request to a follower.
+	leaderIdx := clus.WaitLeader(t)
+	followerIdx := (leaderIdx + 1) % 3
+	capi := clus.Client(followerIdx)
 
 	urls := []string{"http://127.0.0.1:1234"}
 	memberAddResp, err := capi.MemberAddAsLearner(context.Background(), urls)
@@ -244,18 +250,84 @@ func TestMemberPromoteForLearner(t *testing.T) {
 		t.Fatalf("Added 1 learner node to cluster, got %d", numberOfLearners)
 	}
 
-	memberPromoteResp, err := capi.MemberPromote(context.Background(), learnerID)
-	if err != nil {
-		t.Fatalf("failed to promote member: %v", err)
+	// the learner is not started yet, so the learner progress check is expected to fail;
+	// as a result, the member promote request will fail.
+	_, err = capi.MemberPromote(context.Background(), learnerID)
+	expectedErrKeywords := "can only promote a learner member which is in sync with leader"
+	if err == nil {
+		t.Fatalf("expecting promote not ready learner to fail, got no error")
+	}
+	if !strings.Contains(err.Error(), expectedErrKeywords) {
+		t.Fatalf("expecting error to contain %s, got %s", expectedErrKeywords, err.Error())
+	}
+
+	// create and launch the learner member based on the response of the V3 Member Add API
+	// (the response has information on the peer URLs of the existing members in the cluster)
+	learnerMember := clus.MustNewMember(t, memberAddResp)
+	clus.Members = append(clus.Members, learnerMember)
+	if err := learnerMember.Launch(); err != nil {
+		t.Fatal(err)
+	}
+
+	// retry until promote succeeds or the timeout elapses
+	timeout := time.After(5 * time.Second)
+promoteLoop:
+	for {
+		select {
+		case <-time.After(500 * time.Millisecond):
+		case <-timeout:
+			t.Errorf("failed all attempts to promote learner member, last error: %v", err)
+			// a plain break would only exit the select; the label exits the retry loop
+			break promoteLoop
+		}
+
+		_, err = capi.MemberPromote(context.Background(), learnerID)
+		// successfully promoted learner
+		if err == nil {
+			break
+		}
+		// if member promote fails because the learner is not ready, retry;
+		// otherwise fail the test.
+		if !strings.Contains(err.Error(), expectedErrKeywords) {
+			t.Fatalf("unexpected error when promoting learner member: %v", err)
+		}
 	}
+}
 
-	numberOfLearners = 0
-	for _, m := range memberPromoteResp.Members {
+// TestMaxLearnerInCluster verifies that the maximum number of learners allowed in a cluster is 1
+func TestMaxLearnerInCluster(t *testing.T) {
+	defer testutil.AfterTest(t)
+
+	// 1. start with a cluster with 3 voting members and 0 learner members
+	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
+	defer clus.Terminate(t)
+
+	// 2. adding a learner member should succeed
+	resp1, err := clus.Client(0).MemberAddAsLearner(context.Background(), []string{"http://127.0.0.1:1234"})
+	if err != nil {
+		t.Fatalf("failed to add learner member %v", err)
+	}
+	numberOfLearners := 0
+	for _, m := range resp1.Members {
 		if m.IsLearner {
 			numberOfLearners++
 		}
 	}
-	if numberOfLearners != 0 {
-		t.Errorf("learner promoted, expect 0 learner, got %d", numberOfLearners)
+	if numberOfLearners != 1 {
+		t.Fatalf("Added 1 learner node to cluster, got %d", numberOfLearners)
+	}
+
+	// 3. cluster has 3 voting members and 1 learner; adding another learner should fail
+	_, err = clus.Client(0).MemberAddAsLearner(context.Background(), []string{"http://127.0.0.1:2345"})
+	if err == nil {
+		t.Fatalf("expect member add to fail, got no error")
+	}
+	expectedErrKeywords := "too many learner members in cluster"
+	if !strings.Contains(err.Error(), expectedErrKeywords) {
+		t.Fatalf("expecting error to contain %s, got %s", expectedErrKeywords, err.Error())
+	}
+
+	// 4. cluster has 3 voting members and 1 learner; adding a voting member should succeed
+	_, err = clus.Client(0).MemberAdd(context.Background(), []string{"http://127.0.0.1:3456"})
+	if err != nil {
+		t.Errorf("failed to add member %v", err)
 	}
 }

+ 48 - 3
clientv3/integration/kv_test.go

@@ -1011,9 +1011,8 @@ func TestKVForLearner(t *testing.T) {
 	}
 	defer cli.Close()
 
-	// TODO: expose servers's ReadyNotify() in test and use it instead.
-	// waiting for learner member to catch up applying the config change entries in raft log.
-	time.Sleep(3 * time.Second)
+	// wait until learner member is ready
+	<-clus.Members[3].ReadyNotify()
 
 	tests := []struct {
 		op   clientv3.Op
@@ -1051,3 +1050,49 @@ func TestKVForLearner(t *testing.T) {
 		}
 	}
 }
+
+// TestBalancerSupportLearner verifies that the balancer's retry and failover mechanism supports a cluster with a learner member
+func TestBalancerSupportLearner(t *testing.T) {
+	defer testutil.AfterTest(t)
+
+	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
+	defer clus.Terminate(t)
+
+	// we have to add and launch the learner member after the initial cluster is created, because
+	// bootstrapping a cluster with a learner member is not supported.
+	clus.AddAndLaunchLearnerMember(t)
+
+	learners, err := clus.GetLearnerMembers()
+	if err != nil {
+		t.Fatalf("failed to get the learner members in cluster: %v", err)
+	}
+	if len(learners) != 1 {
+		t.Fatalf("added 1 learner to cluster, got %d", len(learners))
+	}
+
+	// clus.Members[3] is the newly added learner member, which was appended to clus.Members
+	learnerEp := clus.Members[3].GRPCAddr()
+	cfg := clientv3.Config{
+		Endpoints:   []string{learnerEp},
+		DialTimeout: 5 * time.Second,
+		DialOptions: []grpc.DialOption{grpc.WithBlock()},
+	}
+	cli, err := clientv3.New(cfg)
+	if err != nil {
+		t.Fatalf("failed to create clientv3: %v", err)
+	}
+	defer cli.Close()
+
+	// wait until learner member is ready
+	<-clus.Members[3].ReadyNotify()
+
+	if _, err := cli.Get(context.Background(), "foo"); err == nil {
+		t.Fatalf("expect Get request to learner to fail, got no error")
+	}
+
+	eps := []string{learnerEp, clus.Members[0].GRPCAddr()}
+	cli.SetEndpoints(eps...)
+	if _, err := cli.Get(context.Background(), "foo"); err != nil {
+		t.Errorf("expect no error (balancer should retry when request to learner fails), got error: %v", err)
+	}
+}

+ 88 - 10
etcdserver/api/etcdhttp/peer.go

@@ -16,56 +16,82 @@ package etcdhttp
 
 import (
 	"encoding/json"
+	"fmt"
 	"net/http"
+	"strconv"
+	"strings"
 
 	"go.etcd.io/etcd/etcdserver"
 	"go.etcd.io/etcd/etcdserver/api"
+	"go.etcd.io/etcd/etcdserver/api/membership"
 	"go.etcd.io/etcd/etcdserver/api/rafthttp"
 	"go.etcd.io/etcd/lease/leasehttp"
+	"go.etcd.io/etcd/pkg/types"
 
 	"go.uber.org/zap"
 )
 
 const (
-	peerMembersPrefix = "/members"
+	peerMembersPath         = "/members"
+	peerMemberPromotePrefix = "/members/promote/"
 )
 
 // NewPeerHandler generates an http.Handler to handle etcd peer requests.
 func NewPeerHandler(lg *zap.Logger, s etcdserver.ServerPeer) http.Handler {
-	return newPeerHandler(lg, s.Cluster(), s.RaftHandler(), s.LeaseHandler())
+	return newPeerHandler(lg, s, s.RaftHandler(), s.LeaseHandler())
 }
 
-func newPeerHandler(lg *zap.Logger, cluster api.Cluster, raftHandler http.Handler, leaseHandler http.Handler) http.Handler {
-	mh := &peerMembersHandler{
-		lg:      lg,
-		cluster: cluster,
-	}
+func newPeerHandler(lg *zap.Logger, s etcdserver.Server, raftHandler http.Handler, leaseHandler http.Handler) http.Handler {
+	peerMembersHandler := newPeerMembersHandler(lg, s.Cluster())
+	peerMemberPromoteHandler := newPeerMemberPromoteHandler(lg, s)
 
 	mux := http.NewServeMux()
 	mux.HandleFunc("/", http.NotFound)
 	mux.Handle(rafthttp.RaftPrefix, raftHandler)
 	mux.Handle(rafthttp.RaftPrefix+"/", raftHandler)
-	mux.Handle(peerMembersPrefix, mh)
+	mux.Handle(peerMembersPath, peerMembersHandler)
+	mux.Handle(peerMemberPromotePrefix, peerMemberPromoteHandler)
 	if leaseHandler != nil {
 		mux.Handle(leasehttp.LeasePrefix, leaseHandler)
 		mux.Handle(leasehttp.LeaseInternalPrefix, leaseHandler)
 	}
-	mux.HandleFunc(versionPath, versionHandler(cluster, serveVersion))
+	mux.HandleFunc(versionPath, versionHandler(s.Cluster(), serveVersion))
 	return mux
 }
 
+func newPeerMembersHandler(lg *zap.Logger, cluster api.Cluster) http.Handler {
+	return &peerMembersHandler{
+		lg:      lg,
+		cluster: cluster,
+	}
+}
+
 type peerMembersHandler struct {
 	lg      *zap.Logger
 	cluster api.Cluster
 }
 
+func newPeerMemberPromoteHandler(lg *zap.Logger, s etcdserver.Server) http.Handler {
+	return &peerMemberPromoteHandler{
+		lg:      lg,
+		cluster: s.Cluster(),
+		server:  s,
+	}
+}
+
+type peerMemberPromoteHandler struct {
+	lg      *zap.Logger
+	cluster api.Cluster
+	server  etcdserver.Server
+}
+
 func (h *peerMembersHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 	if !allowMethod(w, r, "GET") {
 		return
 	}
 	w.Header().Set("X-Etcd-Cluster-ID", h.cluster.ID().String())
 
-	if r.URL.Path != peerMembersPrefix {
+	if r.URL.Path != peerMembersPath {
 		http.Error(w, "bad path", http.StatusBadRequest)
 		return
 	}
@@ -79,3 +105,55 @@ func (h *peerMembersHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 		}
 	}
 }
+
+func (h *peerMemberPromoteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	if !allowMethod(w, r, "POST") {
+		return
+	}
+	w.Header().Set("X-Etcd-Cluster-ID", h.cluster.ID().String())
+
+	if !strings.HasPrefix(r.URL.Path, peerMemberPromotePrefix) {
+		http.Error(w, "bad path", http.StatusBadRequest)
+		return
+	}
+	idStr := strings.TrimPrefix(r.URL.Path, peerMemberPromotePrefix)
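+	// the member ID in the URL path is in decimal form (promoteMemberHTTP formats it
+	// with %d), not the hex form shown by etcdctl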
+	id, err := strconv.ParseUint(idStr, 10, 64)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("member %s not found in cluster", idStr), http.StatusNotFound)
+		return
+	}
+
+	resp, err := h.server.PromoteMember(r.Context(), id)
+	if err != nil {
+		switch err {
+		case membership.ErrIDNotFound:
+			http.Error(w, err.Error(), http.StatusNotFound)
+		case membership.ErrMemberNotLearner:
+			http.Error(w, err.Error(), http.StatusPreconditionFailed)
+		case etcdserver.ErrLearnerNotReady:
+			http.Error(w, err.Error(), http.StatusPreconditionFailed)
+		default:
+			WriteError(h.lg, w, r, err)
+		}
+		if h.lg != nil {
+			h.lg.Warn(
+				"failed to promote a member",
+				zap.String("member-id", types.ID(id).String()),
+				zap.Error(err),
+			)
+		} else {
+			plog.Errorf("error promoting member %s (%v)", types.ID(id).String(), err)
+		}
+		return
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	if err := json.NewEncoder(w).Encode(resp); err != nil {
+		if h.lg != nil {
+			h.lg.Warn("failed to encode members response", zap.Error(err))
+		} else {
+			plog.Warningf("failed to encode members response (%v)", err)
+		}
+	}
+}

+ 131 - 9
etcdserver/api/etcdhttp/peer_test.go

@@ -15,19 +15,24 @@
 package etcdhttp
 
 import (
+	"context"
 	"encoding/json"
+	"fmt"
 	"io/ioutil"
 	"net/http"
 	"net/http/httptest"
 	"path"
 	"sort"
+	"strings"
 	"testing"
 
 	"go.uber.org/zap"
 
 	"github.com/coreos/go-semver/semver"
+	"go.etcd.io/etcd/etcdserver/api"
 	"go.etcd.io/etcd/etcdserver/api/membership"
 	"go.etcd.io/etcd/etcdserver/api/rafthttp"
+	pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
 	"go.etcd.io/etcd/pkg/testutil"
 	"go.etcd.io/etcd/pkg/types"
 )
@@ -51,13 +56,34 @@ func (c *fakeCluster) Members() []*membership.Member {
 func (c *fakeCluster) Member(id types.ID) *membership.Member { return c.members[uint64(id)] }
 func (c *fakeCluster) Version() *semver.Version              { return nil }
 
+type fakeServer struct {
+	cluster api.Cluster
+}
+
+func (s *fakeServer) AddMember(ctx context.Context, memb membership.Member) ([]*membership.Member, error) {
+	return nil, fmt.Errorf("AddMember not implemented in fakeServer")
+}
+func (s *fakeServer) RemoveMember(ctx context.Context, id uint64) ([]*membership.Member, error) {
+	return nil, fmt.Errorf("RemoveMember not implemented in fakeServer")
+}
+func (s *fakeServer) UpdateMember(ctx context.Context, updateMemb membership.Member) ([]*membership.Member, error) {
+	return nil, fmt.Errorf("UpdateMember not implemented in fakeServer")
+}
+func (s *fakeServer) PromoteMember(ctx context.Context, id uint64) ([]*membership.Member, error) {
+	return nil, fmt.Errorf("PromoteMember not implemented in fakeServer")
+}
+func (s *fakeServer) ClusterVersion() *semver.Version { return nil }
+func (s *fakeServer) Cluster() api.Cluster            { return s.cluster }
+func (s *fakeServer) Alarms() []*pb.AlarmMember       { return nil }
+
+var fakeRaftHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+	w.Write([]byte("test data"))
+})
+
 // TestNewPeerHandlerOnRaftPrefix tests that NewPeerHandler returns a handler that
 // handles raft-prefix requests well.
 func TestNewPeerHandlerOnRaftPrefix(t *testing.T) {
-	h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.Write([]byte("test data"))
-	})
-	ph := newPeerHandler(zap.NewExample(), &fakeCluster{}, h, nil)
+	ph := newPeerHandler(zap.NewExample(), &fakeServer{cluster: &fakeCluster{}}, fakeRaftHandler, nil)
 	srv := httptest.NewServer(ph)
 	defer srv.Close()
 
@@ -80,6 +106,7 @@ func TestNewPeerHandlerOnRaftPrefix(t *testing.T) {
 	}
 }
 
+// TestServeMembersFails ensures peerMembersHandler only accepts GET requests
 func TestServeMembersFails(t *testing.T) {
 	tests := []struct {
 		method string
@@ -89,6 +116,10 @@ func TestServeMembersFails(t *testing.T) {
 			"POST",
 			http.StatusMethodNotAllowed,
 		},
+		{
+			"PUT",
+			http.StatusMethodNotAllowed,
+		},
 		{
 			"DELETE",
 			http.StatusMethodNotAllowed,
@@ -100,8 +131,12 @@ func TestServeMembersFails(t *testing.T) {
 	}
 	for i, tt := range tests {
 		rw := httptest.NewRecorder()
-		h := &peerMembersHandler{cluster: nil}
-		h.ServeHTTP(rw, &http.Request{Method: tt.method})
+		h := newPeerMembersHandler(nil, &fakeCluster{})
+		req, err := http.NewRequest(tt.method, "", nil)
+		if err != nil {
+			t.Fatalf("#%d: failed to create http request: %v", i, err)
+		}
+		h.ServeHTTP(rw, req)
 		if rw.Code != tt.wcode {
 			t.Errorf("#%d: code=%d, want %d", i, rw.Code, tt.wcode)
 		}
@@ -115,7 +150,7 @@ func TestServeMembersGet(t *testing.T) {
 		id:      1,
 		members: map[uint64]*membership.Member{1: &memb1, 2: &memb2},
 	}
-	h := &peerMembersHandler{cluster: cluster}
+	h := newPeerMembersHandler(nil, cluster)
 	msb, err := json.Marshal([]membership.Member{memb1, memb2})
 	if err != nil {
 		t.Fatal(err)
@@ -128,8 +163,8 @@ func TestServeMembersGet(t *testing.T) {
 		wct   string
 		wbody string
 	}{
-		{peerMembersPrefix, http.StatusOK, "application/json", wms},
-		{path.Join(peerMembersPrefix, "bad"), http.StatusBadRequest, "text/plain; charset=utf-8", "bad path\n"},
+		{peerMembersPath, http.StatusOK, "application/json", wms},
+		{path.Join(peerMembersPath, "bad"), http.StatusBadRequest, "text/plain; charset=utf-8", "bad path\n"},
 	}
 
 	for i, tt := range tests {
@@ -156,3 +191,90 @@ func TestServeMembersGet(t *testing.T) {
 		}
 	}
 }
+
+// TestServeMemberPromoteFails ensures peerMemberPromoteHandler only accepts POST requests
+func TestServeMemberPromoteFails(t *testing.T) {
+	tests := []struct {
+		method string
+		wcode  int
+	}{
+		{
+			"GET",
+			http.StatusMethodNotAllowed,
+		},
+		{
+			"PUT",
+			http.StatusMethodNotAllowed,
+		},
+		{
+			"DELETE",
+			http.StatusMethodNotAllowed,
+		},
+		{
+			"BAD",
+			http.StatusMethodNotAllowed,
+		},
+	}
+	for i, tt := range tests {
+		rw := httptest.NewRecorder()
+		h := newPeerMemberPromoteHandler(nil, &fakeServer{cluster: &fakeCluster{}})
+		req, err := http.NewRequest(tt.method, "", nil)
+		if err != nil {
+			t.Fatalf("#%d: failed to create http request: %v", i, err)
+		}
+		h.ServeHTTP(rw, req)
+		if rw.Code != tt.wcode {
+			t.Errorf("#%d: code=%d, want %d", i, rw.Code, tt.wcode)
+		}
+	}
+}
+
+// TestNewPeerHandlerOnMembersPromotePrefix verifies that requests with the member promote prefix are routed correctly
+func TestNewPeerHandlerOnMembersPromotePrefix(t *testing.T) {
+	ph := newPeerHandler(zap.NewExample(), &fakeServer{cluster: &fakeCluster{}}, fakeRaftHandler, nil)
+	srv := httptest.NewServer(ph)
+	defer srv.Close()
+
+	tests := []struct {
+		path      string
+		wcode     int
+		checkBody bool
+		wKeyWords string
+	}{
+		{
+			// does not contain member id in path
+			peerMemberPromotePrefix,
+			http.StatusNotFound,
+			false,
+			"",
+		},
+		{
+			// try to promote member id = 1
+			peerMemberPromotePrefix + "1",
+			http.StatusInternalServerError,
+			true,
+			"PromoteMember not implemented in fakeServer",
+		},
+	}
+	for i, tt := range tests {
+		req, err := http.NewRequest("POST", srv.URL+tt.path, nil)
+		if err != nil {
+			t.Fatalf("failed to create request: %v", err)
+		}
+		resp, err := http.DefaultClient.Do(req)
+		if err != nil {
+			t.Fatalf("failed to get http response: %v", err)
+		}
+		body, err := ioutil.ReadAll(resp.Body)
+		resp.Body.Close()
+		if err != nil {
+			t.Fatalf("unexpected ioutil.ReadAll error: %v", err)
+		}
+		if resp.StatusCode != tt.wcode {
+			t.Fatalf("#%d: code = %d, want %d", i, resp.StatusCode, tt.wcode)
+		}
+		if tt.checkBody && !strings.Contains(string(body), tt.wKeyWords) {
+			t.Errorf("#%d: body: %s, want body to contain keywords: %s", i, string(body), tt.wKeyWords)
+		}
+	}
+}

+ 64 - 8
etcdserver/api/membership/cluster.go

@@ -40,6 +40,8 @@ import (
 	"go.uber.org/zap"
 )
 
+const maxLearners = 1
+
 // RaftCluster is a list of Members that belong to the same raft cluster
 type RaftCluster struct {
 	lg *zap.Logger
@@ -123,6 +125,19 @@ func (c *RaftCluster) Member(id types.ID) *Member {
 	return c.members[id].Clone()
 }
 
+func (c *RaftCluster) VotingMembers() []*Member {
+	c.Lock()
+	defer c.Unlock()
+	var ms MembersByID
+	for _, m := range c.members {
+		if !m.IsLearner {
+			ms = append(ms, m.Clone())
+		}
+	}
+	sort.Sort(ms)
+	return []*Member(ms)
+}
+
 // MemberByName returns a Member with the given name if exists.
 // If more than one member has the given name, it will panic.
 func (c *RaftCluster) MemberByName(name string) *Member {
@@ -279,16 +294,15 @@ func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange) error {
 				plog.Panicf("unmarshal confChangeContext should never fail: %v", err)
 			}
 		}
-		// A ConfChangeAddNode to a existing learner node promotes it to a voting member.
-		if confChangeContext.IsPromote {
+
+		if confChangeContext.IsPromote { // promoting a learner member to voting member
 			if members[id] == nil {
 				return ErrIDNotFound
 			}
 			if !members[id].IsLearner {
 				return ErrMemberNotLearner
 			}
-		} else {
-			// add a learner or a follower case
+		} else { // adding a new member
 			if members[id] != nil {
 				return ErrIDExists
 			}
@@ -304,6 +318,18 @@ func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange) error {
 					return ErrPeerURLexists
 				}
 			}
+
+			if confChangeContext.Member.IsLearner { // the new member is a learner
+				numLearners := 0
+				for _, m := range members {
+					if m.IsLearner {
+						numLearners++
+					}
+				}
+				if numLearners+1 > maxLearners {
+					return ErrTooManyLearners
+				}
+			}
 		}
 	case raftpb.ConfChangeRemoveNode:
 		if members[id] == nil {
@@ -551,11 +577,11 @@ func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*zap.Logger, *s
 	onSet(c.lg, ver)
 }
 
-func (c *RaftCluster) IsReadyToAddNewMember() bool {
+func (c *RaftCluster) IsReadyToAddVotingMember() bool {
 	nmembers := 1
 	nstarted := 0
 
-	for _, member := range c.members {
+	for _, member := range c.VotingMembers() {
 		if member.IsStarted() {
 			nstarted++
 		}
@@ -592,11 +618,11 @@ func (c *RaftCluster) IsReadyToAddNewMember() bool {
 	return true
 }
 
-func (c *RaftCluster) IsReadyToRemoveMember(id uint64) bool {
+func (c *RaftCluster) IsReadyToRemoveVotingMember(id uint64) bool {
 	nmembers := 0
 	nstarted := 0
 
-	for _, member := range c.members {
+	for _, member := range c.VotingMembers() {
 		if uint64(member.ID) == id {
 			continue
 		}
@@ -626,6 +652,36 @@ func (c *RaftCluster) IsReadyToRemoveMember(id uint64) bool {
 	return true
 }
 
+func (c *RaftCluster) IsReadyToPromoteMember(id uint64) bool {
+	nmembers := 1
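+	// start nmembers at 1 to count the learner that will join the voting set once
+	// promoted, mirroring IsReadyToAddVotingMember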
+	nstarted := 0
+
+	for _, member := range c.VotingMembers() {
+		if member.IsStarted() {
+			nstarted++
+		}
+		nmembers++
+	}
+
+	nquorum := nmembers/2 + 1
+	if nstarted < nquorum {
+		if c.lg != nil {
+			c.lg.Warn(
+				"rejecting member promote; started member will be less than quorum",
+				zap.Int("number-of-started-member", nstarted),
+				zap.Int("quorum", nquorum),
+				zap.String("cluster-id", c.cid.String()),
+				zap.String("local-member-id", c.localID.String()),
+			)
+		} else {
+			plog.Warningf("Reject promote member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
+		}
+		return false
+	}
+
+	return true
+}
+
 func membersFromStore(lg *zap.Logger, st v2store.Store) (map[types.ID]*Member, map[types.ID]bool) {
 	members := make(map[types.ID]*Member)
 	removed := make(map[types.ID]bool)

+ 73 - 4
etcdserver/api/membership/cluster_test.go

@@ -626,7 +626,7 @@ func newTestCluster(membs []*Member) *RaftCluster {
 
 func stringp(s string) *string { return &s }
 
-func TestIsReadyToAddNewMember(t *testing.T) {
+func TestIsReadyToAddVotingMember(t *testing.T) {
 	tests := []struct {
 		members []*Member
 		want    bool
@@ -697,16 +697,38 @@ func TestIsReadyToAddNewMember(t *testing.T) {
 			[]*Member{},
 			false,
 		},
+		{
+			// 2 voting members ready in a cluster with 2 voting members and 2 unstarted learner members, should succeed
+			// (the status of learner members does not affect the readiness of adding voting member)
+			[]*Member{
+				newTestMember(1, nil, "1", nil),
+				newTestMember(2, nil, "2", nil),
+				newTestMemberAsLearner(3, nil, "", nil),
+				newTestMemberAsLearner(4, nil, "", nil),
+			},
+			true,
+		},
+		{
+			// 1 voting member ready in a cluster with 2 voting members and 2 ready learner members, should fail
+			// (the status of learner members does not affect the readiness of adding voting member)
+			[]*Member{
+				newTestMember(1, nil, "1", nil),
+				newTestMember(2, nil, "", nil),
+				newTestMemberAsLearner(3, nil, "3", nil),
+				newTestMemberAsLearner(4, nil, "4", nil),
+			},
+			false,
+		},
 	}
 	for i, tt := range tests {
 		c := newTestCluster(tt.members)
-		if got := c.IsReadyToAddNewMember(); got != tt.want {
+		if got := c.IsReadyToAddVotingMember(); got != tt.want {
 			t.Errorf("%d: isReadyToAddNewMember returned %t, want %t", i, got, tt.want)
 		}
 	}
 }
 
-func TestIsReadyToRemoveMember(t *testing.T) {
+func TestIsReadyToRemoveVotingMember(t *testing.T) {
 	tests := []struct {
 		members  []*Member
 		removeID uint64
@@ -782,10 +804,57 @@ func TestIsReadyToRemoveMember(t *testing.T) {
 			4,
 			true,
 		},
+		{
+			// 1 voting member ready in a cluster with 1 voting member and 1 ready learner;
+			// removing the voting member should fail
+			// (the status of learner members does not affect the readiness of removing voting member)
+			[]*Member{
+				newTestMember(1, nil, "1", nil),
+				newTestMemberAsLearner(2, nil, "2", nil),
+			},
+			1,
+			false,
+		},
+		{
+			// 1 voting member ready in a cluster with 2 voting members and 1 ready learner;
+			// removing a ready voting member should fail
+			// (the status of learner members does not affect the readiness of removing voting member)
+			[]*Member{
+				newTestMember(1, nil, "1", nil),
+				newTestMember(2, nil, "", nil),
+				newTestMemberAsLearner(3, nil, "3", nil),
+			},
+			1,
+			false,
+		},
+		{
+			// 1 voting member ready in a cluster with 2 voting members and 1 ready learner;
+			// removing an unstarted voting member should be fine (the actual operation will fail)
+			// (the status of learner members does not affect the readiness of removing voting member)
+			[]*Member{
+				newTestMember(1, nil, "1", nil),
+				newTestMember(2, nil, "", nil),
+				newTestMemberAsLearner(3, nil, "3", nil),
+			},
+			2,
+			true,
+		},
+		{
+			// 1 voting member ready in a cluster with 2 voting members and 1 unstarted learner;
+			// removing a not-ready voting member should be fine (the actual operation will fail)
+			// (the status of learner members does not affect the readiness of removing voting member)
+			[]*Member{
+				newTestMember(1, nil, "1", nil),
+				newTestMember(2, nil, "", nil),
+				newTestMemberAsLearner(3, nil, "", nil),
+			},
+			2,
+			true,
+		},
 	}
 	for i, tt := range tests {
 		c := newTestCluster(tt.members)
-		if got := c.IsReadyToRemoveMember(tt.removeID); got != tt.want {
+		if got := c.IsReadyToRemoveVotingMember(tt.removeID); got != tt.want {
 			t.Errorf("%d: isReadyToAddNewMember returned %t, want %t", i, got, tt.want)
 		}
 	}

+ 1 - 1
etcdserver/api/membership/errors.go

@@ -26,7 +26,7 @@ var (
 	ErrIDNotFound       = errors.New("membership: ID not found")
 	ErrPeerURLexists    = errors.New("membership: peerURL exists")
 	ErrMemberNotLearner = errors.New("membership: can only promote a learner member")
-	ErrLearnerNotReady  = errors.New("membership: can only promote a learner member which is in sync with leader")
+	ErrTooManyLearners  = errors.New("membership: too many learner members in cluster")
 )
 
 func isKeyNotFound(err error) bool {

+ 2 - 3
etcdserver/api/v3rpc/interceptor.go

@@ -48,8 +48,7 @@ func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
 			return nil, rpctypes.ErrGRPCNotCapable
 		}
 
-		// TODO: add test in clientv3/integration to verify behavior
-		if s.IsLearner() && !isRPCSupportedForLearner(req) {
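+		// enforce the learner restriction only once the local member is listed in the cluster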
+		if s.IsMemberExist(s.ID()) && s.IsLearner() && !isRPCSupportedForLearner(req) {
 			return nil, rpctypes.ErrGPRCNotSupportedForLearner
 		}
 
@@ -195,7 +194,7 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor
 			return rpctypes.ErrGRPCNotCapable
 		}
 
-		if s.IsLearner() { // learner does not support Watch and LeaseKeepAlive RPC
+		if s.IsMemberExist(s.ID()) && s.IsLearner() { // learner does not support stream RPC
 			return rpctypes.ErrGPRCNotSupportedForLearner
 		}
 

+ 5 - 1
etcdserver/api/v3rpc/rpctypes/error.go

@@ -42,6 +42,7 @@ var (
 	ErrGRPCMemberNotFound         = status.New(codes.NotFound, "etcdserver: member not found").Err()
 	ErrGRPCMemberNotLearner       = status.New(codes.FailedPrecondition, "etcdserver: can only promote a learner member").Err()
 	ErrGRPCLearnerNotReady        = status.New(codes.FailedPrecondition, "etcdserver: can only promote a learner member which is in sync with leader").Err()
+	ErrGRPCTooManyLearners        = status.New(codes.FailedPrecondition, "etcdserver: too many learner members in cluster").Err()
 
 	ErrGRPCRequestTooLarge        = status.New(codes.InvalidArgument, "etcdserver: request is too large").Err()
 	ErrGRPCRequestTooManyRequests = status.New(codes.ResourceExhausted, "etcdserver: too many requests").Err()
@@ -71,7 +72,7 @@ var (
 	ErrGRPCTimeoutDueToConnectionLost = status.New(codes.Unavailable, "etcdserver: request timed out, possibly due to connection lost").Err()
 	ErrGRPCUnhealthy                  = status.New(codes.Unavailable, "etcdserver: unhealthy cluster").Err()
 	ErrGRPCCorrupt                    = status.New(codes.DataLoss, "etcdserver: corrupt cluster").Err()
-	ErrGPRCNotSupportedForLearner     = status.New(codes.FailedPrecondition, "etcdserver: rpc not supported for learner").Err()
+	ErrGPRCNotSupportedForLearner     = status.New(codes.Unavailable, "etcdserver: rpc not supported for learner").Err()
 	ErrGRPCBadLeaderTransferee        = status.New(codes.FailedPrecondition, "etcdserver: bad leader transferee").Err()
 
 	errStringToError = map[string]error{
@@ -97,6 +98,7 @@ var (
 		ErrorDesc(ErrGRPCMemberNotFound):         ErrGRPCMemberNotFound,
 		ErrorDesc(ErrGRPCMemberNotLearner):       ErrGRPCMemberNotLearner,
 		ErrorDesc(ErrGRPCLearnerNotReady):        ErrGRPCLearnerNotReady,
+		ErrorDesc(ErrGRPCTooManyLearners):        ErrGRPCTooManyLearners,
 
 		ErrorDesc(ErrGRPCRequestTooLarge):        ErrGRPCRequestTooLarge,
 		ErrorDesc(ErrGRPCRequestTooManyRequests): ErrGRPCRequestTooManyRequests,
@@ -126,6 +128,7 @@ var (
 		ErrorDesc(ErrGRPCTimeoutDueToConnectionLost): ErrGRPCTimeoutDueToConnectionLost,
 		ErrorDesc(ErrGRPCUnhealthy):                  ErrGRPCUnhealthy,
 		ErrorDesc(ErrGRPCCorrupt):                    ErrGRPCCorrupt,
+		ErrorDesc(ErrGPRCNotSupportedForLearner):     ErrGPRCNotSupportedForLearner,
 		ErrorDesc(ErrGRPCBadLeaderTransferee):        ErrGRPCBadLeaderTransferee,
 	}
 )
@@ -153,6 +156,7 @@ var (
 	ErrMemberNotFound         = Error(ErrGRPCMemberNotFound)
 	ErrMemberNotLearner       = Error(ErrGRPCMemberNotLearner)
 	ErrMemberLearnerNotReady  = Error(ErrGRPCLearnerNotReady)
+	ErrTooManyLearners        = Error(ErrGRPCTooManyLearners)
 
 	ErrRequestTooLarge = Error(ErrGRPCRequestTooLarge)
 	ErrTooManyRequests = Error(ErrGRPCRequestTooManyRequests)

+ 2 - 1
etcdserver/api/v3rpc/util.go

@@ -36,8 +36,9 @@ var toGRPCErrorMap = map[error]error{
 	membership.ErrIDExists:                rpctypes.ErrGRPCMemberExist,
 	membership.ErrPeerURLexists:           rpctypes.ErrGRPCPeerURLExist,
 	membership.ErrMemberNotLearner:        rpctypes.ErrGRPCMemberNotLearner,
-	membership.ErrLearnerNotReady:         rpctypes.ErrGRPCLearnerNotReady,
+	membership.ErrTooManyLearners:         rpctypes.ErrGRPCTooManyLearners,
 	etcdserver.ErrNotEnoughStartedMembers: rpctypes.ErrMemberNotEnoughStarted,
+	etcdserver.ErrLearnerNotReady:         rpctypes.ErrGRPCLearnerNotReady,
 
 	mvcc.ErrCompacted:             rpctypes.ErrGRPCCompacted,
 	mvcc.ErrFutureRev:             rpctypes.ErrGRPCFutureRev,

+ 50 - 0
etcdserver/cluster_util.go

@@ -15,11 +15,13 @@
 package etcdserver
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
 	"net/http"
 	"sort"
+	"strings"
 	"time"
 
 	"go.etcd.io/etcd/etcdserver/api/membership"
@@ -355,3 +357,51 @@ func getVersion(lg *zap.Logger, m *membership.Member, rt http.RoundTripper) (*ve
 	}
 	return nil, err
 }
+
+func promoteMemberHTTP(ctx context.Context, url string, id uint64, peerRt http.RoundTripper) ([]*membership.Member, error) {
+	cc := &http.Client{Transport: peerRt}
+	// TODO: refactor member http handler code
+	// cannot import etcdhttp, so manually construct url
+	requestURL := url + "/members/promote/" + fmt.Sprintf("%d", id)
+	req, err := http.NewRequest("POST", requestURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	req = req.WithContext(ctx)
+	resp, err := cc.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	b, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.StatusCode == http.StatusRequestTimeout {
+		return nil, ErrTimeout
+	}
+	if resp.StatusCode == http.StatusPreconditionFailed {
+		// both ErrMemberNotLearner and ErrLearnerNotReady have the same http status code
+		if strings.Contains(string(b), ErrLearnerNotReady.Error()) {
+			return nil, ErrLearnerNotReady
+		}
+		if strings.Contains(string(b), membership.ErrMemberNotLearner.Error()) {
+			return nil, membership.ErrMemberNotLearner
+		}
+		return nil, fmt.Errorf("member promote: unknown error(%s)", string(b))
+	}
+	if resp.StatusCode == http.StatusNotFound {
+		return nil, membership.ErrIDNotFound
+	}
+
+	if resp.StatusCode != http.StatusOK { // all other types of errors
+		return nil, fmt.Errorf("member promote: unknown error(%s)", string(b))
+	}
+
+	var membs []*membership.Member
+	if err := json.Unmarshal(b, &membs); err != nil {
+		return nil, err
+	}
+	return membs, nil
+}

+ 1 - 0
etcdserver/errors.go

@@ -29,6 +29,7 @@ var (
 	ErrTimeoutLeaderTransfer      = errors.New("etcdserver: request timed out, leader transfer took too long")
 	ErrLeaderChanged              = errors.New("etcdserver: leader changed")
 	ErrNotEnoughStartedMembers    = errors.New("etcdserver: re-configuration failed due to not enough started members")
+	ErrLearnerNotReady            = errors.New("etcdserver: can only promote a learner member which is in sync with leader")
 	ErrNoLeader                   = errors.New("etcdserver: no leader")
 	ErrNotLeader                  = errors.New("etcdserver: not leader")
 	ErrRequestTooLarge            = errors.New("etcdserver: request is too large")

+ 157 - 36
etcdserver/server.go

@@ -97,6 +97,8 @@ const (
 	maxPendingRevokes = 16
 
 	recommendedMaxRequestBytes = 10 * 1024 * 1024
+
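+	// readyPercent is the fraction of the leader's raft log Match index that a
+	// learner must reach before it is considered in sync and safe to promote
+	// (see isLearnerReady)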
+	readyPercent = 0.9
 )
 
 var (
@@ -1553,43 +1555,17 @@ func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) ([]*
 		return nil, err
 	}
 
-	// TODO: might switch to less strict check when adding raft learner
-	if s.Cfg.StrictReconfigCheck {
-		// by default StrictReconfigCheck is enabled; reject new members if unhealthy
-		if !s.cluster.IsReadyToAddNewMember() {
-			if lg := s.getLogger(); lg != nil {
-				lg.Warn(
-					"rejecting member add request; not enough healthy members",
-					zap.String("local-member-id", s.ID().String()),
-					zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
-					zap.Error(ErrNotEnoughStartedMembers),
-				)
-			} else {
-				plog.Warningf("not enough started members, rejecting member add %+v", memb)
-			}
-			return nil, ErrNotEnoughStartedMembers
-		}
-
-		if !isConnectedFullySince(s.r.transport, time.Now().Add(-HealthInterval), s.ID(), s.cluster.Members()) {
-			if lg := s.getLogger(); lg != nil {
-				lg.Warn(
-					"rejecting member add request; local member has not been connected to all peers, reconfigure breaks active quorum",
-					zap.String("local-member-id", s.ID().String()),
-					zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
-					zap.Error(ErrUnhealthy),
-				)
-			} else {
-				plog.Warningf("not healthy for reconfigure, rejecting member add %+v", memb)
-			}
-			return nil, ErrUnhealthy
-		}
-	}
-
 	// TODO: move Member to protobuf type
 	b, err := json.Marshal(memb)
 	if err != nil {
 		return nil, err
 	}
+
+	// by default StrictReconfigCheck is enabled; reject new members if unhealthy.
+	if err := s.mayAddMember(memb); err != nil {
+		return nil, err
+	}
+
 	cc := raftpb.ConfChange{
 		Type:    raftpb.ConfChangeAddNode,
 		NodeID:  uint64(memb.ID),
@@ -1603,6 +1579,43 @@ func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) ([]*
 	return s.configure(ctx, cc)
 }
 
+func (s *EtcdServer) mayAddMember(memb membership.Member) error {
+	if !s.Cfg.StrictReconfigCheck {
+		return nil
+	}
+
+	// protect quorum when adding voting member
+	if !memb.IsLearner && !s.cluster.IsReadyToAddVotingMember() {
+		if lg := s.getLogger(); lg != nil {
+			lg.Warn(
+				"rejecting member add request; not enough healthy members",
+				zap.String("local-member-id", s.ID().String()),
+				zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
+				zap.Error(ErrNotEnoughStartedMembers),
+			)
+		} else {
+			plog.Warningf("not enough started members, rejecting member add %+v", memb)
+		}
+		return ErrNotEnoughStartedMembers
+	}
+
+	if !isConnectedFullySince(s.r.transport, time.Now().Add(-HealthInterval), s.ID(), s.cluster.VotingMembers()) {
+		if lg := s.getLogger(); lg != nil {
+			lg.Warn(
+				"rejecting member add request; local member has not been connected to all peers, reconfigure breaks active quorum",
+				zap.String("local-member-id", s.ID().String()),
+				zap.String("requested-member-add", fmt.Sprintf("%+v", memb)),
+				zap.Error(ErrUnhealthy),
+			)
+		} else {
+			plog.Warningf("not healthy for reconfigure, rejecting member add %+v", memb)
+		}
+		return ErrUnhealthy
+	}
+
+	return nil
+}
+
 func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) ([]*membership.Member, error) {
 	if err := s.checkMembershipOperationPermission(ctx); err != nil {
 		return nil, err
@@ -1622,11 +1635,52 @@ func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) ([]*membership
 
 // PromoteMember promotes a learner node to a voting node.
 func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membership.Member, error) {
+	// only the raft leader has information on whether the to-be-promoted learner node is ready. If the
+	// promoteMember call fails with ErrNotLeader, forward the request to the leader node via HTTP. If it
+	// fails with any other error, return that error.
+	resp, err := s.promoteMember(ctx, id)
+	if err != ErrNotLeader {
+		return resp, err
+	}
+
+	cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
+	defer cancel()
+	// forward to leader
+	for cctx.Err() == nil {
+		leader, err := s.waitLeader(cctx)
+		if err != nil {
+			return nil, err
+		}
+		for _, url := range leader.PeerURLs {
+			resp, err := promoteMemberHTTP(cctx, url, id, s.peerRt)
+			if err == nil {
+				return resp, nil
+			}
+			// If member promotion failed, return early. Otherwise keep retrying.
+			if err == ErrLearnerNotReady || err == membership.ErrIDNotFound || err == membership.ErrMemberNotLearner {
+				return nil, err
+			}
+		}
+	}
+
+	if cctx.Err() == context.DeadlineExceeded {
+		return nil, ErrTimeout
+	}
+	return nil, ErrCanceled
+}
+
+// promoteMember checks whether the to-be-promoted learner node is ready before sending the promote
+// request to raft.
+// It returns ErrNotLeader if the local node is not the raft leader (and therefore does not have
+// enough information to determine whether the learner is ready), and ErrLearnerNotReady if the
+// local node is the leader (and therefore has enough information) but has determined that the
+// learner is not yet ready to be promoted.
+func (s *EtcdServer) promoteMember(ctx context.Context, id uint64) ([]*membership.Member, error) {
 	if err := s.checkMembershipOperationPermission(ctx); err != nil {
 		return nil, err
 	}
 
-	// check if we can promote this learner
+	// check if we can promote this learner.
 	if err := s.mayPromoteMember(types.ID(id)); err != nil {
 		return nil, err
 	}
@@ -1654,10 +1708,61 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
 }
 
 func (s *EtcdServer) mayPromoteMember(id types.ID) error {
+	err := s.isLearnerReady(uint64(id))
+	if err != nil {
+		return err
+	}
+
 	if !s.Cfg.StrictReconfigCheck {
 		return nil
 	}
-	// TODO add more checks whether the member can be promoted.
+	if !s.cluster.IsReadyToPromoteMember(uint64(id)) {
+		if lg := s.getLogger(); lg != nil {
+			lg.Warn(
+				"rejecting member promote request; not enough healthy members",
+				zap.String("local-member-id", s.ID().String()),
+				zap.String("requested-member-remove-id", id.String()),
+				zap.Error(ErrNotEnoughStartedMembers),
+			)
+		} else {
+			plog.Warningf("not enough started members, rejecting promote member %s", id)
+		}
+		return ErrNotEnoughStartedMembers
+	}
+
+	return nil
+}
+
+// isLearnerReady checks whether the learner has caught up with the leader.
+// Note: it returns nil if the member is not found in the cluster or is not a learner;
+// these two conditions are checked again later, before the apply phase.
+func (s *EtcdServer) isLearnerReady(id uint64) error {
+	rs := s.raftStatus()
+
+	// only the leader's raftStatus.Progress is non-nil; nil Progress means this node is not the leader
+	if rs.Progress == nil {
+		return ErrNotLeader
+	}
+
+	var learnerMatch uint64
+	isFound := false
+	leaderID := rs.ID
+	for memberID, progress := range rs.Progress {
+		if id == memberID {
+			// check its status
+			learnerMatch = progress.Match
+			isFound = true
+			break
+		}
+	}
+
+	if isFound {
+		leaderMatch := rs.Progress[leaderID].Match
+		// the learner's Match has not caught up with the leader's yet
+		if float64(learnerMatch) < float64(leaderMatch)*readyPercent {
+			return ErrLearnerNotReady
+		}
+	}
 
 	return nil
 }
@@ -1667,7 +1772,13 @@ func (s *EtcdServer) mayRemoveMember(id types.ID) error {
 		return nil
 	}
 
-	if !s.cluster.IsReadyToRemoveMember(uint64(id)) {
+	isLearner := s.cluster.IsMemberExist(id) && s.cluster.Member(id).IsLearner
+	// no need to check quorum when removing non-voting member
+	if isLearner {
+		return nil
+	}
+
+	if !s.cluster.IsReadyToRemoveVotingMember(uint64(id)) {
 		if lg := s.getLogger(); lg != nil {
 			lg.Warn(
 				"rejecting member remove request; not enough healthy members",
@@ -1687,7 +1798,7 @@ func (s *EtcdServer) mayRemoveMember(id types.ID) error {
 	}
 
 	// protect quorum if some members are down
-	m := s.cluster.Members()
+	m := s.cluster.VotingMembers()
 	active := numConnectedSince(s.r.transport, time.Now().Add(-HealthInterval), s.ID(), m)
 	if (active - 1) < 1+((len(m)-1)/2) {
 		if lg := s.getLogger(); lg != nil {
@@ -2501,3 +2612,13 @@ func (s *EtcdServer) Logger() *zap.Logger {
 func (s *EtcdServer) IsLearner() bool {
 	return s.cluster.IsLocalMemberLearner()
 }
+
+// IsMemberExist reports whether the member with the given id exists in the cluster.
+func (s *EtcdServer) IsMemberExist(id types.ID) bool {
+	return s.cluster.IsMemberExist(id)
+}
+
+// raftStatus returns the raft status of this etcd node.
+func (s *EtcdServer) raftStatus() raft.Status {
+	return s.r.Node.Status()
+}

+ 0 - 48
etcdserver/server_test.go

@@ -1340,54 +1340,6 @@ func TestRemoveMember(t *testing.T) {
 	}
 }
 
-// TestPromoteMember tests PromoteMember can propose and perform learner node promotion.
-func TestPromoteMember(t *testing.T) {
-	n := newNodeConfChangeCommitterRecorder()
-	n.readyc <- raft.Ready{
-		SoftState: &raft.SoftState{RaftState: raft.StateLeader},
-	}
-	cl := newTestCluster(nil)
-	st := v2store.New()
-	cl.SetStore(v2store.New())
-	cl.AddMember(&membership.Member{
-		ID: 1234,
-		RaftAttributes: membership.RaftAttributes{
-			IsLearner: true,
-		},
-	})
-	r := newRaftNode(raftNodeConfig{
-		lg:          zap.NewExample(),
-		Node:        n,
-		raftStorage: raft.NewMemoryStorage(),
-		storage:     mockstorage.NewStorageRecorder(""),
-		transport:   newNopTransporter(),
-	})
-	s := &EtcdServer{
-		lgMu:       new(sync.RWMutex),
-		lg:         zap.NewExample(),
-		r:          *r,
-		v2store:    st,
-		cluster:    cl,
-		reqIDGen:   idutil.NewGenerator(0, time.Time{}),
-		SyncTicker: &time.Ticker{},
-	}
-	s.start()
-	_, err := s.PromoteMember(context.TODO(), 1234)
-	gaction := n.Action()
-	s.Stop()
-
-	if err != nil {
-		t.Fatalf("PromoteMember error: %v", err)
-	}
-	wactions := []testutil.Action{{Name: "ProposeConfChange:ConfChangeAddNode"}, {Name: "ApplyConfChange:ConfChangeAddNode"}}
-	if !reflect.DeepEqual(gaction, wactions) {
-		t.Errorf("action = %v, want %v", gaction, wactions)
-	}
-	if cl.Member(1234).IsLearner {
-		t.Errorf("member with id 1234 is not promoted")
-	}
-}
-
 // TestUpdateMember tests UpdateMember can propose and perform member update.
 func TestUpdateMember(t *testing.T) {
 	n := newNodeConfChangeCommitterRecorder()

+ 10 - 2
etcdserver/v3_server.go

@@ -260,7 +260,11 @@ func (s *EtcdServer) LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, e
 			}
 		}
 	}
-	return -1, ErrTimeout
+
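+	// distinguish a caller-canceled context from an actual timeout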
+	if cctx.Err() == context.DeadlineExceeded {
+		return -1, ErrTimeout
+	}
+	return -1, ErrCanceled
 }
 
 func (s *EtcdServer) LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
@@ -303,7 +307,11 @@ func (s *EtcdServer) LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveR
 			}
 		}
 	}
-	return nil, ErrTimeout
+
+	if cctx.Err() == context.DeadlineExceeded {
+		return nil, ErrTimeout
+	}
+	return nil, ErrCanceled
 }
 
 func (s *EtcdServer) LeaseLeases(ctx context.Context, r *pb.LeaseLeasesRequest) (*pb.LeaseLeasesResponse, error) {

+ 19 - 0
integration/cluster.go

@@ -1166,6 +1166,10 @@ func (m *member) RecoverPartition(t testing.TB, others ...*member) {
 	}
 }
 
+func (m *member) ReadyNotify() <-chan struct{} {
+	return m.s.ReadyNotify()
+}
+
 func MustNewHTTPClient(t testing.TB, eps []string, tls *transport.TLSInfo) client.Client {
 	cfgtls := transport.TLSInfo{}
 	if tls != nil {
@@ -1392,3 +1396,18 @@ func (p SortableProtoMemberSliceByPeerURLs) Less(i, j int) bool {
 	return p[i].PeerURLs[0] < p[j].PeerURLs[0]
 }
 func (p SortableProtoMemberSliceByPeerURLs) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+
+// MustNewMember creates a new member instance based on the response of V3 Member Add API.
+func (c *ClusterV3) MustNewMember(t testing.TB, resp *clientv3.MemberAddResponse) *member {
+	m := c.mustNewMember(t)
+	m.isLearner = resp.Member.IsLearner
+	m.NewCluster = false
+
+	m.InitialPeerURLsMap = types.URLsMap{}
+	for _, mm := range c.Members {
+		m.InitialPeerURLsMap[mm.Name] = mm.PeerURLs
+	}
+	m.InitialPeerURLsMap[m.Name] = types.MustNewURLs(resp.Member.PeerURLs)
+
+	return m
+}