Browse Source

chore(standby): minor changes based on comments

Yicheng Qin 11 years ago
parent
commit
5367c1c998

+ 6 - 6
Documentation/design/standbys.md

@@ -11,13 +11,13 @@ Standbys also act as standby nodes in the event that a peer node in the cluster
 
 ## Configuration Parameters
 
-There are three configuration parameters used by standbys: active size, promotion delay and standby sync interval.
+There are three configuration parameters used by standbys: active size, remove delay and standby sync interval.
 
 The active size specifies a target size for the number of peers in the cluster.
 If there are not enough peers to meet the active size, then standbys will send join requests until the peer count is equal to the active size.
-If there are more peers than the target active size then peers are demoted to standbys by the leader.
+If there are more peers than the target active size then peers are removed by the leader and will become standbys.
 
-The promotion delay specifies how long the cluster should wait before removing a dead peer.
+The remove delay specifies how long the cluster should wait before removing a dead peer.
 By default this is 30 minutes.
 If a peer is inactive for 30 minutes then the peer is removed.
 
@@ -169,7 +169,7 @@ Loop:
   Sleep for some time
 
   For each peer:
-    If peer last activity time > promote delay:
+    If peer last activity time > remove delay:
       Remove the peer
       Goto Loop
 ```
@@ -200,7 +200,7 @@ Machines in standby mode always sync the cluster. If sync fails, it uses the fir
 
 The leader of the cluster loses the connection with the peer.
 
-When the time exceeds promotion delay, it removes the peer from the cluster.
+When the time exceeds remove delay, it removes the peer from the cluster.
 
 A machine in standby mode finds an available place in the cluster. It sends a join request and joins the cluster.
 
@@ -224,7 +224,7 @@ No change for the cluster.
 
 ## Future Attack Plans
 
-1. Based on heartbeat miss and promotion delay, standby could adjust its next check time.
+1. Based on heartbeat miss and remove delay, standby could adjust its next check time.
 
 2. Preregister the promotion target when a heartbeat miss happens.
 

+ 2 - 2
etcd/etcd.go

@@ -237,11 +237,11 @@ func (e *Etcd) Run() {
 	peerTLSConfig := server.TLSServerConfig(e.Config.PeerTLSInfo())
 	etcdTLSConfig := server.TLSServerConfig(e.Config.EtcdTLSInfo())
 
-	toStartPeerServer, possiblePeers, err := e.PeerServer.FindCluster(e.Config.Discovery, e.Config.Peers)
+	startPeerServer, possiblePeers, err := e.PeerServer.FindCluster(e.Config.Discovery, e.Config.Peers)
 	if err != nil {
 		log.Fatal(err)
 	}
-	if toStartPeerServer {
+	if startPeerServer {
 		e.setMode(PeerMode)
 	} else {
 		e.StandbyServer.SyncCluster(possiblePeers)

+ 2 - 2
server/cluster_config.go

@@ -11,10 +11,10 @@ const (
 	// MinActiveSize is the minimum active size allowed.
 	MinActiveSize = 3
 
-	// DefaultRemoveDelay is the default elapsed time before promotion.
+	// DefaultRemoveDelay is the default elapsed time before removal.
 	DefaultRemoveDelay = int((30 * time.Minute) / time.Second)
 
-	// MinRemoveDelay is the minimum promote delay allowed.
+	// MinRemoveDelay is the minimum remove delay allowed.
 	MinRemoveDelay = int((2 * time.Second) / time.Second)
 
 	// DefaultSyncInterval is the default interval for cluster sync.

+ 3 - 3
server/peer_server.go

@@ -372,8 +372,8 @@ func (s *PeerServer) ClusterConfig() *ClusterConfig {
 }
 
 // SetClusterConfig updates the current cluster configuration.
-// Adjusting the active size will cause the PeerServer to demote peers or
-// promote standbys to match the new size.
+// Adjusting the active size will cause cluster to add or remove machines
+// to match the new size.
 func (s *PeerServer) SetClusterConfig(c *ClusterConfig) {
 	// Set minimums.
 	if c.ActiveSize < MinActiveSize {
@@ -820,7 +820,7 @@ func (s *PeerServer) monitorPeerActivity() {
 		removeDelay := time.Duration(s.ClusterConfig().RemoveDelay) * time.Second
 		peers := s.raftServer.Peers()
 		for _, peer := range peers {
-			// If the last response from the peer is longer than the promote delay
+			// If the last response from the peer is longer than the remove delay
 			// then automatically demote the peer.
 			if !peer.LastActivity().IsZero() && now.Sub(peer.LastActivity()) > removeDelay {
 				log.Infof("%s: removing node: %v; last activity %v ago", s.Config.Name, peer.Name, now.Sub(peer.LastActivity()))

+ 0 - 1
server/peer_server_handlers.go

@@ -221,7 +221,6 @@ func (ps *PeerServer) setClusterConfigHttpHandler(w http.ResponseWriter, req *ht
 }
 
 // Retrieves a list of peers and standbys.
-// If leader exists, it is at the first place.
 func (ps *PeerServer) getMachinesHttpHandler(w http.ResponseWriter, req *http.Request) {
 	machines := make([]*machineMessage, 0)
 	leader := ps.raftServer.Leader()

+ 2 - 2
tests/functional/kill_leader_test.go

@@ -114,10 +114,10 @@ func TestKillLeaderWithStandbys(t *testing.T) {
 	leader := "http://127.0.0.1:7001"
 
 	for i := 0; i < clusterSize; i++ {
-		fmt.Println("leader is ", leader)
+		t.Log("leader is ", leader)
 		port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
 		num := port - 7001
-		fmt.Println("kill server ", num)
+		t.Log("kill server ", num)
 		etcds[num].Kill()
 		etcds[num].Release()
 

+ 8 - 8
tests/functional/standby_test.go

@@ -36,7 +36,7 @@ func TestStandby(t *testing.T) {
 	assert.NoError(t, err)
 	assert.Equal(t, len(result.Node.Nodes), 9)
 
-	fmt.Println("Reconfigure with a smaller active size")
+	t.Log("Reconfigure with a smaller active size")
 	resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":7, "syncInterval":1}`))
 	if !assert.Equal(t, resp.StatusCode, 200) {
 		t.FailNow()
@@ -50,7 +50,7 @@ func TestStandby(t *testing.T) {
 	assert.NoError(t, err)
 	assert.Equal(t, len(result.Node.Nodes), 7)
 
-	fmt.Println("Test the functionality of all servers")
+	t.Log("Test the functionality of all servers")
 	// Set key.
 	time.Sleep(time.Second)
 	if _, err := c.Set("foo", "bar", 0); err != nil {
@@ -69,7 +69,7 @@ func TestStandby(t *testing.T) {
 		}
 	}
 
-	fmt.Println("Reconfigure with larger active size and wait for join")
+	t.Log("Reconfigure with larger active size and wait for join")
 	resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":8, "syncInterval":1}`))
 	if !assert.Equal(t, resp.StatusCode, 200) {
 		t.FailNow()
@@ -106,7 +106,7 @@ func TestStandbyAutoJoin(t *testing.T) {
 	assert.NoError(t, err)
 	assert.Equal(t, len(result.Node.Nodes), 5)
 
-	// Reconfigure with a short promote delay (2 second).
+	// Reconfigure with a short remove delay (2 second).
 	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":4, "removeDelay":2, "syncInterval":1}`))
 	if !assert.Equal(t, resp.StatusCode, 200) {
 		t.FailNow()
@@ -173,7 +173,7 @@ func TestStandbyGradualChange(t *testing.T) {
 				num++
 			}
 
-			fmt.Println("Reconfigure with active size", num)
+			t.Log("Reconfigure with active size", num)
 			resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(fmt.Sprintf(`{"activeSize":%d, "syncInterval":1}`, num)))
 			if !assert.Equal(t, resp.StatusCode, 200) {
 				t.FailNow()
@@ -191,7 +191,7 @@ func TestStandbyGradualChange(t *testing.T) {
 			assert.NoError(t, err)
 			assert.Equal(t, len(result.Node.Nodes), num)
 
-			fmt.Println("Test the functionality of all servers")
+			t.Log("Test the functionality of all servers")
 			// Set key.
 			if _, err := c.Set("foo", "bar", 0); err != nil {
 				panic(err)
@@ -241,7 +241,7 @@ func TestStandbyDramaticChange(t *testing.T) {
 				num += 6
 			}
 
-			fmt.Println("Reconfigure with active size", num)
+			t.Log("Reconfigure with active size", num)
 			resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(fmt.Sprintf(`{"activeSize":%d, "syncInterval":1}`, num)))
 			if !assert.Equal(t, resp.StatusCode, 200) {
 				t.FailNow()
@@ -259,7 +259,7 @@ func TestStandbyDramaticChange(t *testing.T) {
 			assert.NoError(t, err)
 			assert.Equal(t, len(result.Node.Nodes), num)
 
-			fmt.Println("Test the functionality of all servers")
+			t.Log("Test the functionality of all servers")
 			// Set key.
 			if _, err := c.Set("foo", "bar", 0); err != nil {
 				panic(err)