
chore(tests): start TLS cluster slowly to work around startup problem

Yicheng Qin · 11 years ago
commit 0a4b6570e1

tests/functional/multi_node_kill_all_and_recovery_test.go  +7 -0

@@ -2,6 +2,7 @@ package test
 
 import (
 	"os"
+	"strconv"
 	"testing"
 	"time"
 
@@ -128,6 +129,12 @@ func TestTLSMultiNodeKillAllAndRecovery(t *testing.T) {
 
 	for i := 0; i < clusterSize; i++ {
 		etcds[i], err = os.StartProcess(EtcdBinPath, argGroup[i], procAttr)
+		// See util.go for the reason we wait for each server to start
+		client := buildClient()
+		err = WaitForServer("127.0.0.1:400"+strconv.Itoa(i+1), client, "http")
+		if err != nil {
+			t.Fatalf("node start error: %s", err)
+		}
 	}
 
 	go Monitor(clusterSize, 1, leaderChan, all, stop)

tests/functional/util.go  +12 -2

@@ -129,9 +129,19 @@ func CreateCluster(size int, procAttr *os.ProcAttr, ssl bool) ([][]string, []*os
 		// The problem is that if the master isn't up then the children
 		// have to retry. This retry can take upwards of 15 seconds
 		// which slows tests way down and some of them fail.
-		if i == 0 {
+		//
+		// Waiting for each server to start when ssl is enabled is a workaround.
+		// Autotest machines are dramatically slow, and it can take several
+		// seconds to build TLS connections between servers. That is especially
+		// bad when the second machine joins the cluster, because the cluster
+		// cannot serve requests at that time. A machine that tries to join
+		// during this window will fail, and the current implementation makes
+		// it give up after a single try (bug in #661). This leaves the cluster
+		// running with N-1 machines.
+		// TODO(yichengq): It should be fixed.
+		if i == 0 || ssl {
 			client := buildClient()
-			err = WaitForServer("127.0.0.1:4001", client, "http")
+			err = WaitForServer("127.0.0.1:400"+strconv.Itoa(i+1), client, "http")
 			if err != nil {
 				return nil, nil, err
 			}
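
For context, the WaitForServer helper used above lives in tests/functional/util.go and its body is not part of this diff. A minimal sketch of the kind of readiness polling it performs might look like the following; the function name, endpoint path, timeout, and retry interval here are assumptions for illustration, not the actual implementation.

// Hypothetical readiness poll; not the actual util.go implementation.
package test

import (
	"fmt"
	"net/http"
	"time"
)

// waitForServerSketch polls scheme://addr until the server answers or a
// deadline passes, which is the guarantee the tests rely on before moving
// on to start the next node.
func waitForServerSketch(addr string, client *http.Client, scheme string) error {
	deadline := time.Now().Add(10 * time.Second) // assumed timeout
	url := scheme + "://" + addr + "/"           // assumed endpoint
	for time.Now().Before(deadline) {
		resp, err := client.Get(url)
		if err == nil {
			resp.Body.Close()
			return nil
		}
		time.Sleep(100 * time.Millisecond) // assumed retry interval
	}
	return fmt.Errorf("server %s did not come up before the deadline", addr)
}

Blocking on such a poll after each os.StartProcess call is what makes the TLS cluster come up "slowly": node i+1 is only launched once node i is reachable, so no joiner hits the window where the cluster cannot yet serve requests.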