Selaa lähdekoodia

etcdmain: calculate dial timeout for peer transport

This helps peer communication in a globally-deployed cluster.
Yicheng Qin 10 vuotta sitten
vanhempi
commit
ffae601af5
3 muutettua tiedostoa jossa 9 lisäystä ja 3 poistoa
  1. 7 1
      etcdmain/etcd.go
  2. 2 1
      integration/cluster_test.go
  3. 0 1
      rafthttp/peer.go

+ 7 - 1
etcdmain/etcd.go

@@ -174,7 +174,7 @@ func startEtcd(cfg *config) (<-chan struct{}, error) {
 		return nil, fmt.Errorf("error setting up initial cluster: %v", err)
 	}
 
-	pt, err := transport.NewTimeoutTransport(cfg.peerTLSInfo, rafthttp.DialTimeout, rafthttp.ConnReadTimeout, rafthttp.ConnWriteTimeout)
+	pt, err := transport.NewTimeoutTransport(cfg.peerTLSInfo, peerDialTimeout(cfg.ElectionMs), rafthttp.ConnReadTimeout, rafthttp.ConnWriteTimeout)
 	if err != nil {
 		return nil, err
 	}
@@ -503,3 +503,9 @@ func setupLogging(cfg *config) {
 		repoLog.SetLogLevel(settings)
 	}
 }
+
+func peerDialTimeout(electionMs uint) time.Duration {
+	// 1s for queue wait and system delay, plus one RTT,
+	// which is smaller than 1/5 of the election timeout
+	return time.Second + time.Duration(electionMs)*time.Millisecond/5
+}

+ 2 - 1
integration/cluster_test.go

@@ -842,7 +842,8 @@ func mustNewHTTPClient(t *testing.T, eps []string) client.Client {
 }
 
 func mustNewTransport(t *testing.T, tlsInfo transport.TLSInfo) *http.Transport {
-	tr, err := transport.NewTimeoutTransport(tlsInfo, rafthttp.DialTimeout, rafthttp.ConnReadTimeout, rafthttp.ConnWriteTimeout)
+	// ticks in integration tests are short, so a 1s dial timeout works well.
+	tr, err := transport.NewTimeoutTransport(tlsInfo, time.Second, rafthttp.ConnReadTimeout, rafthttp.ConnWriteTimeout)
 	if err != nil {
 		t.Fatal(err)
 	}

+ 0 - 1
rafthttp/peer.go

@@ -26,7 +26,6 @@ import (
 )
 
 const (
-	DialTimeout = time.Second
 	// ConnRead/WriteTimeout is the i/o timeout set on each connection rafthttp pkg creates.
 	// A 5 seconds timeout is good enough for recycling bad connections. Or we have to wait for
 	// tcp keepalive failing to detect a bad connection, which is at minutes level.