Browse Source

client: add a mechanism for various endpoint selection modes

The current etcd client library chooses its default destination node at
random from the members of a cluster. However, write requests and read
requests (for consistent results) must be forwarded to the leader node,
by the nature of the Raft algorithm. If the chosen node is a follower,
forwarding from the follower to the leader causes additional network
traffic.

Mainly to reduce this forwarding traffic, this commit adds a new
mechanism for choosing among endpoint selection modes to the client
library; the mode is configured with client.Config.SelectionMode.

Currently, two modes are provided:
 - EndpointSelectionRandom: default, same as the existing behavior
   (pick a node at random)
 - EndpointSelectionPrioritizeLeader: prioritize the leader, for the
   purpose described above

I evaluated the effectiveness of EndpointSelectionPrioritizeLeader
on 4 AWS t1.micro instances (3 nodes for the etcd cluster and 1 node
for the etcd client). The client runs this simple benchmark
(https://github.com/mitake/etcd-things/tree/master/prioritize-leader-bench),
which just writes 10000 keys. With SelectionMode == EndpointSelectionRandom
(the default), the benchmark needed 1 min 32.102 sec to finish. With
SelectionMode == EndpointSelectionPrioritizeLeader, the benchmark
needed 1 min 4.760 sec.
Hitoshi Mitake 10 years ago
parent
commit
a46ffc60e5
1 changed files with 52 additions and 4 deletions
  1. 52 4
      client/client.go

+ 52 - 4
client/client.go

@@ -34,6 +34,7 @@ var (
 	ErrNoEndpoints           = errors.New("client: no endpoints available")
 	ErrTooManyRedirects      = errors.New("client: too many redirects")
 	ErrClusterUnavailable    = errors.New("client: etcd cluster is unavailable or misconfigured")
+	ErrNoLeaderEndpoint      = errors.New("client: no leader endpoint available")
 	errTooManyRedirectChecks = errors.New("client: too many redirect checks")
 )
 
@@ -48,6 +49,19 @@ var DefaultTransport CancelableTransport = &http.Transport{
 	TLSHandshakeTimeout: 10 * time.Second,
 }
 
+// EndpointSelectionMode identifies how the client selects the endpoint it
+// sends requests to. The zero value is EndpointSelectionRandom.
+type EndpointSelectionMode int
+
+const (
+	// EndpointSelectionRandom picks an endpoint at random.
+	EndpointSelectionRandom EndpointSelectionMode = iota
+
+	// EndpointSelectionPrioritizeLeader prioritizes the leader's endpoint to
+	// avoid needless forwarding between follower and leader.
+	//
+	// This mode should be used with Client.AutoSync().
+	EndpointSelectionPrioritizeLeader
+)
+
 type Config struct {
 	// Endpoints defines a set of URLs (schemes, hosts and ports only)
 	// that can be used to communicate with a logical etcd cluster. For
@@ -104,6 +118,9 @@ type Config struct {
 	//
 	// A HeaderTimeoutPerRequest of zero means no timeout.
 	HeaderTimeoutPerRequest time.Duration
+
+	// SelectionMode specifies a way of selecting destination endpoint.
+	SelectionMode EndpointSelectionMode
 }
 
 func (cfg *Config) transport() CancelableTransport {
@@ -169,6 +186,7 @@ func New(cfg Config) (Client, error) {
 	c := &httpClusterClient{
 		clientFactory: newHTTPClientFactory(cfg.transport(), cfg.checkRedirect(), cfg.HeaderTimeoutPerRequest),
 		rand:          rand.New(rand.NewSource(int64(time.Now().Nanosecond()))),
+		selectionMode: cfg.SelectionMode,
 	}
 	if cfg.Username != "" {
 		c.credentials = &credentials{
@@ -216,7 +234,18 @@ type httpClusterClient struct {
 	pinned        int
 	credentials   *credentials
 	sync.RWMutex
-	rand *rand.Rand
+	rand          *rand.Rand
+	selectionMode EndpointSelectionMode
+}
+
+// getLeaderEndpoint asks the members API for the current leader and returns
+// the first client URL that the leader advertises.
+//
+// An error from the Leader call is returned unchanged. ErrNoLeaderEndpoint is
+// returned when the reported leader advertises no client URLs.
+func (c *httpClusterClient) getLeaderEndpoint() (string, error) {
+	mAPI := NewMembersAPI(c)
+	leader, err := mAPI.Leader(context.Background())
+	if err != nil {
+		return "", err
+	}
+	// Guard the index below: an empty ClientURLs slice would otherwise panic.
+	if len(leader.ClientURLs) == 0 {
+		return "", ErrNoLeaderEndpoint
+	}
+
+	return leader.ClientURLs[0], nil // TODO: how to handle multiple client URLs?
 }
 
 func (c *httpClusterClient) reset(eps []string) error {
@@ -233,9 +262,28 @@ func (c *httpClusterClient) reset(eps []string) error {
 		neps[i] = *u
 	}
 
-	c.endpoints = shuffleEndpoints(c.rand, neps)
-	// TODO: pin old endpoint if possible, and rebalance when new endpoint appears
-	c.pinned = 0
+	switch c.selectionMode {
+	case EndpointSelectionRandom:
+		c.endpoints = shuffleEndpoints(c.rand, neps)
+		c.pinned = 0
+	case EndpointSelectionPrioritizeLeader:
+		c.endpoints = neps
+		lep, err := c.getLeaderEndpoint()
+		if err != nil {
+			return ErrNoLeaderEndpoint
+		}
+
+		for i := range c.endpoints {
+			if c.endpoints[i].String() == lep {
+				c.pinned = i
+				break
+			}
+		}
+		// If endpoints doesn't have the lu, just keep c.pinned = 0.
+		// Forwarding between follower and leader would be required but it works.
+	default:
+		return errors.New(fmt.Sprintf("invalid endpoint selection mode: %d", c.selectionMode))
+	}
 
 	return nil
 }