Browse Source

feat(discovery): fully working discovery now

Brandon Philips 12 years ago
parent
commit
8687dd3802

+ 5 - 5
Documentation/discovery-protocol.md

@@ -1,14 +1,14 @@
 # Discovery Protocol
 
-Starting an etcd cluster initially can be painful since each machine needs to know of at least one live machine in the cluster. If you are trying to bring up a cluster all at once, say using an AWS cloud formation, you also need to coordinate who will be the initial cluster leader. The discovery protocol helps you by providing a way to discover the peers in a new etcd cluster using another already running etcd cluster.
+Starting a new etcd cluster can be painful since each machine needs to know of at least one live machine in the cluster. If you are trying to bring up a new cluster all at once, say using an AWS CloudFormation template, you also need to coordinate who will be the initial cluster leader. The discovery protocol solves this by using an existing, already-running etcd cluster to start the new etcd cluster.
 
-To use this protocol you add the command line flag `-discovery` to your etcd args. In this example we will use `http://example.com/v2/keys/_etcd/registry` as the URL prefix.
+To use this feature you add the command line flag `-discovery` to your etcd args. In this example we will use `http://example.com/v2/keys/_etcd/registry` as the URL prefix.
 
 ## The Protocol
 
 By convention the etcd discovery protocol uses the key prefix `_etcd/registry`. A full URL to the keyspace will be `http://example.com/v2/keys/_etcd/registry`.
 
-## Creating a New Cluster
+### Creating a New Cluster
 
 Generate a unique token that will identify the new cluster and create a key called "_state". If you get a `201 Created` back then your key is unused and you can proceed with cluster creation. If the return value is `412 Precondition Failed` then you will need to create a new token.
 
@@ -17,7 +17,7 @@ UUID=$(uuidgen)
 curl -X PUT "http://example.com/v2/keys/_etcd/registry/${UUID}/_state?prevExist=false" -d value=init
 ```
 
-## Bringing up Machines
+### Bringing up Machines
 
 Now that you have your cluster ID you can start bringing up machines. Every machine will follow this protocol internally in etcd if given the `-discovery` flag.
 
@@ -29,7 +29,7 @@ The first thing etcd must do is register your machine. This is done by using the
 curl -X PUT "http://example.com/v2/keys/_etcd/registry/${UUID}/${etcd_machine_name}?ttl=604800" -d value=${peer_addr}
 ```
 
-### Figuring out your Peers
+### Discovering Peers
 
 Now that this etcd machine is registered it must discover its peers.
 

+ 32 - 14
config/config.go

@@ -145,21 +145,9 @@ func (c *Config) Load(arguments []string) error {
 
 	// Attempt cluster discovery
 	if c.Discovery != "" {
-		p, err := discovery.Do(c.Discovery, c.Name, c.Peer.Addr)
-		if err != nil {
-			log.Fatalf("Bootstrapping encountered an unexpected error: %v", err)
-		}
-
-		for i := range p {
-			// Strip the scheme off of the peer if it has one
-			// TODO(bp): clean this up!
-			purl, err := url.Parse(p[i])
-			if err == nil {
-				p[i] = purl.Host
-			}
+		if err := c.handleDiscovery(); err != nil {
+			return err
 		}
-
-		c.Peers = p
 	}
 
 	// Force remove server configuration if specified.
@@ -226,6 +214,36 @@ func (c *Config) loadEnv(target interface{}) error {
 	return nil
 }
 
+func (c *Config) handleDiscovery() error { // Calls discovery.Do with c.Discovery, c.Name and c.Peer.Addr, then stores the returned peers (scheme stripped) in c.Peers.
+	p, err := discovery.Do(c.Discovery, c.Name, c.Peer.Addr)
+
+	// This is fatal: discovery encountered an unexpected error
+	// and c.Peers is empty, so there is no peer list to fall back on.
+	if err != nil && len(c.Peers) == 0 {
+		log.Fatalf("Discovery failed and a backup peer list wasn't provided: %v", err)
+		return err // NOTE(review): only reached if log.Fatalf returns; presumably it exits the process — confirm.
+	}
+
+	// Warn about errors coming from discovery; this isn't fatal
+	// since the user might have provided a peer list elsewhere.
+	if err != nil {
+		log.Warnf("Discovery encountered an error but a backup peer list (%v) was provided: %v", c.Peers, err)
+	}
+
+	for i := range p {
+		// Strip the scheme off of the peer if it has one, keeping only the URL's Host part.
+		// TODO(bp): clean this up!
+		purl, err := url.Parse(p[i])
+		if err == nil { // entries that fail to parse are left unchanged; this err shadows the outer one
+			p[i] = purl.Host
+		}
+	}
+
+	c.Peers = p // NOTE(review): also runs after the warning path above, so the backup list in c.Peers is replaced by whatever Do returned alongside the error — confirm this is intended.
+
+	return nil
+}
+
 // Loads configuration from command line flags.
 func (c *Config) LoadFlags(arguments []string) error {
 	var peers, cors, path string

+ 18 - 12
discovery/discovery.go

@@ -9,7 +9,7 @@ import (
 	"time"
 
 	"github.com/coreos/etcd/log"
-	"github.com/coreos/go-etcd/etcd"
+	"github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd"
 )
 
 const (
@@ -44,14 +44,21 @@ func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers []
 		return
 	}
 
-	// prefix is appended to all keys
+	// prefix is prepended to all keys for this discovery
 	d.prefix = strings.TrimPrefix(u.Path, "/v2/keys/")
 
-	// Connect to a scheme://host not a full URL with path
+	// keep the old path in case we need to set the KeyPrefix below
+	oldPath := u.Path
 	u.Path = ""
-	log.Infof("Bootstrapping via %s using prefix %s.", u.String(), d.prefix)
+
+	// Connect to a scheme://host not a full URL with path
+	log.Infof("Discovery via %s using prefix %s.", u.String(), d.prefix)
 	d.client = etcd.NewClient([]string{u.String()})
 
+	if !strings.HasPrefix(oldPath, "/v2/keys") {
+		d.client.SetKeyPrefix("")
+	}
+
 	// Register this machine first and announce that we are a member of
 	// this cluster
 	err = d.heartbeat()
@@ -68,7 +75,7 @@ func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers []
 
 	// Bail out on unexpected errors
 	if err != nil {
-		if etcdErr, ok := err.(etcd.EtcdError); !ok || etcdErr.ErrorCode != 101 {
+		if etcdErr, ok := err.(*etcd.EtcdError); !ok || etcdErr.ErrorCode != 101 {
 			return nil, err
 		}
 	}
@@ -76,11 +83,11 @@ func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers []
 	// If we got a response then the CAS was successful, we are leader
 	if resp != nil && resp.Node.Value == startedState {
 		// We are the leader, we have no peers
-		log.Infof("Bootstrapping was in 'init' state this machine is the initial leader.")
+		log.Infof("Discovery was in the 'init' state this machine is the initial leader.")
 		return nil, nil
 	}
 
-	// Fall through to finding the other discoveryped peers
+	// Fall through to finding the other discovery peers
 	return d.findPeers()
 }
 
@@ -93,7 +100,7 @@ func (d *Discoverer) findPeers() (peers []string, err error) {
 	node := resp.Node
 
 	if node == nil {
-		return nil, errors.New(fmt.Sprintf("%s key doesn't exist.", d.prefix))
+		return nil, fmt.Errorf("%s key doesn't exist.", d.prefix)
 	}
 
 	for _, n := range node.Nodes {
@@ -105,10 +112,10 @@ func (d *Discoverer) findPeers() (peers []string, err error) {
 	}
 
 	if len(peers) == 0 {
-		return nil, errors.New("No peers found.")
+		return nil, errors.New("Discovery found an initialized cluster but no peers are registered.")
 	}
 
-	log.Infof("Bootstrap found peers %v", peers)
+	log.Infof("Discovery found peers %v", peers)
 
 	return
 }
@@ -122,7 +129,7 @@ func (d *Discoverer) startHeartbeat() {
 		case <-ticker:
 			err := d.heartbeat()
 			if err != nil {
-				log.Warnf("Bootstrapping heartbeat failed: %v", err)
+				log.Warnf("Discovery heartbeat failed: %v", err)
 			}
 		}
 	}
@@ -130,7 +137,6 @@ func (d *Discoverer) startHeartbeat() {
 
 func (d *Discoverer) heartbeat() error { // Sets the key path.Join(d.prefix, d.name) to d.peer with defaultTTL via d.client and returns the resulting error.
 	_, err := d.client.Set(path.Join(d.prefix, d.name), d.peer, defaultTTL)
-
 	return err
 }
 

+ 11 - 2
scripts/test-cluster

@@ -6,16 +6,25 @@ ulimit -n unlimited
 
 tmux new-session -d -s $SESSION
 
+peer_args=
+if [ -n "${DISCOVERY_URL}" ]; then
+	peer_args="-discovery ${DISCOVERY_URL}"
+fi
+
 # Set up a window for tailing log files
 tmux new-window -t $SESSION:1 -n 'peers'
 tmux split-window -h
 tmux select-pane -t 0
-tmux send-keys "${DIR}/../bin/etcd -peer-addr 127.0.0.1:7001 -addr 127.0.0.1:4001 -data-dir peer1 -name peer1" C-m
+tmux send-keys "${DIR}/../bin/etcd -peer-addr 127.0.0.1:7001 -addr 127.0.0.1:4001 -data-dir peer1 -name peer1 ${peer_args}" C-m
+
+if [ -n "${peer_args}" ]; then
+	peer_args="-peers 127.0.0.1:7001"
+fi
 
 for i in 2 3; do
 	tmux select-pane -t 0
 	tmux split-window -v
-	tmux send-keys "${DIR}/../bin/etcd -cors='*' -peer-addr 127.0.0.1:700${i} -addr 127.0.0.1:400${i} -peers 127.0.0.1:7001 -data-dir peer${i} -name peer${i}" C-m
+	tmux send-keys "${DIR}/../bin/etcd -cors='*' -peer-addr 127.0.0.1:700${i} -addr 127.0.0.1:400${i} -data-dir peer${i} -name peer${i} ${peer_args}" C-m
 done
 
 # Attach to session

+ 3 - 3
server/peer_server_handlers.go

@@ -6,13 +6,13 @@ import (
 	"strconv"
 	"time"
 
+	"github.com/coreos/etcd/third_party/github.com/coreos/raft"
+	"github.com/coreos/etcd/third_party/github.com/gorilla/mux"
+
 	etcdErr "github.com/coreos/etcd/error"
 	uhttp "github.com/coreos/etcd/pkg/http"
 	"github.com/coreos/etcd/log"
 	"github.com/coreos/etcd/store"
-
-	"github.com/coreos/etcd/third_party/github.com/coreos/raft"
-	"github.com/coreos/etcd/third_party/github.com/gorilla/mux"
 )
 
 // Get all the current logs

+ 1 - 1
server/usage.go

@@ -26,7 +26,7 @@ Options:
   -vv               Enabled very verbose logging.
 
 Cluster Configuration Options:
-  -bootstrap-url=<url>            URL to use for bootstrapping the peer list.
+  -discovery=<url>                Discovery service used to find a peer list.
   -peers-file=<path>              Path to a file containing the peer list.
   -peers=<host:port>,<host:port>  Comma-separated list of peers. The members
                                   should match the peer's '-peer-addr' flag.