Browse Source

Merge pull request #2189 from yichengq/314

support disaster recovery from rc1 data dir
Yicheng Qin 11 years ago
parent
commit
a65556abe2
3 changed files with 15 additions and 6 deletions
  1. 10 1
      etcdctl/command/backup_command.go
  2. 1 1
      etcdserver/server.go
  3. 4 4
      wal/wal.go

+ 10 - 1
etcdctl/command/backup_command.go

@@ -15,6 +15,7 @@
 package command
 package command
 
 
 import (
 import (
+	"fmt"
 	"log"
 	"log"
 	"os"
 	"os"
 	"path"
 	"path"
@@ -71,7 +72,12 @@ func handleBackup(c *cli.Context) {
 	}
 	}
 	defer w.Close()
 	defer w.Close()
 	wmetadata, state, ents, err := w.ReadAll()
 	wmetadata, state, ents, err := w.ReadAll()
-	if err != nil {
+	switch err {
+	case nil:
+	case wal.ErrSnapshotNotFound:
+		fmt.Printf("Failed to find the match snapshot record %+v in wal %v.", walsnap, srcWAL)
+		fmt.Printf("etcdctl will add it back. Start auto fixing...")
+	default:
 		log.Fatal(err)
 		log.Fatal(err)
 	}
 	}
 	var metadata etcdserverpb.Metadata
 	var metadata etcdserverpb.Metadata
@@ -88,4 +94,7 @@ func handleBackup(c *cli.Context) {
 	if err := neww.Save(state, ents); err != nil {
 	if err := neww.Save(state, ents); err != nil {
 		log.Fatal(err)
 		log.Fatal(err)
 	}
 	}
+	if err := neww.SaveSnapshot(walsnap); err != nil {
+		log.Fatal(err)
+	}
 }
 }

+ 1 - 1
etcdserver/server.go

@@ -253,7 +253,7 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
 	tr := rafthttp.NewTransporter(cfg.Transport, id, cfg.Cluster.ID(), srv, srv.errorc, sstats, lstats)
 	tr := rafthttp.NewTransporter(cfg.Transport, id, cfg.Cluster.ID(), srv, srv.errorc, sstats, lstats)
 	// add all the remote members into sendhub
 	// add all the remote members into sendhub
 	for _, m := range cfg.Cluster.Members() {
 	for _, m := range cfg.Cluster.Members() {
-		if m.Name != cfg.Name {
+		if m.ID != id {
 			tr.AddPeer(m.ID, m.PeerURLs)
 			tr.AddPeer(m.ID, m.PeerURLs)
 		}
 		}
 	}
 	}

+ 4 - 4
wal/wal.go

@@ -203,7 +203,7 @@ func openAtIndex(dirpath string, snap walpb.Snapshot, all bool) (*WAL, error) {
 // ReadAll reads out all records of the current WAL.
 // ReadAll reads out all records of the current WAL.
 // If it cannot read out the expected snap, it will return ErrSnapshotNotFound.
 // If it cannot read out the expected snap, it will return ErrSnapshotNotFound.
 // If loaded snap doesn't match with the expected one, it will return
 // If loaded snap doesn't match with the expected one, it will return
-// ErrSnapshotMismatch.
+// all the records and error ErrSnapshotMismatch.
 // TODO: detect not-last-snap error.
 // TODO: detect not-last-snap error.
 // TODO: maybe loosen the checking of match.
 // TODO: maybe loosen the checking of match.
 // After ReadAll, the WAL will be ready for appending new records.
 // After ReadAll, the WAL will be ready for appending new records.
@@ -256,9 +256,9 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
 		state.Reset()
 		state.Reset()
 		return nil, state, nil, err
 		return nil, state, nil, err
 	}
 	}
+	err = nil
 	if !match {
 	if !match {
-		state.Reset()
-		return nil, state, nil, ErrSnapshotNotFound
+		err = ErrSnapshotNotFound
 	}
 	}
 
 
 	// close decoder, disable reading
 	// close decoder, disable reading
@@ -269,7 +269,7 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
 	// create encoder (chain crc with the decoder), enable appending
 	// create encoder (chain crc with the decoder), enable appending
 	w.encoder = newEncoder(w.f, w.decoder.lastCRC())
 	w.encoder = newEncoder(w.f, w.decoder.lastCRC())
 	w.decoder = nil
 	w.decoder = nil
-	return metadata, state, ents, nil
+	return metadata, state, ents, err
 }
 }
 
 
 // Cut closes current file written and creates a new one ready to append.
 // Cut closes current file written and creates a new one ready to append.