@@ -25,6 +25,7 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
+	"time"
 
 	"github.com/coreos/etcd/clientv3"
 	"github.com/coreos/etcd/etcdserver"
@@ -48,65 +49,34 @@ import (
 
 // Manager defines snapshot methods.
 type Manager interface {
-	// Save fetches snapshot from remote etcd server and saves data to target path.
-	// If the context "ctx" is canceled or timed out, snapshot save stream will error out
-	// (e.g. context.Canceled, context.DeadlineExceeded).
-	Save(ctx context.Context, dbPath string) error
+	// Save fetches snapshot from remote etcd server and saves data
+	// to target path. If the context "ctx" is canceled or timed out,
+	// snapshot save stream will error out (e.g. context.Canceled,
+	// context.DeadlineExceeded). Make sure to specify only one endpoint
+	// in client configuration. Snapshot API must be requested to a
+	// selected node, and saved snapshot is the point-in-time state of
+	// the selected node.
+	Save(ctx context.Context, cfg clientv3.Config, dbPath string) error
 
 	// Status returns the snapshot file information.
 	Status(dbPath string) (Status, error)
 
-	// Restore restores a new etcd data directory from given snapshot file.
-	// It returns an error if specified data directory already exists, to
-	// prevent unintended data directory overwrites.
-	Restore(dbPath string, cfg RestoreConfig) error
-}
-
-// Status is the snapshot file status.
-type Status struct {
-	Hash      uint32 `json:"hash"`
-	Revision  int64  `json:"revision"`
-	TotalKey  int    `json:"totalKey"`
-	TotalSize int64  `json:"totalSize"`
-}
-
-// RestoreConfig configures snapshot restore operation.
-type RestoreConfig struct {
-	// Name is the human-readable name of this member.
-	Name string
-	// OutputDataDir is the target data directory to save restored data.
-	// OutputDataDir should not conflict with existing etcd data directory.
-	// If OutputDataDir already exists, it will return an error to prevent
-	// unintended data directory overwrites.
-	// Defaults to "[Name].etcd" if not given.
-	OutputDataDir string
-	// OutputWALDir is the target WAL data directory.
-	// Defaults to "[OutputDataDir]/member/wal" if not given.
-	OutputWALDir string
-	// InitialCluster is the initial cluster configuration for restore bootstrap.
-	InitialCluster types.URLsMap
-	// InitialClusterToken is the initial cluster token for etcd cluster during restore bootstrap.
-	InitialClusterToken string
-	// PeerURLs is a list of member's peer URLs to advertise to the rest of the cluster.
-	PeerURLs types.URLs
-	// SkipHashCheck is "true" to ignore snapshot integrity hash value
-	// (required if copied from data directory).
-	SkipHashCheck bool
+	// Restore restores a new etcd data directory from given snapshot
+	// file. It returns an error if specified data directory already
+	// exists, to prevent unintended data directory overwrites.
+	Restore(cfg RestoreConfig) error
 }
 
 // NewV3 returns a new snapshot Manager for v3.x snapshot.
-// "*clientv3.Client" is only used for "Save" method.
-// Otherwise, pass "nil".
-func NewV3(cli *clientv3.Client, lg *zap.Logger) Manager {
+func NewV3(lg *zap.Logger) Manager {
 	if lg == nil {
 		lg = zap.NewExample()
 	}
-	return &v3Manager{cli: cli, lg: lg}
+	return &v3Manager{lg: lg}
 }
 
 type v3Manager struct {
-	cli *clientv3.Client
-	lg  *zap.Logger
+	lg *zap.Logger
 
 	name    string
 	dbPath  string
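For reference, a minimal caller-side sketch of the reworked API (not part of the patch itself): NewV3 no longer takes a *clientv3.Client, and Save dials its own client from the clientv3.Config it receives. The snapshot import path, endpoint, and file names below are illustrative assumptions.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/snapshot" // assumed import path for this package
	"go.uber.org/zap"
)

func main() {
	lg, _ := zap.NewProduction()
	sp := snapshot.NewV3(lg)

	// Save dials the single endpoint itself; the saved snapshot is the
	// point-in-time state of that one member.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	cfg := clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}}
	if err := sp.Save(ctx, cfg, "backup.db"); err != nil {
		lg.Fatal("snapshot save failed", zap.Error(err))
	}

	// Status reads the saved file back and reports its integrity hash,
	// revision, key count, and size.
	st, err := sp.Status("backup.db")
	if err != nil {
		lg.Fatal("snapshot status failed", zap.Error(err))
	}
	fmt.Printf("hash=%d revision=%d totalKey=%d totalSize=%d\n",
		st.Hash, st.Revision, st.TotalKey, st.TotalSize)
}

Passing more than one endpoint in cfg.Endpoints makes Save fail with the error added in this change.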
@@ -117,11 +87,23 @@ type v3Manager struct {
 	skipHashCheck bool
 }
 
-func (s *v3Manager) Save(ctx context.Context, dbPath string) error {
+// Save fetches snapshot from remote etcd server and saves data to target path.
+func (s *v3Manager) Save(ctx context.Context, cfg clientv3.Config, dbPath string) error {
+	if len(cfg.Endpoints) != 1 {
+		return fmt.Errorf("snapshot must be requested to one selected node, not multiple %v", cfg.Endpoints)
+	}
+	cli, err := clientv3.New(cfg)
+	if err != nil {
+		return err
+	}
+	defer cli.Close()
+
 	partpath := dbPath + ".part"
-	f, err := os.Create(partpath)
+	defer os.RemoveAll(partpath)
+
+	var f *os.File
+	f, err = os.Create(partpath)
 	if err != nil {
-		os.RemoveAll(partpath)
 		return fmt.Errorf("could not open %s (%v)", partpath, err)
 	}
 	s.lg.Info(
@@ -129,34 +111,47 @@ func (s *v3Manager) Save(ctx context.Context, dbPath string) error {
 		zap.String("path", partpath),
 	)
 
+	now := time.Now()
 	var rd io.ReadCloser
-	rd, err = s.cli.Snapshot(ctx)
+	rd, err = cli.Snapshot(ctx)
 	if err != nil {
-		os.RemoveAll(partpath)
 		return err
 	}
-	s.lg.Info("copying from snapshot stream")
+	s.lg.Info(
+		"fetching snapshot",
+		zap.String("endpoint", cfg.Endpoints[0]),
+	)
 	if _, err = io.Copy(f, rd); err != nil {
-		os.RemoveAll(partpath)
 		return err
 	}
 	if err = fileutil.Fsync(f); err != nil {
-		os.RemoveAll(partpath)
 		return err
 	}
 	if err = f.Close(); err != nil {
-		os.RemoveAll(partpath)
 		return err
 	}
+	s.lg.Info(
+		"fetched snapshot",
+		zap.String("endpoint", cfg.Endpoints[0]),
+		zap.Duration("took", time.Since(now)),
+	)
 
-	s.lg.Info("rename", zap.String("from", partpath), zap.String("to", dbPath))
 	if err = os.Rename(partpath, dbPath); err != nil {
-		os.RemoveAll(partpath)
 		return fmt.Errorf("could not rename %s to %s (%v)", partpath, dbPath, err)
 	}
+	s.lg.Info("saved", zap.String("path", dbPath))
 	return nil
 }
 
+// Status is the snapshot file status.
+type Status struct {
+	Hash      uint32 `json:"hash"`
+	Revision  int64  `json:"revision"`
+	TotalKey  int    `json:"totalKey"`
+	TotalSize int64  `json:"totalSize"`
+}
+
+// Status returns the snapshot file information.
 func (s *v3Manager) Status(dbPath string) (ds Status, err error) {
 	if _, err = os.Stat(dbPath); err != nil {
 		return ds, err
@@ -200,19 +195,60 @@ func (s *v3Manager) Status(dbPath string) (ds Status, err error) {
 	return ds, nil
 }
 
-func (s *v3Manager) Restore(dbPath string, cfg RestoreConfig) error {
+// RestoreConfig configures snapshot restore operation.
+type RestoreConfig struct {
+	// SnapshotPath is the path of snapshot file to restore from.
+	SnapshotPath string
+
+	// Name is the human-readable name of this member.
+	Name string
+
+	// OutputDataDir is the target data directory to save restored data.
+	// OutputDataDir should not conflict with existing etcd data directory.
+	// If OutputDataDir already exists, it will return an error to prevent
+	// unintended data directory overwrites.
+	// If empty, defaults to "[Name].etcd".
+	OutputDataDir string
+	// OutputWALDir is the target WAL data directory.
+	// If empty, defaults to "[OutputDataDir]/member/wal".
+	OutputWALDir string
+
+	// PeerURLs is a list of member's peer URLs to advertise to the rest of the cluster.
+	PeerURLs []string
+
+	// InitialCluster is the initial cluster configuration for restore bootstrap.
+	InitialCluster string
+	// InitialClusterToken is the initial cluster token for etcd cluster during restore bootstrap.
+	InitialClusterToken string
+
+	// SkipHashCheck is "true" to ignore snapshot integrity hash value
+	// (required if copied from data directory).
+	SkipHashCheck bool
+}
+
+// Restore restores a new etcd data directory from given snapshot file.
+func (s *v3Manager) Restore(cfg RestoreConfig) error {
+	pURLs, err := types.NewURLs(cfg.PeerURLs)
+	if err != nil {
+		return err
+	}
+	var ics types.URLsMap
+	ics, err = types.NewURLsMap(cfg.InitialCluster)
+	if err != nil {
+		return err
+	}
+
 	srv := etcdserver.ServerConfig{
 		Name:                cfg.Name,
+		PeerURLs:            pURLs,
+		InitialPeerURLsMap:  ics,
 		InitialClusterToken: cfg.InitialClusterToken,
-		InitialPeerURLsMap:  cfg.InitialCluster,
-		PeerURLs:            cfg.PeerURLs,
 	}
-	if err := srv.VerifyBootstrap(); err != nil {
+	if err = srv.VerifyBootstrap(); err != nil {
 		return err
 	}
 
-	var err error
-	s.cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialCluster)
+	s.cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, ics)
 	if err != nil {
 		return err
 	}
@@ -221,44 +257,44 @@ func (s *v3Manager) Restore(dbPath string, cfg RestoreConfig) error {
 	if dataDir == "" {
 		dataDir = cfg.Name + ".etcd"
 	}
-	if _, err = os.Stat(dataDir); err == nil {
+	if fileutil.Exist(dataDir) {
 		return fmt.Errorf("data-dir %q exists", dataDir)
 	}
+
 	walDir := cfg.OutputWALDir
 	if walDir == "" {
 		walDir = filepath.Join(dataDir, "member", "wal")
-	} else if _, err = os.Stat(walDir); err == nil {
+	} else if fileutil.Exist(walDir) {
 		return fmt.Errorf("wal-dir %q exists", walDir)
 	}
 
-	s.lg.Info(
-		"restoring snapshot file",
-		zap.String("path", dbPath),
-		zap.String("wal-dir", walDir),
-		zap.String("data-dir", dataDir),
-		zap.String("snap-dir", s.snapDir),
-	)
-
 	s.name = cfg.Name
-	s.dbPath = dbPath
+	s.dbPath = cfg.SnapshotPath
 	s.walDir = walDir
 	s.snapDir = filepath.Join(dataDir, "member", "snap")
 	s.skipHashCheck = cfg.SkipHashCheck
 
+	s.lg.Info(
+		"restoring snapshot",
+		zap.String("path", s.dbPath),
+		zap.String("wal-dir", s.walDir),
+		zap.String("data-dir", dataDir),
+		zap.String("snap-dir", s.snapDir),
+	)
 	if err = s.saveDB(); err != nil {
 		return err
 	}
 	if err = s.saveWALAndSnap(); err != nil {
 		return err
 	}
-
 	s.lg.Info(
-		"finished restoring snapshot file",
-		zap.String("path", dbPath),
-		zap.String("wal-dir", walDir),
+		"restored snapshot",
+		zap.String("path", s.dbPath),
+		zap.String("wal-dir", s.walDir),
 		zap.String("data-dir", dataDir),
 		zap.String("snap-dir", s.snapDir),
 	)
+
 	return nil
 }
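Continuing the sketch above (same imports, inside the same main), a hedged example of the new Restore entry point; the member name, URLs, output directory, and token are placeholders chosen to mirror an etcdctl-style three-member restore:

	// Restore writes a fresh data directory for member "m1" from the
	// snapshot file; it fails if the output directory already exists.
	err = sp.Restore(snapshot.RestoreConfig{
		SnapshotPath:        "backup.db",
		Name:                "m1",
		OutputDataDir:       "m1.etcd",
		PeerURLs:            []string{"http://10.0.0.1:2380"},
		InitialCluster:      "m1=http://10.0.0.1:2380,m2=http://10.0.0.2:2380,m3=http://10.0.0.3:2380",
		InitialClusterToken: "etcd-cluster-1",
		SkipHashCheck:       false,
	})
	if err != nil {
		lg.Fatal("snapshot restore failed", zap.Error(err))
	}

Restore refuses to run if OutputDataDir (or an explicitly set OutputWALDir) already exists, which guards against overwriting a live member's data.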