v3_snapshot_test.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. // Copyright 2018 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package snapshot
  15. import (
  16. "context"
  17. "fmt"
  18. "math/rand"
  19. "net/url"
  20. "os"
  21. "path/filepath"
  22. "strings"
  23. "testing"
  24. "time"
  25. "go.etcd.io/etcd/clientv3"
  26. "go.etcd.io/etcd/embed"
  27. "go.etcd.io/etcd/pkg/fileutil"
  28. "go.etcd.io/etcd/pkg/testutil"
  29. "go.uber.org/zap"
  30. )
  31. // TestSnapshotV3RestoreSingle tests single node cluster restoring
  32. // from a snapshot file.
  33. func TestSnapshotV3RestoreSingle(t *testing.T) {
  34. kvs := []kv{{"foo1", "bar1"}, {"foo2", "bar2"}, {"foo3", "bar3"}}
  35. dbPath := createSnapshotFile(t, kvs)
  36. defer os.RemoveAll(dbPath)
  37. clusterN := 1
  38. urls := newEmbedURLs(clusterN * 2)
  39. cURLs, pURLs := urls[:clusterN], urls[clusterN:]
  40. cfg := embed.NewConfig()
  41. cfg.Logger = "zap"
  42. cfg.LogOutputs = []string{"/dev/null"}
  43. cfg.Debug = false
  44. cfg.Name = "s1"
  45. cfg.InitialClusterToken = testClusterTkn
  46. cfg.ClusterState = "existing"
  47. cfg.LCUrls, cfg.ACUrls = cURLs, cURLs
  48. cfg.LPUrls, cfg.APUrls = pURLs, pURLs
  49. cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, pURLs[0].String())
  50. cfg.Dir = filepath.Join(os.TempDir(), fmt.Sprint(time.Now().Nanosecond()))
  51. sp := NewV3(zap.NewExample())
  52. pss := make([]string, 0, len(pURLs))
  53. for _, p := range pURLs {
  54. pss = append(pss, p.String())
  55. }
  56. if err := sp.Restore(RestoreConfig{
  57. SnapshotPath: dbPath,
  58. Name: cfg.Name,
  59. OutputDataDir: cfg.Dir,
  60. InitialCluster: cfg.InitialCluster,
  61. InitialClusterToken: cfg.InitialClusterToken,
  62. PeerURLs: pss,
  63. }); err != nil {
  64. t.Fatal(err)
  65. }
  66. srv, err := embed.StartEtcd(cfg)
  67. if err != nil {
  68. t.Fatal(err)
  69. }
  70. defer func() {
  71. os.RemoveAll(cfg.Dir)
  72. srv.Close()
  73. }()
  74. select {
  75. case <-srv.Server.ReadyNotify():
  76. case <-time.After(3 * time.Second):
  77. t.Fatalf("failed to start restored etcd member")
  78. }
  79. var cli *clientv3.Client
  80. cli, err = clientv3.New(clientv3.Config{Endpoints: []string{cfg.ACUrls[0].String()}})
  81. if err != nil {
  82. t.Fatal(err)
  83. }
  84. defer cli.Close()
  85. for i := range kvs {
  86. var gresp *clientv3.GetResponse
  87. gresp, err = cli.Get(context.Background(), kvs[i].k)
  88. if err != nil {
  89. t.Fatal(err)
  90. }
  91. if string(gresp.Kvs[0].Value) != kvs[i].v {
  92. t.Fatalf("#%d: value expected %s, got %s", i, kvs[i].v, string(gresp.Kvs[0].Value))
  93. }
  94. }
  95. }
  96. // TestSnapshotV3RestoreMulti ensures that multiple members
  97. // can boot into the same cluster after being restored from a same
  98. // snapshot file.
  99. func TestSnapshotV3RestoreMulti(t *testing.T) {
  100. kvs := []kv{{"foo1", "bar1"}, {"foo2", "bar2"}, {"foo3", "bar3"}}
  101. dbPath := createSnapshotFile(t, kvs)
  102. defer os.RemoveAll(dbPath)
  103. clusterN := 3
  104. cURLs, _, srvs := restoreCluster(t, clusterN, dbPath)
  105. defer func() {
  106. for i := 0; i < clusterN; i++ {
  107. os.RemoveAll(srvs[i].Config().Dir)
  108. srvs[i].Close()
  109. }
  110. }()
  111. // wait for leader election
  112. time.Sleep(time.Second)
  113. for i := 0; i < clusterN; i++ {
  114. cli, err := clientv3.New(clientv3.Config{Endpoints: []string{cURLs[i].String()}})
  115. if err != nil {
  116. t.Fatal(err)
  117. }
  118. defer cli.Close()
  119. for i := range kvs {
  120. var gresp *clientv3.GetResponse
  121. gresp, err = cli.Get(context.Background(), kvs[i].k)
  122. if err != nil {
  123. t.Fatal(err)
  124. }
  125. if string(gresp.Kvs[0].Value) != kvs[i].v {
  126. t.Fatalf("#%d: value expected %s, got %s", i, kvs[i].v, string(gresp.Kvs[0].Value))
  127. }
  128. }
  129. }
  130. }
  131. // TestSnapshotFilePermissions ensures that the snapshot is saved with
  132. // the correct file permissions.
  133. func TestSnapshotFilePermissions(t *testing.T) {
  134. expectedFileMode := os.FileMode(fileutil.PrivateFileMode)
  135. kvs := []kv{{"foo1", "bar1"}, {"foo2", "bar2"}, {"foo3", "bar3"}}
  136. dbPath := createSnapshotFile(t, kvs)
  137. defer os.RemoveAll(dbPath)
  138. dbInfo, err := os.Stat(dbPath)
  139. if err != nil {
  140. t.Fatalf("failed to get test snapshot file status: %v", err)
  141. }
  142. actualFileMode := dbInfo.Mode()
  143. if expectedFileMode != actualFileMode {
  144. t.Fatalf("expected test snapshot file mode %s, got %s:", expectedFileMode, actualFileMode)
  145. }
  146. }
  147. // TestCorruptedBackupFileCheck tests if we can correctly identify a corrupted backup file.
  148. func TestCorruptedBackupFileCheck(t *testing.T) {
  149. dbPath := "testdata/corrupted_backup.db"
  150. if _, err := os.Stat(dbPath); err != nil {
  151. t.Fatalf("test file [%s] does not exist: %v", dbPath, err)
  152. }
  153. sp := NewV3(zap.NewExample())
  154. _, err := sp.Status(dbPath)
  155. expectedErrKeywords := "snapshot file integrity check failed"
  156. /* example error message:
  157. snapshot file integrity check failed. 2 errors found.
  158. page 3: already freed
  159. page 4: unreachable unfreed
  160. */
  161. if err == nil {
  162. t.Error("expected error due to corrupted snapshot file, got no error")
  163. }
  164. if !strings.Contains(err.Error(), expectedErrKeywords) {
  165. t.Errorf("expected error message to contain the following keywords:\n%s\n"+
  166. "actual error message:\n%s",
  167. expectedErrKeywords, err.Error())
  168. }
  169. }
  // kv is a key/value pair that the tests write to etcd and later expect to
  // read back from a restored snapshot.
  type kv struct {
  	k string // key
  	v string // value stored under k
  }
  173. // creates a snapshot file and returns the file path.
  174. func createSnapshotFile(t *testing.T, kvs []kv) string {
  175. clusterN := 1
  176. urls := newEmbedURLs(clusterN * 2)
  177. cURLs, pURLs := urls[:clusterN], urls[clusterN:]
  178. cfg := embed.NewConfig()
  179. cfg.Logger = "zap"
  180. cfg.LogOutputs = []string{"/dev/null"}
  181. cfg.Debug = false
  182. cfg.Name = "default"
  183. cfg.ClusterState = "new"
  184. cfg.LCUrls, cfg.ACUrls = cURLs, cURLs
  185. cfg.LPUrls, cfg.APUrls = pURLs, pURLs
  186. cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, pURLs[0].String())
  187. cfg.Dir = filepath.Join(os.TempDir(), fmt.Sprint(time.Now().Nanosecond()))
  188. srv, err := embed.StartEtcd(cfg)
  189. if err != nil {
  190. t.Fatal(err)
  191. }
  192. defer func() {
  193. os.RemoveAll(cfg.Dir)
  194. srv.Close()
  195. }()
  196. select {
  197. case <-srv.Server.ReadyNotify():
  198. case <-time.After(3 * time.Second):
  199. t.Fatalf("failed to start embed.Etcd for creating snapshots")
  200. }
  201. ccfg := clientv3.Config{Endpoints: []string{cfg.ACUrls[0].String()}}
  202. cli, err := clientv3.New(ccfg)
  203. if err != nil {
  204. t.Fatal(err)
  205. }
  206. defer cli.Close()
  207. for i := range kvs {
  208. ctx, cancel := context.WithTimeout(context.Background(), testutil.RequestTimeout)
  209. _, err = cli.Put(ctx, kvs[i].k, kvs[i].v)
  210. cancel()
  211. if err != nil {
  212. t.Fatal(err)
  213. }
  214. }
  215. sp := NewV3(zap.NewExample())
  216. dpPath := filepath.Join(os.TempDir(), fmt.Sprintf("snapshot%d.db", time.Now().Nanosecond()))
  217. if err = sp.Save(context.Background(), ccfg, dpPath); err != nil {
  218. t.Fatal(err)
  219. }
  220. os.RemoveAll(cfg.Dir)
  221. srv.Close()
  222. return dpPath
  223. }
  // testClusterTkn is the initial cluster token used by the restored test
  // members in this file.
  const (
  	testClusterTkn = "tkn"
  )
  225. func restoreCluster(t *testing.T, clusterN int, dbPath string) (
  226. cURLs []url.URL,
  227. pURLs []url.URL,
  228. srvs []*embed.Etcd) {
  229. urls := newEmbedURLs(clusterN * 2)
  230. cURLs, pURLs = urls[:clusterN], urls[clusterN:]
  231. ics := ""
  232. for i := 0; i < clusterN; i++ {
  233. ics += fmt.Sprintf(",%d=%s", i, pURLs[i].String())
  234. }
  235. ics = ics[1:]
  236. cfgs := make([]*embed.Config, clusterN)
  237. for i := 0; i < clusterN; i++ {
  238. cfg := embed.NewConfig()
  239. cfg.Logger = "zap"
  240. cfg.LogOutputs = []string{"/dev/null"}
  241. cfg.Debug = false
  242. cfg.Name = fmt.Sprintf("%d", i)
  243. cfg.InitialClusterToken = testClusterTkn
  244. cfg.ClusterState = "existing"
  245. cfg.LCUrls, cfg.ACUrls = []url.URL{cURLs[i]}, []url.URL{cURLs[i]}
  246. cfg.LPUrls, cfg.APUrls = []url.URL{pURLs[i]}, []url.URL{pURLs[i]}
  247. cfg.InitialCluster = ics
  248. cfg.Dir = filepath.Join(os.TempDir(), fmt.Sprint(time.Now().Nanosecond()+i))
  249. sp := NewV3(zap.NewExample())
  250. if err := sp.Restore(RestoreConfig{
  251. SnapshotPath: dbPath,
  252. Name: cfg.Name,
  253. OutputDataDir: cfg.Dir,
  254. PeerURLs: []string{pURLs[i].String()},
  255. InitialCluster: ics,
  256. InitialClusterToken: cfg.InitialClusterToken,
  257. }); err != nil {
  258. t.Fatal(err)
  259. }
  260. cfgs[i] = cfg
  261. }
  262. sch := make(chan *embed.Etcd)
  263. for i := range cfgs {
  264. go func(idx int) {
  265. srv, err := embed.StartEtcd(cfgs[idx])
  266. if err != nil {
  267. t.Error(err)
  268. }
  269. <-srv.Server.ReadyNotify()
  270. sch <- srv
  271. }(i)
  272. }
  273. srvs = make([]*embed.Etcd, clusterN)
  274. for i := 0; i < clusterN; i++ {
  275. select {
  276. case srv := <-sch:
  277. srvs[i] = srv
  278. case <-time.After(5 * time.Second):
  279. t.Fatalf("#%d: failed to start embed.Etcd", i)
  280. }
  281. }
  282. return cURLs, pURLs, srvs
  283. }
  // newEmbedURLs returns n URLs of the form "unix://localhost:<port>", with
  // <port> drawn at random from [0, 45000).
  //
  // The URLs within one returned slice are pairwise distinct as long as
  // n <= 45000, so a caller never hands the same address to two listeners.
  // Uniqueness across separate calls is not guaranteed.
  //
  // TODO: TLS
  func newEmbedURLs(n int) (urls []url.URL) {
  	const portRange = 45000

  	urls = make([]url.URL, n)
  	// One generator per call, seeded once. Reseeding inside the loop from the
  	// clock can repeat a seed on coarse clocks and yield the same port twice.
  	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
  	used := make(map[int]bool, n)
  	for i := range urls {
  		port := rng.Intn(portRange)
  		// Redraw on collision. The check is skipped when more URLs are
  		// requested than ports exist, so the loop always terminates.
  		for n <= portRange && used[port] {
  			port = rng.Intn(portRange)
  		}
  		used[port] = true
  		// Built directly rather than parsed: the shape is fixed, so there is
  		// no parse error to ignore.
  		urls[i] = url.URL{Scheme: "unix", Host: fmt.Sprintf("localhost:%d", port)}
  	}
  	return urls
  }
  293. }