|
|
@@ -0,0 +1,182 @@
|
|
|
+// Copyright 2018 The etcd Authors
|
|
|
+//
|
|
|
+// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
+// you may not use this file except in compliance with the License.
|
|
|
+// You may obtain a copy of the License at
|
|
|
+//
|
|
|
+// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+//
|
|
|
+// Unless required by applicable law or agreed to in writing, software
|
|
|
+// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+// See the License for the specific language governing permissions and
|
|
|
+// limitations under the License.
|
|
|
+
|
|
|
+package tester
|
|
|
+
|
|
|
+import (
|
|
|
+ "context"
|
|
|
+ "fmt"
|
|
|
+ "sort"
|
|
|
+ "strings"
|
|
|
+ "time"
|
|
|
+
|
|
|
+ "github.com/coreos/etcd/clientv3"
|
|
|
+ "github.com/coreos/etcd/functional/rpcpb"
|
|
|
+ "go.uber.org/zap"
|
|
|
+)
|
|
|
+
|
|
|
+func inject_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
|
|
|
+ cli1, err := clus.Members[idx1].CreateEtcdClient()
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ defer cli1.Close()
|
|
|
+
|
|
|
+ var mresp *clientv3.MemberListResponse
|
|
|
+ mresp, err = cli1.MemberList(context.Background())
|
|
|
+ mss := []string{}
|
|
|
+ if err == nil && mresp != nil {
|
|
|
+ mss = describeMembers(mresp)
|
|
|
+ }
|
|
|
+ clus.lg.Info(
|
|
|
+ "member list before disastrous machine failure",
|
|
|
+ zap.String("request-to", clus.Members[idx1].EtcdClientEndpoint),
|
|
|
+ zap.Strings("members", mss),
|
|
|
+ zap.Error(err),
|
|
|
+ )
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ sresp, serr := cli1.Status(context.Background(), clus.Members[idx1].EtcdClientEndpoint)
|
|
|
+ if serr != nil {
|
|
|
+ return serr
|
|
|
+ }
|
|
|
+ id1 := sresp.Header.MemberId
|
|
|
+ is1 := fmt.Sprintf("%016x", id1)
|
|
|
+
|
|
|
+ err = clus.sendOp(idx1, rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA)
|
|
|
+ clus.lg.Info(
|
|
|
+ "disastrous machine failure",
|
|
|
+ zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
|
|
|
+ zap.String("target-member-id", is1),
|
|
|
+ zap.Error(err),
|
|
|
+ )
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ time.Sleep(3 * time.Second)
|
|
|
+
|
|
|
+ idx2 := (idx1 + 1) % len(clus.Members)
|
|
|
+ var cli2 *clientv3.Client
|
|
|
+ cli2, err = clus.Members[idx2].CreateEtcdClient()
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ defer cli2.Close()
|
|
|
+
|
|
|
+ _, err = cli2.MemberRemove(context.Background(), id1)
|
|
|
+ clus.lg.Info(
|
|
|
+ "member remove after disaster",
|
|
|
+ zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
|
|
|
+ zap.String("target-member-id", is1),
|
|
|
+ zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
|
|
|
+ zap.Error(err),
|
|
|
+ )
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ time.Sleep(5 * time.Second)
|
|
|
+
|
|
|
+ mresp, err = cli2.MemberList(context.Background())
|
|
|
+ mss = []string{}
|
|
|
+ if err == nil && mresp != nil {
|
|
|
+ mss = describeMembers(mresp)
|
|
|
+ }
|
|
|
+ clus.lg.Info(
|
|
|
+ "member list after member remove",
|
|
|
+ zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
|
|
|
+ zap.Strings("members", mss),
|
|
|
+ zap.Error(err),
|
|
|
+ )
|
|
|
+ return err
|
|
|
+}
|
|
|
+
|
|
|
+func recover_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
|
|
|
+ idx2 := (idx1 + 1) % len(clus.Members)
|
|
|
+ cli2, err := clus.Members[idx2].CreateEtcdClient()
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ defer cli2.Close()
|
|
|
+
|
|
|
+ _, err = cli2.MemberAdd(context.Background(), clus.Members[idx1].Etcd.AdvertisePeerURLs)
|
|
|
+ clus.lg.Info(
|
|
|
+ "member add before fresh restart",
|
|
|
+ zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
|
|
|
+ zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
|
|
|
+ zap.Error(err),
|
|
|
+ )
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ time.Sleep(3 * time.Second)
|
|
|
+
|
|
|
+ clus.Members[idx1].Etcd.InitialClusterState = "existing"
|
|
|
+ err = clus.sendOp(idx1, rpcpb.Operation_RESTART_ETCD)
|
|
|
+ clus.lg.Info(
|
|
|
+ "fresh restart after member add",
|
|
|
+ zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
|
|
|
+ zap.Error(err),
|
|
|
+ )
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ time.Sleep(3 * time.Second)
|
|
|
+
|
|
|
+ var mresp *clientv3.MemberListResponse
|
|
|
+ mresp, err = cli2.MemberList(context.Background())
|
|
|
+ mss := []string{}
|
|
|
+ if err == nil && mresp != nil {
|
|
|
+ mss = describeMembers(mresp)
|
|
|
+ }
|
|
|
+ clus.lg.Info(
|
|
|
+ "member list after member add",
|
|
|
+ zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
|
|
|
+ zap.Strings("members", mss),
|
|
|
+ zap.Error(err),
|
|
|
+ )
|
|
|
+ return err
|
|
|
+}
|
|
|
+
|
|
|
+func new_FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus *Cluster) Failure {
|
|
|
+ ff := failureByFunc{
|
|
|
+ failureCase: rpcpb.FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER,
|
|
|
+ injectMember: inject_SIGQUIT_ETCD_AND_REMOVE_DATA,
|
|
|
+ recoverMember: recover_SIGQUIT_ETCD_AND_REMOVE_DATA,
|
|
|
+ }
|
|
|
+ f := &failureFollower{ff, -1, -1}
|
|
|
+ return &failureDelay{
|
|
|
+ Failure: f,
|
|
|
+ delayDuration: clus.GetFailureDelayDuration(),
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func describeMembers(mresp *clientv3.MemberListResponse) (ss []string) {
|
|
|
+ ss = make([]string, len(mresp.Members))
|
|
|
+ for i, m := range mresp.Members {
|
|
|
+ ss[i] = fmt.Sprintf("Name %s / ID %016x / ClientURLs %s / PeerURLs %s",
|
|
|
+ m.Name,
|
|
|
+ m.ID,
|
|
|
+ strings.Join(m.ClientURLs, ","),
|
|
|
+ strings.Join(m.PeerURLs, ","),
|
|
|
+ )
|
|
|
+ }
|
|
|
+ sort.Strings(ss)
|
|
|
+ return ss
|
|
|
+}
|