// Copyright 2018 The etcd Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package tester import ( "fmt" "io/ioutil" "net/http" "strings" "sync" "go.etcd.io/etcd/functional/rpcpb" ) type failpointStats struct { mu sync.Mutex // crashes counts the number of crashes for a failpoint crashes map[string]int } var fpStats failpointStats func failpointFailures(clus *Cluster) (ret []Case, err error) { var fps []string fps, err = failpointPaths(clus.Members[0].FailpointHTTPAddr) if err != nil { return nil, err } // create failure objects for all failpoints for _, fp := range fps { if len(fp) == 0 { continue } fpFails := casesFromFailpoint(fp, clus.Tester.FailpointCommands) // wrap in delays so failpoint has time to trigger for i, fpf := range fpFails { if strings.Contains(fp, "Snap") { // hack to trigger snapshot failpoints fpFails[i] = &caseUntilSnapshot{ desc: fpf.Desc(), rpcpbCase: rpcpb.Case_FAILPOINTS, Case: fpf, } } else { fpFails[i] = &caseDelay{ Case: fpf, delayDuration: clus.GetCaseDelayDuration(), } } } ret = append(ret, fpFails...) } fpStats.crashes = make(map[string]int) return ret, err } func failpointPaths(endpoint string) ([]string, error) { resp, err := http.Get(endpoint) if err != nil { return nil, err } defer resp.Body.Close() body, rerr := ioutil.ReadAll(resp.Body) if rerr != nil { return nil, rerr } var fps []string for _, l := range strings.Split(string(body), "\n") { fp := strings.Split(l, "=")[0] fps = append(fps, fp) } return fps, nil } // failpoints follows FreeBSD FAIL_POINT syntax. // e.g. panic("etcd-tester"),1*sleep(1000)->panic("etcd-tester") func casesFromFailpoint(fp string, failpointCommands []string) (fs []Case) { recov := makeRecoverFailpoint(fp) for _, fcmd := range failpointCommands { inject := makeInjectFailpoint(fp, fcmd) fs = append(fs, []Case{ &caseFollower{ caseByFunc: caseByFunc{ desc: fmt.Sprintf("failpoint %q (one: %q)", fp, fcmd), rpcpbCase: rpcpb.Case_FAILPOINTS, injectMember: inject, recoverMember: recov, }, last: -1, lead: -1, }, &caseLeader{ caseByFunc: caseByFunc{ desc: fmt.Sprintf("failpoint %q (leader: %q)", fp, fcmd), rpcpbCase: rpcpb.Case_FAILPOINTS, injectMember: inject, recoverMember: recov, }, last: -1, lead: -1, }, &caseQuorum{ caseByFunc: caseByFunc{ desc: fmt.Sprintf("failpoint %q (quorum: %q)", fp, fcmd), rpcpbCase: rpcpb.Case_FAILPOINTS, injectMember: inject, recoverMember: recov, }, injected: make(map[int]struct{}), }, &caseAll{ desc: fmt.Sprintf("failpoint %q (all: %q)", fp, fcmd), rpcpbCase: rpcpb.Case_FAILPOINTS, injectMember: inject, recoverMember: recov, }, }...) } return fs } func makeInjectFailpoint(fp, val string) injectMemberFunc { return func(clus *Cluster, idx int) (err error) { return putFailpoint(clus.Members[idx].FailpointHTTPAddr, fp, val) } } func makeRecoverFailpoint(fp string) recoverMemberFunc { return func(clus *Cluster, idx int) error { if err := delFailpoint(clus.Members[idx].FailpointHTTPAddr, fp); err == nil { return nil } // node not responding, likely dead from fp panic; restart fpStats.mu.Lock() fpStats.crashes[fp]++ fpStats.mu.Unlock() return recover_SIGTERM_ETCD(clus, idx) } } func putFailpoint(ep, fp, val string) error { req, _ := http.NewRequest(http.MethodPut, ep+"/"+fp, strings.NewReader(val)) c := http.Client{} resp, err := c.Do(req) if err != nil { return err } resp.Body.Close() if resp.StatusCode/100 != 2 { return fmt.Errorf("failed to PUT %s=%s at %s (%v)", fp, val, ep, resp.Status) } return nil } func delFailpoint(ep, fp string) error { req, _ := http.NewRequest(http.MethodDelete, ep+"/"+fp, strings.NewReader("")) c := http.Client{} resp, err := c.Do(req) if err != nil { return err } resp.Body.Close() if resp.StatusCode/100 != 2 { return fmt.Errorf("failed to DELETE %s at %s (%v)", fp, ep, resp.Status) } return nil }