main.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package main
  15. import (
  16. "flag"
  17. "fmt"
  18. "io/ioutil"
  19. "net/http"
  20. "os"
  21. "strings"
  22. "github.com/coreos/etcd/pkg/debugutil"
  23. "github.com/coreos/pkg/capnslog"
  24. "github.com/prometheus/client_golang/prometheus/promhttp"
  25. "golang.org/x/time/rate"
  26. "google.golang.org/grpc/grpclog"
  27. )
  28. var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-tester")
  29. const (
  30. defaultClientPort = 2379
  31. defaultPeerPort = 2380
  32. defaultFailpointPort = 2381
  33. )
  34. func main() {
  35. endpointStr := flag.String("agent-endpoints", "localhost:9027", "HTTP RPC endpoints of agents. Do not specify the schema.")
  36. clientPorts := flag.String("client-ports", "", "etcd client port for each agent endpoint")
  37. advertiseClientPorts := flag.String("advertise-client-ports", "", "etcd advertise client port for each agent endpoint")
  38. peerPorts := flag.String("peer-ports", "", "etcd peer port for each agent endpoint")
  39. advertisePeerPorts := flag.String("advertise-peer-ports", "", "etcd advertise peer port for each agent endpoint")
  40. failpointPorts := flag.String("failpoint-ports", "", "etcd failpoint port for each agent endpoint")
  41. stressKeyLargeSize := flag.Uint("stress-key-large-size", 32*1024+1, "the size of each large key written into etcd.")
  42. stressKeySize := flag.Uint("stress-key-size", 100, "the size of each small key written into etcd.")
  43. stressKeySuffixRange := flag.Uint("stress-key-count", 250000, "the count of key range written into etcd.")
  44. stressKeyTxnSuffixRange := flag.Uint("stress-key-txn-count", 100, "the count of key range written into etcd txn (max 100).")
  45. stressKeyTxnOps := flag.Uint("stress-key-txn-ops", 1, "number of operations per a transaction (max 64).")
  46. limit := flag.Int("limit", -1, "the limit of rounds to run failure set (-1 to run without limits).")
  47. exitOnFailure := flag.Bool("exit-on-failure", false, "exit tester on first failure")
  48. stressQPS := flag.Int("stress-qps", 10000, "maximum number of stresser requests per second.")
  49. schedCases := flag.String("schedule-cases", "", "test case schedule")
  50. consistencyCheck := flag.Bool("consistency-check", true, "true to check consistency (revision, hash)")
  51. stresserType := flag.String("stresser", "keys,lease", "comma separated list of stressers (keys, lease, v2keys, nop, election-runner, watch-runner, lock-racer-runner, lease-runner).")
  52. etcdRunnerPath := flag.String("etcd-runner", "", "specify a path of etcd runner binary")
  53. failureTypes := flag.String("failures", "default,failpoints", "specify failures (concat of \"default\" and \"failpoints\").")
  54. failpoints := flag.String("failpoints", `panic("etcd-tester")`, `comma separated list of failpoint terms to inject (e.g. 'panic("etcd-tester"),1*sleep(1000)')`)
  55. externalFailures := flag.String("external-failures", "", "specify a path of script for enabling/disabling an external fault injector")
  56. enablePprof := flag.Bool("enable-pprof", false, "true to enable pprof")
  57. flag.Parse()
  58. // to discard gRPC-side balancer logs
  59. grpclog.SetLoggerV2(grpclog.NewLoggerV2(ioutil.Discard, ioutil.Discard, ioutil.Discard))
  60. eps := strings.Split(*endpointStr, ",")
  61. cports := portsFromArg(*clientPorts, len(eps), defaultClientPort)
  62. acports := portsFromArg(*advertiseClientPorts, len(eps), defaultClientPort)
  63. pports := portsFromArg(*peerPorts, len(eps), defaultPeerPort)
  64. apports := portsFromArg(*advertisePeerPorts, len(eps), defaultPeerPort)
  65. fports := portsFromArg(*failpointPorts, len(eps), defaultFailpointPort)
  66. agents := make([]agentConfig, len(eps))
  67. for i := range eps {
  68. agents[i].endpoint = eps[i]
  69. agents[i].clientPort = cports[i]
  70. agents[i].advertiseClientPort = acports[i]
  71. agents[i].peerPort = pports[i]
  72. agents[i].advertisePeerPort = apports[i]
  73. agents[i].failpointPort = fports[i]
  74. }
  75. c := &cluster{agents: agents}
  76. if err := c.bootstrap(); err != nil {
  77. plog.Fatal(err)
  78. }
  79. defer c.Terminate()
  80. // ensure cluster is fully booted to know failpoints are available
  81. c.WaitHealth()
  82. var failures []failure
  83. if failureTypes != nil && *failureTypes != "" {
  84. types, failpoints := strings.Split(*failureTypes, ","), strings.Split(*failpoints, ",")
  85. failures = makeFailures(types, failpoints, c)
  86. }
  87. if externalFailures != nil && *externalFailures != "" {
  88. if len(failures) != 0 {
  89. plog.Errorf("specify only one of -failures or -external-failures")
  90. os.Exit(1)
  91. }
  92. failures = append(failures, newFailureExternal(*externalFailures))
  93. }
  94. if len(failures) == 0 {
  95. plog.Infof("no failures\n")
  96. failures = append(failures, newFailureNop())
  97. }
  98. schedule := failures
  99. if schedCases != nil && *schedCases != "" {
  100. cases := strings.Split(*schedCases, " ")
  101. schedule = make([]failure, len(cases))
  102. for i := range cases {
  103. caseNum := 0
  104. n, err := fmt.Sscanf(cases[i], "%d", &caseNum)
  105. if n == 0 || err != nil {
  106. plog.Fatalf(`couldn't parse case "%s" (%v)`, cases[i], err)
  107. }
  108. schedule[i] = failures[caseNum]
  109. }
  110. }
  111. scfg := stressConfig{
  112. rateLimiter: rate.NewLimiter(rate.Limit(*stressQPS), *stressQPS),
  113. keyLargeSize: int(*stressKeyLargeSize),
  114. keySize: int(*stressKeySize),
  115. keySuffixRange: int(*stressKeySuffixRange),
  116. keyTxnSuffixRange: int(*stressKeyTxnSuffixRange),
  117. keyTxnOps: int(*stressKeyTxnOps),
  118. numLeases: 10,
  119. keysPerLease: 10,
  120. etcdRunnerPath: *etcdRunnerPath,
  121. }
  122. if scfg.keyTxnSuffixRange > 100 {
  123. plog.Fatalf("stress-key-txn-count is maximum 100, got %d", scfg.keyTxnSuffixRange)
  124. }
  125. if scfg.keyTxnOps > 64 {
  126. plog.Fatalf("stress-key-txn-ops is maximum 64, got %d", scfg.keyTxnOps)
  127. }
  128. t := &tester{
  129. failures: schedule,
  130. cluster: c,
  131. limit: *limit,
  132. exitOnFailure: *exitOnFailure,
  133. scfg: scfg,
  134. stresserType: *stresserType,
  135. doChecks: *consistencyCheck,
  136. }
  137. sh := statusHandler{status: &t.status}
  138. http.Handle("/status", sh)
  139. http.Handle("/metrics", promhttp.Handler())
  140. if *enablePprof {
  141. for p, h := range debugutil.PProfHandlers() {
  142. http.Handle(p, h)
  143. }
  144. }
  145. go func() { plog.Fatal(http.ListenAndServe(":9028", nil)) }()
  146. t.runLoop()
  147. }
  148. // portsFromArg converts a comma separated list into a slice of ints
  149. func portsFromArg(arg string, n, defaultPort int) []int {
  150. ret := make([]int, n)
  151. if len(arg) == 0 {
  152. for i := range ret {
  153. ret[i] = defaultPort
  154. }
  155. return ret
  156. }
  157. s := strings.Split(arg, ",")
  158. if len(s) != n {
  159. fmt.Printf("expected %d ports, got %d (%s)\n", n, len(s), arg)
  160. os.Exit(1)
  161. }
  162. for i := range s {
  163. if _, err := fmt.Sscanf(s[i], "%d", &ret[i]); err != nil {
  164. fmt.Println(err)
  165. os.Exit(1)
  166. }
  167. }
  168. return ret
  169. }
  170. func makeFailures(types, failpoints []string, c *cluster) []failure {
  171. var failures []failure
  172. for i := range types {
  173. switch types[i] {
  174. case "default":
  175. defaultFailures := []failure{
  176. newFailureKillAll(),
  177. newFailureKillMajority(),
  178. newFailureKillOne(),
  179. newFailureKillLeader(),
  180. newFailureKillOneForLongTime(),
  181. newFailureKillLeaderForLongTime(),
  182. newFailureIsolate(),
  183. newFailureIsolateAll(),
  184. newFailureSlowNetworkOneMember(),
  185. newFailureSlowNetworkLeader(),
  186. newFailureSlowNetworkAll(),
  187. }
  188. failures = append(failures, defaultFailures...)
  189. case "failpoints":
  190. fpFailures, fperr := failpointFailures(c, failpoints)
  191. if len(fpFailures) == 0 {
  192. plog.Infof("no failpoints found (%v)", fperr)
  193. }
  194. failures = append(failures, fpFailures...)
  195. default:
  196. plog.Errorf("unknown failure: %s\n", types[i])
  197. os.Exit(1)
  198. }
  199. }
  200. return failures
  201. }