key_stresser.go

// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"fmt"
	"math/rand"
	"sync"
	"sync/atomic"
	"time"

	"github.com/coreos/etcd/etcdserver"
	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"

	"golang.org/x/time/rate"
	"google.golang.org/grpc"
	"google.golang.org/grpc/transport"
)
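
// keyStresser stresses a single etcd gRPC endpoint with a weighted mix of
// puts, ranges, deletes, and (when keyTxnSuffixRange > 0) nested txns.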
type keyStresser struct {
	Endpoint string

	keyLargeSize      int
	keySize           int
	keySuffixRange    int
	keyTxnSuffixRange int
	keyTxnOps         int

	N int

	rateLimiter *rate.Limiter

	wg sync.WaitGroup

	cancel func()
	conn   *grpc.ClientConn
	// atomicModifiedKeys records the number of keys created and deleted by the stresser.
	atomicModifiedKeys int64

	stressTable *stressTable
}
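
// Stress dials the endpoint, builds the weighted table of stress operations,
// and launches s.N goroutines that issue requests until the stresser is closed.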
func (s *keyStresser) Stress() error {
	// TODO: add backoff option
	conn, err := grpc.Dial(s.Endpoint, grpc.WithInsecure())
	if err != nil {
		return fmt.Errorf("%v (%s)", err, s.Endpoint)
	}
	ctx, cancel := context.WithCancel(context.Background())

	s.wg.Add(s.N)
	s.conn = conn
	s.cancel = cancel

	kvc := pb.NewKVClient(conn)

	var stressEntries = []stressEntry{
		{weight: 0.7, f: newStressPut(kvc, s.keySuffixRange, s.keySize)},
		{
			weight: 0.7 * float32(s.keySize) / float32(s.keyLargeSize),
			f:      newStressPut(kvc, s.keySuffixRange, s.keyLargeSize),
		},
		{weight: 0.07, f: newStressRange(kvc, s.keySuffixRange)},
		{weight: 0.07, f: newStressRangeInterval(kvc, s.keySuffixRange)},
		{weight: 0.07, f: newStressDelete(kvc, s.keySuffixRange)},
		{weight: 0.07, f: newStressDeleteInterval(kvc, s.keySuffixRange)},
	}
	if s.keyTxnSuffixRange > 0 {
		// adjust to make up ±70% of workloads with writes
		stressEntries[0].weight = 0.24
		stressEntries[1].weight = 0.24
		stressEntries = append(stressEntries, stressEntry{
			weight: 0.24,
			f:      newStressTxn(kvc, s.keyTxnSuffixRange, s.keyTxnOps),
		})
	}
	s.stressTable = createStressTable(stressEntries)

	for i := 0; i < s.N; i++ {
		go s.run(ctx)
	}

	plog.Infof("keyStresser %q is started", s.Endpoint)
	return nil
}
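
// run repeatedly executes a randomly chosen stress operation, accumulating the
// number of modified keys. Errors expected during failure injection are
// tolerated; any other error stops the goroutine.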
func (s *keyStresser) run(ctx context.Context) {
	defer s.wg.Done()

	for {
		if err := s.rateLimiter.Wait(ctx); err == context.Canceled {
			return
		}

		// TODO: a 10-second timeout is enough to cover leader failure
		// and immediate leader election. Find out in what other cases
		// this could time out.
		sctx, scancel := context.WithTimeout(ctx, 10*time.Second)
		err, modifiedKeys := s.stressTable.choose()(sctx)
		scancel()
		if err == nil {
			atomic.AddInt64(&s.atomicModifiedKeys, modifiedKeys)
			continue
		}

		switch rpctypes.ErrorDesc(err) {
		case context.DeadlineExceeded.Error():
			// This retries when a request is triggered at the same time as
			// leader failure. When we terminate the leader, the request to
			// that leader cannot be processed, and times out. Requests to
			// followers also cannot be forwarded to the old leader, so they
			// time out as well. We want to keep stressing until the cluster
			// elects a new leader and starts processing requests again.
		case etcdserver.ErrTimeoutDueToLeaderFail.Error(), etcdserver.ErrTimeout.Error():
			// This retries when a request is triggered at the same time as
			// leader failure and follower nodes receive timeout errors
			// from losing their leader. Followers should retry to connect
			// to the new leader.
		case etcdserver.ErrStopped.Error():
			// one of the etcd nodes stopped from failure injection
		case transport.ErrConnClosing.Desc:
			// server closed the transport (failure injected node)
		case rpctypes.ErrNotCapable.Error():
			// capability check has not been done (in the beginning)
		case rpctypes.ErrTooManyRequests.Error():
			// hitting the recovering member.
		case context.Canceled.Error():
			// from stresser.Cancel method:
			return
		case grpc.ErrClientConnClosing.Error():
			// from stresser.Cancel method:
			return
		default:
			plog.Errorf("keyStresser %v exited with error (%v)", s.Endpoint, err)
			return
		}
	}
}

func (s *keyStresser) Pause() {
	s.Close()
}

func (s *keyStresser) Close() {
	s.cancel()
	s.conn.Close()
	s.wg.Wait()
	plog.Infof("keyStresser %q is closed", s.Endpoint)
}

func (s *keyStresser) ModifiedKeys() int64 {
	return atomic.LoadInt64(&s.atomicModifiedKeys)
}

func (s *keyStresser) Checker() Checker { return nil }
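
// A stressFunc issues a single stress operation against the cluster and
// reports how many keys it modified.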
type stressFunc func(ctx context.Context) (err error, modifiedKeys int64)

type stressEntry struct {
	weight float32
	f      stressFunc
}

type stressTable struct {
	entries    []stressEntry
	sumWeights float32
}

func createStressTable(entries []stressEntry) *stressTable {
	st := stressTable{entries: entries}
	for _, entry := range st.entries {
		st.sumWeights += entry.weight
	}
	return &st
}
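
// choose picks a stressFunc at random, with probability proportional to its
// entry's weight.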
func (st *stressTable) choose() stressFunc {
	v := rand.Float32() * st.sumWeights
	var sum float32
	var idx int
	for i := range st.entries {
		sum += st.entries[i].weight
		if sum >= v {
			idx = i
			break
		}
	}
	return st.entries[idx].f
}
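
// newStressPut writes a random value of keySize bytes to one of keySuffixRange
// keys under the "foo" prefix.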
func newStressPut(kvc pb.KVClient, keySuffixRange, keySize int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		_, err := kvc.Put(ctx, &pb.PutRequest{
			Key:   []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
			Value: randBytes(keySize),
		}, grpc.FailFast(false))
		return err, 1
	}
}
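
// newStressTxn builds a stressFunc that runs a txn over txnOps distinct keys
// drawn from keyTxnSuffixRange fixed key names ("/k000", "/k001", ...): each
// selected key is deleted if it already exists, otherwise written.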
func newStressTxn(kvc pb.KVClient, keyTxnSuffixRange, txnOps int) stressFunc {
	keys := make([]string, keyTxnSuffixRange)
	for i := range keys {
		keys[i] = fmt.Sprintf("/k%03d", i)
	}
	return writeTxn(kvc, keys, txnOps)
}

func writeTxn(kvc pb.KVClient, keys []string, txnOps int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		ks := make(map[string]struct{}, txnOps)
		for len(ks) != txnOps {
			ks[keys[rand.Intn(len(keys))]] = struct{}{}
		}
		selected := make([]string, 0, txnOps)
		for k := range ks {
			selected = append(selected, k)
		}

		com, delOp, putOp := getTxnReqs(selected[0], "bar00")
		txnReq := &pb.TxnRequest{
			Compare: []*pb.Compare{com},
			Success: []*pb.RequestOp{delOp},
			Failure: []*pb.RequestOp{putOp},
		}

		// add nested txns if any
		for i := 1; i < txnOps; i++ {
			k, v := selected[i], fmt.Sprintf("bar%02d", i)
			com, delOp, putOp = getTxnReqs(k, v)
			nested := &pb.RequestOp{
				Request: &pb.RequestOp_RequestTxn{
					RequestTxn: &pb.TxnRequest{
						Compare: []*pb.Compare{com},
						Success: []*pb.RequestOp{delOp},
						Failure: []*pb.RequestOp{putOp},
					},
				},
			}
			txnReq.Success = append(txnReq.Success, nested)
			txnReq.Failure = append(txnReq.Failure, nested)
		}

		_, err := kvc.Txn(ctx, txnReq, grpc.FailFast(false))
		return err, int64(txnOps)
	}
}
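
// getTxnReqs returns a compare that checks whether key exists (version > 0),
// along with a delete op and a put op for that key.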
func getTxnReqs(key, val string) (com *pb.Compare, delOp *pb.RequestOp, putOp *pb.RequestOp) {
	// if key exists (version > 0)
	com = &pb.Compare{
		Key:         []byte(key),
		Target:      pb.Compare_VERSION,
		Result:      pb.Compare_GREATER,
		TargetUnion: &pb.Compare_Version{Version: 0},
	}
	delOp = &pb.RequestOp{
		Request: &pb.RequestOp_RequestDeleteRange{
			RequestDeleteRange: &pb.DeleteRangeRequest{
				Key: []byte(key),
			},
		},
	}
	putOp = &pb.RequestOp{
		Request: &pb.RequestOp_RequestPut{
			RequestPut: &pb.PutRequest{
				Key:   []byte(key),
				Value: []byte(val),
			},
		},
	}
	return com, delOp, putOp
}
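
// newStressRange reads a single random "foo"-prefixed key, and
// newStressRangeInterval reads a 500-key interval; neither modifies any keys.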
func newStressRange(kvc pb.KVClient, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		_, err := kvc.Range(ctx, &pb.RangeRequest{
			Key: []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
		}, grpc.FailFast(false))
		return err, 0
	}
}

func newStressRangeInterval(kvc pb.KVClient, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		start := rand.Intn(keySuffixRange)
		end := start + 500
		_, err := kvc.Range(ctx, &pb.RangeRequest{
			Key:      []byte(fmt.Sprintf("foo%016x", start)),
			RangeEnd: []byte(fmt.Sprintf("foo%016x", end)),
		}, grpc.FailFast(false))
		return err, 0
	}
}
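
// newStressDelete deletes a single random "foo"-prefixed key, and
// newStressDeleteInterval deletes a 500-key interval, reporting the number of
// keys actually deleted.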
func newStressDelete(kvc pb.KVClient, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		_, err := kvc.DeleteRange(ctx, &pb.DeleteRangeRequest{
			Key: []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
		}, grpc.FailFast(false))
		return err, 1
	}
}

func newStressDeleteInterval(kvc pb.KVClient, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		start := rand.Intn(keySuffixRange)
		end := start + 500
		resp, err := kvc.DeleteRange(ctx, &pb.DeleteRangeRequest{
			Key:      []byte(fmt.Sprintf("foo%016x", start)),
			RangeEnd: []byte(fmt.Sprintf("foo%016x", end)),
		}, grpc.FailFast(false))
		if err == nil {
			return nil, resp.Deleted
		}
		return err, 0
	}
}