check.go

// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package command

import (
	"context"
	"encoding/binary"
	"fmt"
	"math"
	"math/rand"
	"os"
	"strconv"
	"sync"
	"time"

	v3 "go.etcd.io/etcd/clientv3"
	"go.etcd.io/etcd/pkg/report"

	"github.com/spf13/cobra"
	"golang.org/x/time/rate"
	"gopkg.in/cheggaaa/pb.v1"
)

var (
	checkPerfLoad        string
	checkPerfPrefix      string
	checkDatascaleLoad   string
	checkDatascalePrefix string
	autoCompact          bool
	autoDefrag           bool
)

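// checkPerfCfg is a single "check perf" load profile: the target write rate
// in puts/sec (limit), the number of concurrent clients, and the test
// duration in seconds.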
type checkPerfCfg struct {
	limit    int
	clients  int
	duration int
}

var checkPerfCfgMap = map[string]checkPerfCfg{
	// TODO: support read limit
	"s": {
		limit:    150,
		clients:  50,
		duration: 60,
	},
	"m": {
		limit:    1000,
		clients:  200,
		duration: 60,
	},
	"l": {
		limit:    8000,
		clients:  500,
		duration: 60,
	},
	"xl": {
		limit:    15000,
		clients:  1000,
		duration: 60,
	},
}

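// checkDatascaleCfg is a single "check datascale" load profile: the total
// number of key-value pairs to write (limit), the size of each pair in
// bytes (kvSize), and the number of concurrent clients.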
type checkDatascaleCfg struct {
	limit   int
	kvSize  int
	clients int
}

var checkDatascaleCfgMap = map[string]checkDatascaleCfg{
	"s": {
		limit:   10000,
		kvSize:  1024,
		clients: 50,
	},
	"m": {
		limit:   100000,
		kvSize:  1024,
		clients: 200,
	},
	"l": {
		limit:   1000000,
		kvSize:  1024,
		clients: 500,
	},
	"xl": {
		// xl tries to hit the upper bound aggressively which is 3 versions of 1M objects (3M in total)
		limit:   3000000,
		kvSize:  1024,
		clients: 1000,
	},
}

// NewCheckCommand returns the cobra command for "check".
func NewCheckCommand() *cobra.Command {
	cc := &cobra.Command{
		Use:   "check <subcommand>",
		Short: "commands for checking properties of the etcd cluster",
	}
	cc.AddCommand(NewCheckPerfCommand())
	cc.AddCommand(NewCheckDatascaleCommand())
	return cc
}

// NewCheckPerfCommand returns the cobra command for "check perf".
func NewCheckPerfCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "perf [options]",
		Short: "Check the performance of the etcd cluster",
		Run:   newCheckPerfCommand,
	}

	// TODO: support customized configuration
	cmd.Flags().StringVar(&checkPerfLoad, "load", "s", "The performance check's workload model. Accepted workloads: s(small), m(medium), l(large), xl(xLarge)")
	cmd.Flags().StringVar(&checkPerfPrefix, "prefix", "/etcdctl-check-perf/", "The prefix for writing the performance check's keys.")
	cmd.Flags().BoolVar(&autoCompact, "auto-compact", false, "Compact storage with last revision after test is finished.")
	cmd.Flags().BoolVar(&autoDefrag, "auto-defrag", false, "Defragment storage after test is finished.")

	return cmd
}

// newCheckPerfCommand executes the "check perf" command.
func newCheckPerfCommand(cmd *cobra.Command, args []string) {
	var checkPerfAlias = map[string]string{
		"s": "s", "small": "s",
		"m": "m", "medium": "m",
		"l": "l", "large": "l",
		"xl": "xl", "xLarge": "xl",
	}

	model, ok := checkPerfAlias[checkPerfLoad]
	if !ok {
		ExitWithError(ExitBadFeature, fmt.Errorf("unknown load option %v", checkPerfLoad))
	}
	cfg := checkPerfCfgMap[model]
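
	// Buffer the shared request channel to the client count and cap the
	// produce rate with a token-bucket limiter (burst of 1), so the measured
	// throughput reflects the profile's target rate rather than buffering.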
	requests := make(chan v3.Op, cfg.clients)
	limit := rate.NewLimiter(rate.Limit(cfg.limit), 1)

	cc := clientConfigFromCmd(cmd)
	clients := make([]*v3.Client, cfg.clients)
	for i := 0; i < cfg.clients; i++ {
		clients[i] = cc.mustClient()
	}

	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(cfg.duration)*time.Second)
	resp, err := clients[0].Get(ctx, checkPerfPrefix, v3.WithPrefix(), v3.WithLimit(1))
	cancel()
	if err != nil {
		ExitWithError(ExitError, err)
	}
	if len(resp.Kvs) > 0 {
		ExitWithError(ExitInvalidInput, fmt.Errorf("prefix %q has keys. Delete with etcdctl del --prefix %s first", checkPerfPrefix, checkPerfPrefix))
	}
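
	// Each put writes a 256-byte key under the check prefix with a 1 KiB
	// zero-filled value; the key bytes are randomized per request below.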
	ksize, vsize := 256, 1024
	k, v := make([]byte, ksize), string(make([]byte, vsize))

	bar := pb.New(cfg.duration)
	bar.Format("Bom !")
	bar.Start()

	r := report.NewReport("%4.4f")
	var wg sync.WaitGroup
	wg.Add(len(clients))
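
	// Fan out one goroutine per client; each drains the shared request
	// channel and records per-operation latency into the report.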
	for i := range clients {
		go func(c *v3.Client) {
			defer wg.Done()
			for op := range requests {
				st := time.Now()
				_, derr := c.Do(context.Background(), op)
				r.Results() <- report.Result{Err: derr, Start: st, End: time.Now()}
			}
		}(clients[i])
	}
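
	// Produce rate-limited puts for cfg.duration seconds; closing the channel
	// once the deadline hits lets the worker goroutines drain and exit.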
	go func() {
		cctx, ccancel := context.WithTimeout(context.Background(), time.Duration(cfg.duration)*time.Second)
		defer ccancel()
		for limit.Wait(cctx) == nil {
			binary.PutVarint(k, rand.Int63n(math.MaxInt64))
			requests <- v3.OpPut(checkPerfPrefix+string(k), v)
		}
		close(requests)
	}()

	go func() {
		for i := 0; i < cfg.duration; i++ {
			time.Sleep(time.Second)
			bar.Add(1)
		}
		bar.Finish()
	}()
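
	// Wait for the workers to finish, close the results channel so the
	// report can aggregate, then clean up the keys written by the test.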
	sc := r.Stats()
	wg.Wait()
	close(r.Results())
	s := <-sc

	ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
	dresp, err := clients[0].Delete(ctx, checkPerfPrefix, v3.WithPrefix())
	cancel()
	if err != nil {
		ExitWithError(ExitError, err)
	}

	if autoCompact {
		compact(clients[0], dresp.Header.Revision)
	}

	if autoDefrag {
		for _, ep := range clients[0].Endpoints() {
			defrag(clients[0], ep)
		}
	}
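
	// Evaluate the run: any errors, throughput below 90% of the target rate,
	// a slowest request over 500ms, or a latency stddev over 100ms fails the
	// check.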
	ok = true
	if len(s.ErrorDist) != 0 {
		fmt.Println("FAIL: too many errors")
		for k, v := range s.ErrorDist {
			fmt.Printf("FAIL: ERROR(%v) -> %d\n", k, v)
		}
		ok = false
	}

	if s.RPS/float64(cfg.limit) <= 0.9 {
		fmt.Printf("FAIL: Throughput too low: %d writes/s\n", int(s.RPS)+1)
		ok = false
	} else {
		fmt.Printf("PASS: Throughput is %d writes/s\n", int(s.RPS)+1)
	}
	if s.Slowest > 0.5 { // slowest request > 500ms
		fmt.Printf("FAIL: Slowest request took too long: %fs\n", s.Slowest)
		ok = false
	} else {
		fmt.Printf("PASS: Slowest request took %fs\n", s.Slowest)
	}
	if s.Stddev > 0.1 { // stddev > 100ms
		fmt.Printf("FAIL: Stddev too high: %fs\n", s.Stddev)
		ok = false
	} else {
		fmt.Printf("PASS: Stddev is %fs\n", s.Stddev)
	}

	if ok {
		fmt.Println("PASS")
	} else {
		fmt.Println("FAIL")
		os.Exit(ExitError)
	}
}

// NewCheckDatascaleCommand returns the cobra command for "check datascale".
func NewCheckDatascaleCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "datascale [options]",
		Short: "Check the memory usage of holding data for different workloads on a given server endpoint.",
		Long:  "If no endpoint is provided, localhost will be used. If multiple endpoints are provided, the first endpoint will be used.",
		Run:   newCheckDatascaleCommand,
	}

	cmd.Flags().StringVar(&checkDatascaleLoad, "load", "s", "The datascale check's workload model. Accepted workloads: s(small), m(medium), l(large), xl(xLarge)")
	cmd.Flags().StringVar(&checkDatascalePrefix, "prefix", "/etcdctl-check-datascale/", "The prefix for writing the datascale check's keys.")
	cmd.Flags().BoolVar(&autoCompact, "auto-compact", false, "Compact storage with last revision after test is finished.")
	cmd.Flags().BoolVar(&autoDefrag, "auto-defrag", false, "Defragment storage after test is finished.")

	return cmd
}

// newCheckDatascaleCommand executes the "check datascale" command.
func newCheckDatascaleCommand(cmd *cobra.Command, args []string) {
	var checkDatascaleAlias = map[string]string{
		"s": "s", "small": "s",
		"m": "m", "medium": "m",
		"l": "l", "large": "l",
		"xl": "xl", "xLarge": "xl",
	}

	model, ok := checkDatascaleAlias[checkDatascaleLoad]
	if !ok {
		ExitWithError(ExitBadFeature, fmt.Errorf("unknown load option %v", checkDatascaleLoad))
	}
	cfg := checkDatascaleCfgMap[model]

	requests := make(chan v3.Op, cfg.clients)

	cc := clientConfigFromCmd(cmd)
	clients := make([]*v3.Client, cfg.clients)
	for i := 0; i < cfg.clients; i++ {
		clients[i] = cc.mustClient()
	}

	// get endpoints
	eps, errEndpoints := endpointsFromCmd(cmd)
	if errEndpoints != nil {
		ExitWithError(ExitError, errEndpoints)
	}

	ctx, cancel := context.WithCancel(context.Background())
	resp, err := clients[0].Get(ctx, checkDatascalePrefix, v3.WithPrefix(), v3.WithLimit(1))
	cancel()
	if err != nil {
		ExitWithError(ExitError, err)
	}
	if len(resp.Kvs) > 0 {
		ExitWithError(ExitInvalidInput, fmt.Errorf("prefix %q has keys. Delete with etcdctl del --prefix %s first", checkDatascalePrefix, checkDatascalePrefix))
	}
	ksize, vsize := 512, 512
	k, v := make([]byte, ksize), string(make([]byte, vsize))

	r := report.NewReport("%4.4f")
	var wg sync.WaitGroup
	wg.Add(len(clients))

	// get the process_resident_memory_bytes and process_virtual_memory_bytes before the put operations
	bytesBefore := endpointMemoryMetrics(eps[0])
	if bytesBefore == 0 {
		fmt.Println("FAIL: Could not read process_resident_memory_bytes before the put operations.")
		os.Exit(ExitError)
	}

	fmt.Printf("Start data scale check for work load [%v key-value pairs, %v bytes per key-value, %v concurrent clients].\n", cfg.limit, cfg.kvSize, cfg.clients)

	bar := pb.New(cfg.limit)
	bar.Format("Bom !")
	bar.Start()
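
	// Same worker fan-out as "check perf", except each completed put also
	// advances the progress bar.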
	for i := range clients {
		go func(c *v3.Client) {
			defer wg.Done()
			for op := range requests {
				st := time.Now()
				_, derr := c.Do(context.Background(), op)
				r.Results() <- report.Result{Err: derr, Start: st, End: time.Now()}
				bar.Increment()
			}
		}(clients[i])
	}
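
	// Unlike "check perf", the producer is bounded by a put count
	// (cfg.limit) rather than a time budget, and it is not rate-limited.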
	go func() {
		for i := 0; i < cfg.limit; i++ {
			binary.PutVarint(k, rand.Int63n(math.MaxInt64))
			requests <- v3.OpPut(checkDatascalePrefix+string(k), v)
		}
		close(requests)
	}()
	sc := r.Stats()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	s := <-sc

	// get the process_resident_memory_bytes after the put operations
	bytesAfter := endpointMemoryMetrics(eps[0])
	if bytesAfter == 0 {
		fmt.Println("FAIL: Could not read process_resident_memory_bytes after the put operations.")
		os.Exit(ExitError)
	}

	// delete the created kv pairs
	ctx, cancel = context.WithCancel(context.Background())
	dresp, derr := clients[0].Delete(ctx, checkDatascalePrefix, v3.WithPrefix())
	defer cancel()
	if derr != nil {
		ExitWithError(ExitError, derr)
	}

	if autoCompact {
		compact(clients[0], dresp.Header.Revision)
	}

	if autoDefrag {
		for _, ep := range clients[0].Endpoints() {
			defrag(clients[0], ep)
		}
	}
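
	// Report the growth in resident memory as an approximation of the
	// memory cost of holding the written data.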
	bytesUsed := bytesAfter - bytesBefore
	mbUsed := bytesUsed / (1024 * 1024)

	if len(s.ErrorDist) != 0 {
		fmt.Println("FAIL: too many errors")
		for k, v := range s.ErrorDist {
			fmt.Printf("FAIL: ERROR(%v) -> %d\n", k, v)
		}
		os.Exit(ExitError)
	} else {
		fmt.Printf("PASS: Approximate system memory used: %v MB.\n", strconv.FormatFloat(mbUsed, 'f', 2, 64))
	}
}