check.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. // Copyright 2017 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package command
  15. import (
  16. "context"
  17. "encoding/binary"
  18. "fmt"
  19. "math"
  20. "math/rand"
  21. "os"
  22. "strconv"
  23. "sync"
  24. "time"
  25. v3 "github.com/coreos/etcd/clientv3"
  26. "github.com/coreos/etcd/pkg/report"
  27. "github.com/spf13/cobra"
  28. "golang.org/x/time/rate"
  29. "gopkg.in/cheggaaa/pb.v1"
  30. )
  31. var (
  32. checkPerfLoad string
  33. checkPerfPrefix string
  34. checkPerfAutoCompact bool
  35. checkPerfAutoDefrag bool
  36. checkDatascaleLoad string
  37. checkDatascalePrefix string
  38. )
  39. type checkPerfCfg struct {
  40. limit int
  41. clients int
  42. duration int
  43. }
  44. var checkPerfCfgMap = map[string]checkPerfCfg{
  45. // TODO: support read limit
  46. "s": {
  47. limit: 150,
  48. clients: 50,
  49. duration: 60,
  50. },
  51. "m": {
  52. limit: 1000,
  53. clients: 200,
  54. duration: 60,
  55. },
  56. "l": {
  57. limit: 8000,
  58. clients: 500,
  59. duration: 60,
  60. },
  61. "xl": {
  62. limit: 15000,
  63. clients: 1000,
  64. duration: 60,
  65. },
  66. }
  67. type checkDatascaleCfg struct {
  68. limit int
  69. kvSize int
  70. clients int
  71. }
  72. var checkDatascaleCfgMap = map[string]checkDatascaleCfg{
  73. "s": {
  74. limit: 10000,
  75. kvSize: 1024,
  76. clients: 50,
  77. },
  78. "m": {
  79. limit: 100000,
  80. kvSize: 1024,
  81. clients: 200,
  82. },
  83. "l": {
  84. limit: 1000000,
  85. kvSize: 1024,
  86. clients: 500,
  87. },
  88. "xl": {
  89. // xl tries to hit the upper bound aggressively which is 3 versions of 1M objects (3M in total)
  90. limit: 30000000,
  91. kvSize: 1024,
  92. clients: 1000,
  93. },
  94. }
  95. // NewCheckCommand returns the cobra command for "check".
  96. func NewCheckCommand() *cobra.Command {
  97. cc := &cobra.Command{
  98. Use: "check <subcommand>",
  99. Short: "commands for checking properties of the etcd cluster",
  100. }
  101. cc.AddCommand(NewCheckPerfCommand())
  102. cc.AddCommand(NewCheckDatascaleCommand())
  103. return cc
  104. }
  105. // NewCheckPerfCommand returns the cobra command for "check perf".
  106. func NewCheckPerfCommand() *cobra.Command {
  107. cmd := &cobra.Command{
  108. Use: "perf [options]",
  109. Short: "Check the performance of the etcd cluster",
  110. Run: newCheckPerfCommand,
  111. }
  112. // TODO: support customized configuration
  113. cmd.Flags().StringVar(&checkPerfLoad, "load", "s", "The performance check's workload model. Accepted workloads: s(small), m(medium), l(large), xl(xLarge)")
  114. cmd.Flags().StringVar(&checkPerfPrefix, "prefix", "/etcdctl-check-perf/", "The prefix for writing the performance check's keys.")
  115. cmd.Flags().BoolVar(&checkPerfAutoCompact, "auto-compact", false, "Compact storage with last revision after test is finished.")
  116. cmd.Flags().BoolVar(&checkPerfAutoDefrag, "auto-defrag", false, "Defragment storage after test is finished.")
  117. return cmd
  118. }
// newCheckPerfCommand executes the "check perf" command.
// It drives rate-limited writes against the cluster for the configured
// duration, then reports PASS/FAIL against fixed throughput, slowest-request,
// and standard-deviation thresholds. On failure it exits with ExitError.
func newCheckPerfCommand(cmd *cobra.Command, args []string) {
	// Accept both short and long workload names for --load.
	var checkPerfAlias = map[string]string{
		"s": "s", "small": "s",
		"m": "m", "medium": "m",
		"l": "l", "large": "l",
		"xl": "xl", "xLarge": "xl",
	}
	model, ok := checkPerfAlias[checkPerfLoad]
	if !ok {
		ExitWithError(ExitBadFeature, fmt.Errorf("unknown load option %v", checkPerfLoad))
	}
	cfg := checkPerfCfgMap[model]

	// Buffered work queue: one slot per client so the producer rarely blocks.
	requests := make(chan v3.Op, cfg.clients)
	// Token-bucket limiter caps the producer at cfg.limit puts per second.
	limit := rate.NewLimiter(rate.Limit(cfg.limit), 1)

	cc := clientConfigFromCmd(cmd)
	clients := make([]*v3.Client, cfg.clients)
	for i := 0; i < cfg.clients; i++ {
		clients[i] = cc.mustClient()
	}

	// Refuse to run if the prefix already holds keys, so the cleanup
	// delete below cannot destroy pre-existing data.
	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(cfg.duration)*time.Second)
	resp, err := clients[0].Get(ctx, checkPerfPrefix, v3.WithPrefix(), v3.WithLimit(1))
	cancel()
	if err != nil {
		ExitWithError(ExitError, err)
	}
	if len(resp.Kvs) > 0 {
		ExitWithError(ExitInvalidInput, fmt.Errorf("prefix %q has keys. Delete with etcdctl del --prefix %s first.", checkPerfPrefix, checkPerfPrefix))
	}

	// Fixed payload sizes: 256-byte key buffer, 1 KiB value.
	ksize, vsize := 256, 1024
	k, v := make([]byte, ksize), string(make([]byte, vsize))

	bar := pb.New(cfg.duration)
	bar.Format("Bom !")
	bar.Start()

	r := report.NewReport("%4.4f")
	var wg sync.WaitGroup
	wg.Add(len(clients))
	// One worker goroutine per client: drain ops from requests and record
	// each result (latency + error) into the report. Workers exit when
	// the producer closes requests.
	for i := range clients {
		go func(c *v3.Client) {
			defer wg.Done()
			for op := range requests {
				st := time.Now()
				_, derr := c.Do(context.Background(), op)
				r.Results() <- report.Result{Err: derr, Start: st, End: time.Now()}
			}
		}(clients[i])
	}

	// Producer: generate rate-limited puts with a random varint stamped
	// into the key buffer, for cfg.duration seconds. limit.Wait returns
	// non-nil once cctx expires, which ends the loop and stops the workers.
	go func() {
		cctx, ccancel := context.WithTimeout(context.Background(), time.Duration(cfg.duration)*time.Second)
		defer ccancel()
		for limit.Wait(cctx) == nil {
			binary.PutVarint(k, int64(rand.Int63n(math.MaxInt64)))
			requests <- v3.OpPut(checkPerfPrefix+string(k), v)
		}
		close(requests)
	}()

	// Progress bar ticker: advance once per second for the test duration.
	go func() {
		for i := 0; i < cfg.duration; i++ {
			time.Sleep(time.Second)
			bar.Add(1)
		}
		bar.Finish()
	}()

	// Stats must be requested before closing Results; the summary arrives
	// on sc after the results channel is closed.
	sc := r.Stats()
	wg.Wait()
	close(r.Results())
	s := <-sc

	// Clean up the keys written during the test.
	ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
	dresp, err := clients[0].Delete(ctx, checkPerfPrefix, v3.WithPrefix())
	cancel()
	if err != nil {
		ExitWithError(ExitError, err)
	}

	if checkPerfAutoCompact {
		// The delete response's header revision is the latest revision.
		compact(clients[0], dresp.Header.Revision)
	}

	if checkPerfAutoDefrag {
		for _, ep := range clients[0].Endpoints() {
			defrag(clients[0], ep)
		}
	}

	// Evaluate the run; ok reuses the earlier alias-lookup flag.
	ok = true
	if len(s.ErrorDist) != 0 {
		fmt.Println("FAIL: too many errors")
		for k, v := range s.ErrorDist {
			fmt.Printf("FAIL: ERROR(%v) -> %d\n", k, v)
		}
		ok = false
	}

	// Throughput must reach more than 90% of the configured write limit.
	if s.RPS/float64(cfg.limit) <= 0.9 {
		fmt.Printf("FAIL: Throughput too low: %d writes/s\n", int(s.RPS)+1)
		ok = false
	} else {
		fmt.Printf("PASS: Throughput is %d writes/s\n", int(s.RPS)+1)
	}
	if s.Slowest > 0.5 { // slowest request > 500ms
		fmt.Printf("Slowest request took too long: %fs\n", s.Slowest)
		ok = false
	} else {
		fmt.Printf("PASS: Slowest request took %fs\n", s.Slowest)
	}
	if s.Stddev > 0.1 { // stddev > 100ms
		fmt.Printf("Stddev too high: %fs\n", s.Stddev)
		ok = false
	} else {
		fmt.Printf("PASS: Stddev is %fs\n", s.Stddev)
	}

	if ok {
		fmt.Println("PASS")
	} else {
		fmt.Println("FAIL")
		os.Exit(ExitError)
	}
}
  233. func compact(c *v3.Client, rev int64) {
  234. fmt.Printf("Compacting with revision %d\n", rev)
  235. ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
  236. _, err := c.Compact(ctx, rev, v3.WithCompactPhysical())
  237. cancel()
  238. if err != nil {
  239. ExitWithError(ExitError, err)
  240. }
  241. fmt.Printf("Compacted with revision %d\n", rev)
  242. }
  243. func defrag(c *v3.Client, ep string) {
  244. fmt.Printf("Defragmenting %q\n", ep)
  245. ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
  246. _, err := c.Defragment(ctx, ep)
  247. cancel()
  248. if err != nil {
  249. ExitWithError(ExitError, err)
  250. }
  251. fmt.Printf("Defragmented %q\n", ep)
  252. }
  253. // NewCheckDatascaleCommand returns the cobra command for "check datascale".
  254. func NewCheckDatascaleCommand() *cobra.Command {
  255. cmd := &cobra.Command{
  256. Use: "datascale [options]",
  257. Short: "Check the memory usage of holding data for diferent workloads on a given server endpoint.",
  258. Long: "If no endpoint is provided, localhost will be used. If multiple endpoints are provided, first endpoint will be used.",
  259. Run: newCheckDatascaleCommand,
  260. }
  261. cmd.Flags().StringVar(&checkDatascaleLoad, "load", "s", "The datascale check's workload model. Accepted workloads: s(small), m(medium), l(large), xl(xLarge)")
  262. cmd.Flags().StringVar(&checkDatascalePrefix, "prefix", "/etcdctl-check-datascale/", "The prefix for writing the datascale check's keys.")
  263. return cmd
  264. }
  265. // newCheckDatascaleCommand executes the "check datascale" command.
  266. func newCheckDatascaleCommand(cmd *cobra.Command, args []string) {
  267. var checkDatascaleAlias = map[string]string{
  268. "s": "s", "small": "s",
  269. "m": "m", "medium": "m",
  270. "l": "l", "large": "l",
  271. "xl": "xl", "xLarge": "xl",
  272. }
  273. model, ok := checkDatascaleAlias[checkDatascaleLoad]
  274. if !ok {
  275. ExitWithError(ExitBadFeature, fmt.Errorf("unknown load option %v", checkDatascaleLoad))
  276. }
  277. cfg := checkDatascaleCfgMap[model]
  278. requests := make(chan v3.Op, cfg.clients)
  279. cc := clientConfigFromCmd(cmd)
  280. clients := make([]*v3.Client, cfg.clients)
  281. for i := 0; i < cfg.clients; i++ {
  282. clients[i] = cc.mustClient()
  283. }
  284. // get endpoints
  285. eps, errEndpoints := endpointsFromCmd(cmd)
  286. if errEndpoints != nil {
  287. ExitWithError(ExitError, errEndpoints)
  288. }
  289. ctx, cancel := context.WithCancel(context.Background())
  290. resp, err := clients[0].Get(ctx, checkDatascalePrefix, v3.WithPrefix(), v3.WithLimit(1))
  291. cancel()
  292. if err != nil {
  293. ExitWithError(ExitError, err)
  294. }
  295. if len(resp.Kvs) > 0 {
  296. ExitWithError(ExitInvalidInput, fmt.Errorf("prefix %q has keys. Delete with etcdctl del --prefix %s first.", checkDatascalePrefix, checkDatascalePrefix))
  297. }
  298. ksize, vsize := 512, 512
  299. k, v := make([]byte, ksize), string(make([]byte, vsize))
  300. r := report.NewReport("%4.4f")
  301. var wg sync.WaitGroup
  302. wg.Add(len(clients))
  303. // get the process_resident_memory_bytes and process_virtual_memory_bytes before the put operations
  304. bytesBefore := endpointMemoryMetrics(eps[0])
  305. if bytesBefore == 0 {
  306. fmt.Println("FAIL: Could not read process_resident_memory_bytes before the put operations.")
  307. os.Exit(ExitError)
  308. }
  309. fmt.Println(fmt.Sprintf("Start data scale check for work load [%v key-value pairs, %v bytes per key-value, %v concurrent clients].", cfg.limit, cfg.kvSize, cfg.clients))
  310. for i := range clients {
  311. go func(c *v3.Client) {
  312. defer wg.Done()
  313. for op := range requests {
  314. st := time.Now()
  315. _, derr := c.Do(context.Background(), op)
  316. r.Results() <- report.Result{Err: derr, Start: st, End: time.Now()}
  317. }
  318. }(clients[i])
  319. }
  320. go func() {
  321. for i := 0; i < cfg.limit; i++ {
  322. binary.PutVarint(k, int64(rand.Int63n(math.MaxInt64)))
  323. requests <- v3.OpPut(checkDatascalePrefix+string(k), v)
  324. }
  325. close(requests)
  326. }()
  327. sc := r.Stats()
  328. wg.Wait()
  329. close(r.Results())
  330. s := <-sc
  331. // get the process_resident_memory_bytes after the put operations
  332. bytesAfter := endpointMemoryMetrics(eps[0])
  333. if bytesAfter == 0 {
  334. fmt.Println("FAIL: Could not read process_resident_memory_bytes after the put operations.")
  335. os.Exit(ExitError)
  336. }
  337. // delete the created kv pairs
  338. ctx, cancel = context.WithCancel(context.Background())
  339. _, err = clients[0].Delete(ctx, checkDatascalePrefix, v3.WithPrefix())
  340. defer cancel()
  341. if err != nil {
  342. ExitWithError(ExitError, err)
  343. }
  344. if bytesAfter == 0 {
  345. fmt.Println("FAIL: Could not read process_resident_memory_bytes after the put operations.")
  346. os.Exit(ExitError)
  347. }
  348. bytesUsed := bytesAfter - bytesBefore
  349. mbUsed := bytesUsed / (1024 * 1024)
  350. if len(s.ErrorDist) != 0 {
  351. fmt.Println("FAIL: too many errors")
  352. for k, v := range s.ErrorDist {
  353. fmt.Printf("FAIL: ERROR(%v) -> %d\n", k, v)
  354. }
  355. os.Exit(ExitError)
  356. } else {
  357. fmt.Println(fmt.Sprintf("PASS: Approximate system memory used : %v MB.", strconv.FormatFloat(float64(mbUsed), 'f', 2, 64)))
  358. }
  359. }