mixin.libsonnet 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293
  1. {
  // Mixin configuration. `etcd_selector` is a PromQL label matcher that the
  // alert expressions in this file interpolate via `%(etcd_selector)s`.
  _config+:: { etcd_selector: 'job=~".*etcd.*"' },
  5. prometheusAlerts+:: {
  6. groups+: [
  7. {
  8. name: 'etcd',
  9. rules: [
  {
    // Per job, takes the larger of: the number of scrape targets that are not
    // up, or the number of endpoints whose peer send-failure rate over 3m is
    // above 0.01/s. Alerts when that number is greater than zero.
    alert: 'etcdMembersDown',
    labels: { severity: 'critical' },
    'for': '3m',
    annotations: {
      message: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value }}).',
    },
    expr: |||
      max by (job) (
      sum by (job) (up{%(etcd_selector)s} == bool 0)
      or
      count by (job,endpoint) (
      sum by (job,endpoint,To) (rate(etcd_network_peer_sent_failures_total{%(etcd_selector)s}[3m])) > 0.01
      )
      )
      > 0
    ||| % $._config,
  },
  {
    // Alerts when, per job, the number of targets that are up is lower than
    // (total number of targets + 1) / 2.
    alert: 'etcdInsufficientMembers',
    labels: { severity: 'critical' },
    'for': '3m',
    annotations: {
      message: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).',
    },
    expr: |||
      sum(up{%(etcd_selector)s} == bool 1) by (job) < ((count(up{%(etcd_selector)s}) by (job) + 1) / 2)
    ||| % $._config,
  },
  {
    // Alerts for every series whose `etcd_server_has_leader` gauge has been 0
    // for one minute.
    alert: 'etcdNoLeader',
    labels: { severity: 'critical' },
    'for': '1m',
    annotations: {
      message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.',
    },
    expr: |||
      etcd_server_has_leader{%(etcd_selector)s} == 0
    ||| % $._config,
  },
  {
    // Alerts when a member has seen more than 3 leader changes in a 15 minute
    // window.
    alert: 'etcdHighNumberOfLeaderChanges',
    // `increase()` yields the number of changes over the window. The previous
    // `rate()` yielded changes *per second*, so the `> 3` threshold could
    // practically never be reached and `{{ $value }}` was not a count.
    expr: |||
      increase(etcd_server_leader_changes_seen_total{%(etcd_selector)s}[15m]) > 3
    ||| % $._config,
    'for': '15m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      // The window named here must match the range selector in `expr` (15m).
      message: 'etcd cluster "{{ $labels.job }}": instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last 15 minutes.',
    },
  },
  {
    // Warning tier: percentage of gRPC requests with a code other than "OK",
    // per (job, instance, grpc_service, grpc_method), above 1 for 10 minutes.
    alert: 'etcdHighNumberOfFailedGRPCRequests',
    labels: { severity: 'warning' },
    'for': '10m',
    annotations: {
      message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
    },
    expr: |||
      100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
      /
      sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) BY (job, instance, grpc_service, grpc_method)
      > 1
    ||| % $._config,
  },
  {
    // Critical tier: percentage of gRPC requests with a code other than "OK",
    // per (job, instance, grpc_service, grpc_method), above 5 for 5 minutes.
    alert: 'etcdHighNumberOfFailedGRPCRequests',
    labels: { severity: 'critical' },
    'for': '5m',
    annotations: {
      message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
    },
    expr: |||
      100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
      /
      sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) BY (job, instance, grpc_service, grpc_method)
      > 5
    ||| % $._config,
  },
  {
    // Alerts when the 0.99 quantile of unary gRPC handling time, per
    // (job, instance, grpc_service, grpc_method), stays above 0.15 for 10 minutes.
    alert: 'etcdGRPCRequestsSlow',
    labels: { severity: 'critical' },
    'for': '10m',
    annotations: {
      message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
    expr: |||
      histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{%(etcd_selector)s, grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
      > 0.15
    ||| % $._config,
  },
  {
    // Alerts when the 0.99 quantile of the peer round-trip-time histogram
    // stays above 0.15 for 10 minutes.
    alert: 'etcdMemberCommunicationSlow',
    labels: { severity: 'warning' },
    'for': '10m',
    annotations: {
      message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
    expr: |||
      histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.15
    ||| % $._config,
  },
  {
    // Alerts when a member records more than 5 failed proposals in a
    // 15 minute window.
    alert: 'etcdHighNumberOfFailedProposals',
    // `increase()` returns the number of failures over the window, which is
    // what both the threshold and the message describe. The previous `rate()`
    // returned failures *per second*, so `{{ $value }}` was not a count and
    // the alert only fired above 5 failures per second.
    expr: |||
      increase(etcd_server_proposals_failed_total{%(etcd_selector)s}[15m]) > 5
    ||| % $._config,
    'for': '15m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      // The window named here must match the range selector in `expr` (15m).
      message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 15 minutes on etcd instance {{ $labels.instance }}.',
    },
  },
  {
    // Alerts when the 0.99 quantile of the WAL fsync duration histogram stays
    // above 0.5 for 10 minutes.
    alert: 'etcdHighFsyncDurations',
    expr: |||
      histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.5
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      // Spelling fixed: the message previously read "fync".
      message: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
  },
  {
    // Alerts when the 0.99 quantile of the backend commit duration histogram
    // stays above 0.25 for 10 minutes.
    alert: 'etcdHighCommitDurations',
    labels: { severity: 'warning' },
    'for': '10m',
    annotations: {
      message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
    expr: |||
      histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.25
    ||| % $._config,
  },
  {
    // Warning tier: more than 1% of HTTP requests (404 responses excluded)
    // failed, per (job, instance, method), for 10 minutes.
    alert: 'etcdHighNumberOfFailedHTTPRequests',
    // Two fixes relative to the previous expression:
    //  * the ratio is scaled by 100 so `{{ $value }}%` in the message is a
    //    real percentage (threshold 0.01 becomes 1);
    //  * `job` and `instance` are kept in the aggregation, because the message
    //    renders `{{ $labels.instance }}`, which `BY (method)` discarded.
    expr: |||
      100 * sum(rate(etcd_http_failed_total{%(etcd_selector)s, code!="404"}[5m])) BY (job, instance, method)
      /
      sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m])) BY (job, instance, method)
      > 1
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.',
    },
  },
  {
    // Critical tier: more than 5% of HTTP requests (404 responses excluded)
    // failed, per (job, instance, method), for 10 minutes.
    alert: 'etcdHighNumberOfFailedHTTPRequests',
    // Two fixes relative to the previous expression:
    //  * the ratio is scaled by 100 so `{{ $value }}%` in the message is a
    //    real percentage (threshold 0.05 becomes 5);
    //  * `job` and `instance` are kept in the aggregation, because the message
    //    renders `{{ $labels.instance }}`, which `BY (method)` discarded.
    expr: |||
      100 * sum(rate(etcd_http_failed_total{%(etcd_selector)s, code!="404"}[5m])) BY (job, instance, method)
      /
      sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m])) BY (job, instance, method)
      > 5
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'critical',
    },
    annotations: {
      message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.',
    },
  },
  {
    // Alerts when the 0.99 quantile of the successful-HTTP-request duration
    // histogram stays above 0.15 for 10 minutes.
    alert: 'etcdHTTPRequestsSlow',
    // The metric is now restricted with `etcd_selector`, like every other rule
    // in this group. Previously the selector was missing, so the rule matched
    // series from any job and the `% $._config` interpolation had no effect.
    expr: |||
      histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.15
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      message: 'etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow.',
    },
  },
  212. ],
  213. },
  214. ],
  215. },
  216. grafanaDashboards+:: {
  217. 'etcd.json': {
  218. id: 6,
  219. title: 'etcd',
  220. description: 'etcd sample Grafana dashboard with Prometheus',
  221. tags: [],
  222. style: 'dark',
  223. timezone: 'browser',
  224. editable: true,
  225. hideControls: false,
  226. sharedCrosshair: false,
  227. rows: [
  228. {
  229. collapse: false,
  230. editable: true,
  231. height: '250px',
  232. panels: [
  233. {
  234. cacheTimeout: null,
  235. colorBackground: false,
  236. colorValue: false,
  237. colors: [
  238. 'rgba(245, 54, 54, 0.9)',
  239. 'rgba(237, 129, 40, 0.89)',
  240. 'rgba(50, 172, 45, 0.97)',
  241. ],
  242. datasource: '$datasource',
  243. editable: true,
  244. 'error': false,
  245. format: 'none',
  246. gauge: {
  247. maxValue: 100,
  248. minValue: 0,
  249. show: false,
  250. thresholdLabels: false,
  251. thresholdMarkers: true,
  252. },
  253. id: 28,
  254. interval: null,
  255. isNew: true,
  256. links: [],
  257. mappingType: 1,
  258. mappingTypes: [
  259. {
  260. name: 'value to text',
  261. value: 1,
  262. },
  263. {
  264. name: 'range to text',
  265. value: 2,
  266. },
  267. ],
  268. maxDataPoints: 100,
  269. nullPointMode: 'connected',
  270. nullText: null,
  271. postfix: '',
  272. postfixFontSize: '50%',
  273. prefix: '',
  274. prefixFontSize: '50%',
  275. rangeMaps: [{
  276. from: 'null',
  277. text: 'N/A',
  278. to: 'null',
  279. }],
  280. span: 3,
  281. sparkline: {
  282. fillColor: 'rgba(31, 118, 189, 0.18)',
  283. full: false,
  284. lineColor: 'rgb(31, 120, 193)',
  285. show: false,
  286. },
  287. targets: [{
  288. expr: 'sum(etcd_server_has_leader{job="$cluster"})',
  289. intervalFactor: 2,
  290. legendFormat: '',
  291. metric: 'etcd_server_has_leader',
  292. refId: 'A',
  293. step: 20,
  294. }],
  295. thresholds: '',
  296. title: 'Up',
  297. type: 'singlestat',
  298. valueFontSize: '200%',
  299. valueMaps: [{
  300. op: '=',
  301. text: 'N/A',
  302. value: 'null',
  303. }],
  304. valueName: 'avg',
  305. },
  306. {
  307. aliasColors: {},
  308. bars: false,
  309. datasource: '$datasource',
  310. editable: true,
  311. 'error': false,
  312. fill: 0,
  313. id: 23,
  314. isNew: true,
  315. legend: {
  316. avg: false,
  317. current: false,
  318. max: false,
  319. min: false,
  320. show: false,
  321. total: false,
  322. values: false,
  323. },
  324. lines: true,
  325. linewidth: 2,
  326. links: [],
  327. nullPointMode: 'connected',
  328. percentage: false,
  329. pointradius: 5,
  330. points: false,
  331. renderer: 'flot',
  332. seriesOverrides: [],
  333. span: 5,
  334. stack: false,
  335. steppedLine: false,
  336. targets: [
  337. {
  338. expr: 'sum(rate(grpc_server_started_total{job="$cluster",grpc_type="unary"}[5m]))',
  339. format: 'time_series',
  340. intervalFactor: 2,
  341. legendFormat: 'RPC Rate',
  342. metric: 'grpc_server_started_total',
  343. refId: 'A',
  344. step: 2,
  345. },
  346. {
  347. expr: 'sum(rate(grpc_server_handled_total{job="$cluster",grpc_type="unary",grpc_code!="OK"}[5m]))',
  348. format: 'time_series',
  349. intervalFactor: 2,
  350. legendFormat: 'RPC Failed Rate',
  351. metric: 'grpc_server_handled_total',
  352. refId: 'B',
  353. step: 2,
  354. },
  355. ],
  356. thresholds: [],
  357. timeFrom: null,
  358. timeShift: null,
  359. title: 'RPC Rate',
  360. tooltip: {
  361. msResolution: false,
  362. shared: true,
  363. sort: 0,
  364. value_type: 'individual',
  365. },
  366. type: 'graph',
  367. xaxis: {
  368. mode: 'time',
  369. name: null,
  370. show: true,
  371. values: [],
  372. },
  373. yaxes: [
  374. {
  375. format: 'ops',
  376. label: null,
  377. logBase: 1,
  378. max: null,
  379. min: null,
  380. show: true,
  381. },
  382. {
  383. format: 'short',
  384. label: null,
  385. logBase: 1,
  386. max: null,
  387. min: null,
  388. show: true,
  389. },
  390. ],
  391. },
  392. {
  393. aliasColors: {},
  394. bars: false,
  395. datasource: '$datasource',
  396. editable: true,
  397. 'error': false,
  398. fill: 0,
  399. id: 41,
  400. isNew: true,
  401. legend: {
  402. avg: false,
  403. current: false,
  404. max: false,
  405. min: false,
  406. show: false,
  407. total: false,
  408. values: false,
  409. },
  410. lines: true,
  411. linewidth: 2,
  412. links: [],
  413. nullPointMode: 'connected',
  414. percentage: false,
  415. pointradius: 5,
  416. points: false,
  417. renderer: 'flot',
  418. seriesOverrides: [],
  419. span: 4,
  420. stack: true,
  421. steppedLine: false,
  422. targets: [
  423. {
  424. expr: 'sum(grpc_server_started_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})',
  425. intervalFactor: 2,
  426. legendFormat: 'Watch Streams',
  427. metric: 'grpc_server_handled_total',
  428. refId: 'A',
  429. step: 4,
  430. },
  431. {
  432. expr: 'sum(grpc_server_started_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})',
  433. intervalFactor: 2,
  434. legendFormat: 'Lease Streams',
  435. metric: 'grpc_server_handled_total',
  436. refId: 'B',
  437. step: 4,
  438. },
  439. ],
  440. thresholds: [],
  441. timeFrom: null,
  442. timeShift: null,
  443. title: 'Active Streams',
  444. tooltip: {
  445. msResolution: false,
  446. shared: true,
  447. sort: 0,
  448. value_type: 'individual',
  449. },
  450. type: 'graph',
  451. xaxis: {
  452. mode: 'time',
  453. name: null,
  454. show: true,
  455. values: [],
  456. },
  457. yaxes: [
  458. {
  459. format: 'short',
  460. label: '',
  461. logBase: 1,
  462. max: null,
  463. min: null,
  464. show: true,
  465. },
  466. {
  467. format: 'short',
  468. label: null,
  469. logBase: 1,
  470. max: null,
  471. min: null,
  472. show: true,
  473. },
  474. ],
  475. },
  476. ],
  477. showTitle: false,
  478. title: 'Row',
  479. },
  480. {
  481. collapse: false,
  482. editable: true,
  483. height: '250px',
  484. panels: [
  485. {
  486. aliasColors: {},
  487. bars: false,
  488. datasource: '$datasource',
  489. decimals: null,
  490. editable: true,
  491. 'error': false,
  492. fill: 0,
  493. grid: {},
  494. id: 1,
  495. legend: {
  496. avg: false,
  497. current: false,
  498. max: false,
  499. min: false,
  500. show: false,
  501. total: false,
  502. values: false,
  503. },
  504. lines: true,
  505. linewidth: 2,
  506. links: [],
  507. nullPointMode: 'connected',
  508. percentage: false,
  509. pointradius: 5,
  510. points: false,
  511. renderer: 'flot',
  512. seriesOverrides: [],
  513. span: 4,
  514. stack: false,
  515. steppedLine: false,
  516. targets: [{
  517. expr: 'etcd_debugging_mvcc_db_total_size_in_bytes{job="$cluster"}',
  518. hide: false,
  519. interval: '',
  520. intervalFactor: 2,
  521. legendFormat: '{{instance}} DB Size',
  522. metric: '',
  523. refId: 'A',
  524. step: 4,
  525. }],
  526. thresholds: [],
  527. timeFrom: null,
  528. timeShift: null,
  529. title: 'DB Size',
  530. tooltip: {
  531. msResolution: false,
  532. shared: true,
  533. sort: 0,
  534. value_type: 'cumulative',
  535. },
  536. type: 'graph',
  537. xaxis: {
  538. mode: 'time',
  539. name: null,
  540. show: true,
  541. values: [],
  542. },
  543. yaxes: [
  544. {
  545. format: 'bytes',
  546. logBase: 1,
  547. max: null,
  548. min: null,
  549. show: true,
  550. },
  551. {
  552. format: 'short',
  553. logBase: 1,
  554. max: null,
  555. min: null,
  556. show: false,
  557. },
  558. ],
  559. },
  560. {
  561. aliasColors: {},
  562. bars: false,
  563. datasource: '$datasource',
  564. editable: true,
  565. 'error': false,
  566. fill: 0,
  567. grid: {},
  568. id: 3,
  569. legend: {
  570. avg: false,
  571. current: false,
  572. max: false,
  573. min: false,
  574. show: false,
  575. total: false,
  576. values: false,
  577. },
  578. lines: true,
  579. linewidth: 2,
  580. links: [],
  581. nullPointMode: 'connected',
  582. percentage: false,
  583. pointradius: 1,
  584. points: false,
  585. renderer: 'flot',
  586. seriesOverrides: [],
  587. span: 4,
  588. stack: false,
  589. steppedLine: true,
  590. targets: [
  591. {
  592. expr: 'histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job="$cluster"}[5m])) by (instance, le))',
  593. hide: false,
  594. intervalFactor: 2,
  595. legendFormat: '{{instance}} WAL fsync',
  596. metric: 'etcd_disk_wal_fsync_duration_seconds_bucket',
  597. refId: 'A',
  598. step: 4,
  599. },
  600. {
  601. expr: 'histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job="$cluster"}[5m])) by (instance, le))',
  602. intervalFactor: 2,
  603. legendFormat: '{{instance}} DB fsync',
  604. metric: 'etcd_disk_backend_commit_duration_seconds_bucket',
  605. refId: 'B',
  606. step: 4,
  607. },
  608. ],
  609. thresholds: [],
  610. timeFrom: null,
  611. timeShift: null,
  612. title: 'Disk Sync Duration',
  613. tooltip: {
  614. msResolution: false,
  615. shared: true,
  616. sort: 0,
  617. value_type: 'cumulative',
  618. },
  619. type: 'graph',
  620. xaxis: {
  621. mode: 'time',
  622. name: null,
  623. show: true,
  624. values: [],
  625. },
  626. yaxes: [
  627. {
  628. format: 's',
  629. logBase: 1,
  630. max: null,
  631. min: null,
  632. show: true,
  633. },
  634. {
  635. format: 'short',
  636. logBase: 1,
  637. max: null,
  638. min: null,
  639. show: false,
  640. },
  641. ],
  642. },
  643. {
  644. aliasColors: {},
  645. bars: false,
  646. datasource: '$datasource',
  647. editable: true,
  648. 'error': false,
  649. fill: 0,
  650. id: 29,
  651. isNew: true,
  652. legend: {
  653. avg: false,
  654. current: false,
  655. max: false,
  656. min: false,
  657. show: false,
  658. total: false,
  659. values: false,
  660. },
  661. lines: true,
  662. linewidth: 2,
  663. links: [],
  664. nullPointMode: 'connected',
  665. percentage: false,
  666. pointradius: 5,
  667. points: false,
  668. renderer: 'flot',
  669. seriesOverrides: [],
  670. span: 4,
  671. stack: false,
  672. steppedLine: false,
  673. targets: [{
  674. expr: 'process_resident_memory_bytes{job="$cluster"}',
  675. intervalFactor: 2,
  676. legendFormat: '{{instance}} Resident Memory',
  677. metric: 'process_resident_memory_bytes',
  678. refId: 'A',
  679. step: 4,
  680. }],
  681. thresholds: [],
  682. timeFrom: null,
  683. timeShift: null,
  684. title: 'Memory',
  685. tooltip: {
  686. msResolution: false,
  687. shared: true,
  688. sort: 0,
  689. value_type: 'individual',
  690. },
  691. type: 'graph',
  692. xaxis: {
  693. mode: 'time',
  694. name: null,
  695. show: true,
  696. values: [],
  697. },
  698. yaxes: [
  699. {
  700. format: 'bytes',
  701. label: null,
  702. logBase: 1,
  703. max: null,
  704. min: null,
  705. show: true,
  706. },
  707. {
  708. format: 'short',
  709. label: null,
  710. logBase: 1,
  711. max: null,
  712. min: null,
  713. show: true,
  714. },
  715. ],
  716. },
  717. ],
  718. title: 'New row',
  719. },
  720. {
  721. collapse: false,
  722. editable: true,
  723. height: '250px',
  724. panels: [
  725. {
  726. aliasColors: {},
  727. bars: false,
  728. datasource: '$datasource',
  729. editable: true,
  730. 'error': false,
  731. fill: 5,
  732. id: 22,
  733. isNew: true,
  734. legend: {
  735. avg: false,
  736. current: false,
  737. max: false,
  738. min: false,
  739. show: false,
  740. total: false,
  741. values: false,
  742. },
  743. lines: true,
  744. linewidth: 2,
  745. links: [],
  746. nullPointMode: 'connected',
  747. percentage: false,
  748. pointradius: 5,
  749. points: false,
  750. renderer: 'flot',
  751. seriesOverrides: [],
  752. span: 3,
  753. stack: true,
  754. steppedLine: false,
  755. targets: [{
  756. expr: 'rate(etcd_network_client_grpc_received_bytes_total{job="$cluster"}[5m])',
  757. intervalFactor: 2,
  758. legendFormat: '{{instance}} Client Traffic In',
  759. metric: 'etcd_network_client_grpc_received_bytes_total',
  760. refId: 'A',
  761. step: 4,
  762. }],
  763. thresholds: [],
  764. timeFrom: null,
  765. timeShift: null,
  766. title: 'Client Traffic In',
  767. tooltip: {
  768. msResolution: false,
  769. shared: true,
  770. sort: 0,
  771. value_type: 'individual',
  772. },
  773. type: 'graph',
  774. xaxis: {
  775. mode: 'time',
  776. name: null,
  777. show: true,
  778. values: [],
  779. },
  780. yaxes: [
  781. {
  782. format: 'Bps',
  783. label: null,
  784. logBase: 1,
  785. max: null,
  786. min: null,
  787. show: true,
  788. },
  789. {
  790. format: 'short',
  791. label: null,
  792. logBase: 1,
  793. max: null,
  794. min: null,
  795. show: true,
  796. },
  797. ],
  798. },
  799. {
  800. aliasColors: {},
  801. bars: false,
  802. datasource: '$datasource',
  803. editable: true,
  804. 'error': false,
  805. fill: 5,
  806. id: 21,
  807. isNew: true,
  808. legend: {
  809. avg: false,
  810. current: false,
  811. max: false,
  812. min: false,
  813. show: false,
  814. total: false,
  815. values: false,
  816. },
  817. lines: true,
  818. linewidth: 2,
  819. links: [],
  820. nullPointMode: 'connected',
  821. percentage: false,
  822. pointradius: 5,
  823. points: false,
  824. renderer: 'flot',
  825. seriesOverrides: [],
  826. span: 3,
  827. stack: true,
  828. steppedLine: false,
  829. targets: [{
  830. expr: 'rate(etcd_network_client_grpc_sent_bytes_total{job="$cluster"}[5m])',
  831. intervalFactor: 2,
  832. legendFormat: '{{instance}} Client Traffic Out',
  833. metric: 'etcd_network_client_grpc_sent_bytes_total',
  834. refId: 'A',
  835. step: 4,
  836. }],
  837. thresholds: [],
  838. timeFrom: null,
  839. timeShift: null,
  840. title: 'Client Traffic Out',
  841. tooltip: {
  842. msResolution: false,
  843. shared: true,
  844. sort: 0,
  845. value_type: 'individual',
  846. },
  847. type: 'graph',
  848. xaxis: {
  849. mode: 'time',
  850. name: null,
  851. show: true,
  852. values: [],
  853. },
  854. yaxes: [
  855. {
  856. format: 'Bps',
  857. label: null,
  858. logBase: 1,
  859. max: null,
  860. min: null,
  861. show: true,
  862. },
  863. {
  864. format: 'short',
  865. label: null,
  866. logBase: 1,
  867. max: null,
  868. min: null,
  869. show: true,
  870. },
  871. ],
  872. },
  873. {
  874. aliasColors: {},
  875. bars: false,
  876. datasource: '$datasource',
  877. editable: true,
  878. 'error': false,
  879. fill: 0,
  880. id: 20,
  881. isNew: true,
  882. legend: {
  883. avg: false,
  884. current: false,
  885. max: false,
  886. min: false,
  887. show: false,
  888. total: false,
  889. values: false,
  890. },
  891. lines: true,
  892. linewidth: 2,
  893. links: [],
  894. nullPointMode: 'connected',
  895. percentage: false,
  896. pointradius: 5,
  897. points: false,
  898. renderer: 'flot',
  899. seriesOverrides: [],
  900. span: 3,
  901. stack: false,
  902. steppedLine: false,
  903. targets: [{
  904. expr: 'sum(rate(etcd_network_peer_received_bytes_total{job="$cluster"}[5m])) by (instance)',
  905. intervalFactor: 2,
  906. legendFormat: '{{instance}} Peer Traffic In',
  907. metric: 'etcd_network_peer_received_bytes_total',
  908. refId: 'A',
  909. step: 4,
  910. }],
  911. thresholds: [],
  912. timeFrom: null,
  913. timeShift: null,
  914. title: 'Peer Traffic In',
  915. tooltip: {
  916. msResolution: false,
  917. shared: true,
  918. sort: 0,
  919. value_type: 'individual',
  920. },
  921. type: 'graph',
  922. xaxis: {
  923. mode: 'time',
  924. name: null,
  925. show: true,
  926. values: [],
  927. },
  928. yaxes: [
  929. {
  930. format: 'Bps',
  931. label: null,
  932. logBase: 1,
  933. max: null,
  934. min: null,
  935. show: true,
  936. },
  937. {
  938. format: 'short',
  939. label: null,
  940. logBase: 1,
  941. max: null,
  942. min: null,
  943. show: true,
  944. },
  945. ],
  946. },
  947. {
  948. aliasColors: {},
  949. bars: false,
  950. datasource: '$datasource',
  951. decimals: null,
  952. editable: true,
  953. 'error': false,
  954. fill: 0,
  955. grid: {},
  956. id: 16,
  957. legend: {
  958. avg: false,
  959. current: false,
  960. max: false,
  961. min: false,
  962. show: false,
  963. total: false,
  964. values: false,
  965. },
  966. lines: true,
  967. linewidth: 2,
  968. links: [],
  969. nullPointMode: 'connected',
  970. percentage: false,
  971. pointradius: 5,
  972. points: false,
  973. renderer: 'flot',
  974. seriesOverrides: [],
  975. span: 3,
  976. stack: false,
  977. steppedLine: false,
  978. targets: [{
  979. expr: 'sum(rate(etcd_network_peer_sent_bytes_total{job="$cluster"}[5m])) by (instance)',
  980. hide: false,
  981. interval: '',
  982. intervalFactor: 2,
  983. legendFormat: '{{instance}} Peer Traffic Out',
  984. metric: 'etcd_network_peer_sent_bytes_total',
  985. refId: 'A',
  986. step: 4,
  987. }],
  988. thresholds: [],
  989. timeFrom: null,
  990. timeShift: null,
  991. title: 'Peer Traffic Out',
  992. tooltip: {
  993. msResolution: false,
  994. shared: true,
  995. sort: 0,
  996. value_type: 'cumulative',
  997. },
  998. type: 'graph',
  999. xaxis: {
  1000. mode: 'time',
  1001. name: null,
  1002. show: true,
  1003. values: [],
  1004. },
  1005. yaxes: [
  1006. {
  1007. format: 'Bps',
  1008. logBase: 1,
  1009. max: null,
  1010. min: null,
  1011. show: true,
  1012. },
  1013. {
  1014. format: 'short',
  1015. logBase: 1,
  1016. max: null,
  1017. min: null,
  1018. show: true,
  1019. },
  1020. ],
  1021. },
  1022. ],
  1023. title: 'New row',
  1024. },
  1025. {
  1026. collapse: false,
  1027. editable: true,
  1028. height: '250px',
  1029. panels: [
  1030. {
  1031. aliasColors: {},
  1032. bars: false,
  1033. datasource: '$datasource',
  1034. editable: true,
  1035. 'error': false,
  1036. fill: 0,
  1037. id: 40,
  1038. isNew: true,
  1039. legend: {
  1040. avg: false,
  1041. current: false,
  1042. max: false,
  1043. min: false,
  1044. show: false,
  1045. total: false,
  1046. values: false,
  1047. },
  1048. lines: true,
  1049. linewidth: 2,
  1050. links: [],
  1051. nullPointMode: 'connected',
  1052. percentage: false,
  1053. pointradius: 5,
  1054. points: false,
  1055. renderer: 'flot',
  1056. seriesOverrides: [],
  1057. span: 6,
  1058. stack: false,
  1059. steppedLine: false,
  1060. targets: [
  1061. {
  1062. expr: 'sum(rate(etcd_server_proposals_failed_total{job="$cluster"}[5m]))',
  1063. intervalFactor: 2,
  1064. legendFormat: 'Proposal Failure Rate',
  1065. metric: 'etcd_server_proposals_failed_total',
  1066. refId: 'A',
  1067. step: 2,
  1068. },
  1069. {
  1070. expr: 'sum(etcd_server_proposals_pending{job="$cluster"})',
  1071. intervalFactor: 2,
  1072. legendFormat: 'Proposal Pending Total',
  1073. metric: 'etcd_server_proposals_pending',
  1074. refId: 'B',
  1075. step: 2,
  1076. },
  1077. {
  1078. expr: 'sum(rate(etcd_server_proposals_committed_total{job="$cluster"}[5m]))',
  1079. intervalFactor: 2,
  1080. legendFormat: 'Proposal Commit Rate',
  1081. metric: 'etcd_server_proposals_committed_total',
  1082. refId: 'C',
  1083. step: 2,
  1084. },
  1085. {
  1086. expr: 'sum(rate(etcd_server_proposals_applied_total{job="$cluster"}[5m]))',
  1087. intervalFactor: 2,
  1088. legendFormat: 'Proposal Apply Rate',
  1089. refId: 'D',
  1090. step: 2,
  1091. },
  1092. ],
  1093. thresholds: [],
  1094. timeFrom: null,
  1095. timeShift: null,
  1096. title: 'Raft Proposals',
  1097. tooltip: {
  1098. msResolution: false,
  1099. shared: true,
  1100. sort: 0,
  1101. value_type: 'individual',
  1102. },
  1103. type: 'graph',
  1104. xaxis: {
  1105. mode: 'time',
  1106. name: null,
  1107. show: true,
  1108. values: [],
  1109. },
  1110. yaxes: [
  1111. {
  1112. format: 'short',
  1113. label: '',
  1114. logBase: 1,
  1115. max: null,
  1116. min: null,
  1117. show: true,
  1118. },
  1119. {
  1120. format: 'short',
  1121. label: null,
  1122. logBase: 1,
  1123. max: null,
  1124. min: null,
  1125. show: true,
  1126. },
  1127. ],
  1128. },
  1129. {
  1130. aliasColors: {},
  1131. bars: false,
  1132. datasource: '$datasource',
  1133. decimals: 0,
  1134. editable: true,
  1135. 'error': false,
  1136. fill: 0,
  1137. id: 19,
  1138. isNew: true,
  1139. legend: {
  1140. alignAsTable: false,
  1141. avg: false,
  1142. current: false,
  1143. max: false,
  1144. min: false,
  1145. rightSide: false,
  1146. show: false,
  1147. total: false,
  1148. values: false,
  1149. },
  1150. lines: true,
  1151. linewidth: 2,
  1152. links: [],
  1153. nullPointMode: 'connected',
  1154. percentage: false,
  1155. pointradius: 5,
  1156. points: false,
  1157. renderer: 'flot',
  1158. seriesOverrides: [],
  1159. span: 6,
  1160. stack: false,
  1161. steppedLine: false,
  1162. targets: [{
  1163. expr: 'changes(etcd_server_leader_changes_seen_total{job="$cluster"}[1d])',
  1164. intervalFactor: 2,
  1165. legendFormat: '{{instance}} Total Leader Elections Per Day',
  1166. metric: 'etcd_server_leader_changes_seen_total',
  1167. refId: 'A',
  1168. step: 2,
  1169. }],
  1170. thresholds: [],
  1171. timeFrom: null,
  1172. timeShift: null,
  1173. title: 'Total Leader Elections Per Day',
  1174. tooltip: {
  1175. msResolution: false,
  1176. shared: true,
  1177. sort: 0,
  1178. value_type: 'individual',
  1179. },
  1180. type: 'graph',
  1181. xaxis: {
  1182. mode: 'time',
  1183. name: null,
  1184. show: true,
  1185. values: [],
  1186. },
  1187. yaxes: [
  1188. {
  1189. format: 'short',
  1190. label: null,
  1191. logBase: 1,
  1192. max: null,
  1193. min: null,
  1194. show: true,
  1195. },
  1196. {
  1197. format: 'short',
  1198. label: null,
  1199. logBase: 1,
  1200. max: null,
  1201. min: null,
  1202. show: true,
  1203. },
  1204. ],
  1205. },
  1206. ],
  1207. title: 'New row',
  1208. },
  1209. ],
  1210. time: {
  1211. from: 'now-15m',
  1212. to: 'now',
  1213. },
  1214. timepicker: {
  1215. now: true,
  1216. refresh_intervals: [
  1217. '5s',
  1218. '10s',
  1219. '30s',
  1220. '1m',
  1221. '5m',
  1222. '15m',
  1223. '30m',
  1224. '1h',
  1225. '2h',
  1226. '1d',
  1227. ],
  1228. time_options: [
  1229. '5m',
  1230. '15m',
  1231. '1h',
  1232. '6h',
  1233. '12h',
  1234. '24h',
  1235. '2d',
  1236. '7d',
  1237. '30d',
  1238. ],
  1239. },
  // Dashboard template variables.
  templating: {
    list: [
      // `datasource`: a variable of type 'datasource' queried with
      // 'prometheus'; panels in this dashboard reference it as '$datasource'.
      {
        name: 'datasource',
        type: 'datasource',
        query: 'prometheus',
        label: null,
        hide: 0,
        refresh: 1,
        regex: '',
        options: [],
        current: { text: 'Prometheus', value: 'Prometheus' },
      },
      // `cluster`: populated from the `job` label values of
      // `etcd_server_has_leader`; panel queries filter on `job="$cluster"`.
      {
        name: 'cluster',
        label: 'cluster',
        type: 'query',
        datasource: '$datasource',
        query: 'label_values(etcd_server_has_leader, job)',
        regex: '',
        refresh: 1,
        sort: 2,
        hide: 0,
        multi: false,
        includeAll: false,
        allValue: null,
        options: [],
        current: { text: 'prod', value: 'prod' },
        useTags: false,
        tags: [],
        tagsQuery: '',
        tagValuesQuery: '',
      },
    ],
  },
  1281. annotations: {
  1282. list: [],
  1283. },
  1284. refresh: '10s',
  1285. schemaVersion: 13,
  1286. version: 215,
  1287. links: [],
  1288. gnetId: null,
  1289. },
  1290. },
  1291. }