// mixin.libsonnet: etcd monitoring mixin (Prometheus alerting rules and a Grafana dashboard).
{
  // Mixin configuration. Declared with `+::` so it merges with (and can be
  // overridden by) a `_config` supplied by whoever imports this file.
  _config+:: {
    // Label matcher spliced into alert expressions through `%(etcd_selector)s`.
    etcd_selector: 'job=~".*etcd.*"',
  },
  // Prometheus alerting rules for etcd.
  //
  // Every `expr` is a text block formatted with `$._config`, so
  // `%(etcd_selector)s` expands to the configured label matcher. Several alerts
  // are intentionally declared twice under the same name: once with
  // `severity: warning` and once with a higher threshold and `severity: critical`.
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'etcd',
        rules: [
          {
            // Fires when the number of members that are up is below quorum
            // ((members + 1) / 2) for the job.
            alert: 'etcdInsufficientMembers',
            expr: |||
              sum(up{%(etcd_selector)s} == bool 1) by (job) < ((count(up{%(etcd_selector)s}) by (job) + 1) / 2)
            ||| % $._config,
            'for': '3m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).',
            },
          },
          {
            alert: 'etcdNoLeader',
            expr: |||
              etcd_server_has_leader{%(etcd_selector)s} == 0
            ||| % $._config,
            'for': '1m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.',
            },
          },
          {
            // `increase` (not `rate`) is used so that the value is the number of
            // leader changes in the window; `rate` yields a per-second figure,
            // which can practically never exceed the count threshold of 3.
            alert: 'etcdHighNumberOfLeaderChanges',
            expr: |||
              increase(etcd_server_leader_changes_seen_total{%(etcd_selector)s}[15m]) > 3
            ||| % $._config,
            'for': '15m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last 15 minutes.',
            },
          },
          {
            // Percentage of non-OK gRPC responses per method; warning above 1%.
            alert: 'etcdHighNumberOfFailedGRPCRequests',
            expr: |||
              100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
                /
              sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) BY (job, instance, grpc_service, grpc_method)
                > 1
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
            },
          },
          {
            // Same expression as the warning above; critical above 5%.
            alert: 'etcdHighNumberOfFailedGRPCRequests',
            expr: |||
              100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
                /
              sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) BY (job, instance, grpc_service, grpc_method)
                > 5
            ||| % $._config,
            'for': '5m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
            },
          },
          {
            alert: 'etcdGRPCRequestsSlow',
            expr: |||
              histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{%(etcd_selector)s, grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
              > 0.15
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
            },
          },
          {
            alert: 'etcdMemberCommunicationSlow',
            expr: |||
              histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{%(etcd_selector)s}[5m]))
              > 0.15
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
            },
          },
          {
            // `increase` (not `rate`) so that the value is the number of failed
            // proposals in the window, matching both the threshold and the message.
            alert: 'etcdHighNumberOfFailedProposals',
            expr: |||
              increase(etcd_server_proposals_failed_total{%(etcd_selector)s}[15m]) > 5
            ||| % $._config,
            'for': '15m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 15 minutes on etcd instance {{ $labels.instance }}.',
            },
          },
          {
            alert: 'etcdHighFsyncDurations',
            expr: |||
              histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{%(etcd_selector)s}[5m]))
              > 0.5
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
            },
          },
          {
            alert: 'etcdHighCommitDurations',
            expr: |||
              histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{%(etcd_selector)s}[5m]))
              > 0.25
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.',
            },
          },
          {
            // The ratio is scaled by 100 so that {{ $value }} really is the
            // percentage the message claims (threshold 1 == the former 0.01).
            // `job` and `instance` are kept in the aggregation because the
            // message refers to {{ $labels.instance }}.
            alert: 'etcdHighNumberOfFailedHTTPRequests',
            expr: |||
              100 * sum(rate(etcd_http_failed_total{%(etcd_selector)s, code!="404"}[5m])) BY (job, instance, method)
                /
              sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m])) BY (job, instance, method)
                > 1
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.',
            },
          },
          {
            // Same expression as the warning above; critical above 5%
            // (the former ratio threshold 0.05).
            alert: 'etcdHighNumberOfFailedHTTPRequests',
            expr: |||
              100 * sum(rate(etcd_http_failed_total{%(etcd_selector)s, code!="404"}[5m])) BY (job, instance, method)
                /
              sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m])) BY (job, instance, method)
                > 5
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.',
            },
          },
          {
            // Restricted to the configured etcd jobs like every other rule here;
            // without the selector the rule would match the metric from any job.
            alert: 'etcdHTTPRequestsSlow',
            expr: |||
              histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket{%(etcd_selector)s}[5m]))
              > 0.15
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow.',
            },
          },
        ],
      },
    ],
  },
  // Grafana dashboards contributed by this mixin, keyed by output file name.
  // The panels are assembled from the object-level `local` helpers below; the
  // locals are not fields, so the only field added is 'etcd.json'.
  grafanaDashboards+:: {
    // Y axis description without a `label` field.
    local axis(format, show=true) = {
      format: format,
      logBase: 1,
      max: null,
      min: null,
      show: show,
    },

    // Y axis description that additionally carries a `label` field.
    local labelledAxis(format, label=null) = axis(format) + { label: label },

    // One Prometheus query of a panel. `metric` is emitted only when given.
    local query(refId, expr, legendFormat, step, metric=null) =
      {
        expr: expr,
        intervalFactor: 2,
        legendFormat: legendFormat,
        refId: refId,
        step: step,
      } + (if metric == null then {} else { metric: metric }),

    // Settings common to all graph panels; individual panels override
    // fill / stack / steppedLine / pointradius / tooltip where they differ.
    local graphBase = {
      aliasColors: {},
      bars: false,
      datasource: '$datasource',
      editable: true,
      'error': false,
      fill: 0,
      legend: {
        avg: false,
        current: false,
        max: false,
        min: false,
        show: false,
        total: false,
        values: false,
      },
      lines: true,
      linewidth: 2,
      links: [],
      nullPointMode: 'connected',
      percentage: false,
      pointradius: 5,
      points: false,
      renderer: 'flot',
      seriesOverrides: [],
      stack: false,
      steppedLine: false,
      thresholds: [],
      timeFrom: null,
      timeShift: null,
      tooltip: {
        msResolution: false,
        shared: true,
        sort: 0,
        value_type: 'individual',
      },
      type: 'graph',
      xaxis: {
        mode: 'time',
        name: null,
        show: true,
        values: [],
      },
    },

    // Graph panels that carry `isNew: true`.
    local newGraph = graphBase + { isNew: true },

    // Graph panels that carry `grid: {}` (and no `isNew`) and use a cumulative tooltip.
    local gridGraph = graphBase + {
      grid: {},
      tooltip+: { value_type: 'cumulative' },
    },

    // Dashboard row wrapper around a list of panels.
    local row(panels) = {
      collapse: false,
      editable: true,
      height: '250px',
      panels: panels,
      title: 'New row',
    },

    'etcd.json': {
      id: 6,
      title: 'etcd',
      description: 'etcd sample Grafana dashboard with Prometheus',
      tags: [],
      style: 'dark',
      timezone: 'browser',
      editable: true,
      hideControls: false,
      sharedCrosshair: false,
      rows: [
        // Row 1: cluster status, RPC rate, active streams.
        row([
          {
            cacheTimeout: null,
            colorBackground: false,
            colorValue: false,
            colors: [
              'rgba(245, 54, 54, 0.9)',
              'rgba(237, 129, 40, 0.89)',
              'rgba(50, 172, 45, 0.97)',
            ],
            datasource: '$datasource',
            editable: true,
            'error': false,
            format: 'none',
            gauge: {
              maxValue: 100,
              minValue: 0,
              show: false,
              thresholdLabels: false,
              thresholdMarkers: true,
            },
            id: 28,
            interval: null,
            isNew: true,
            links: [],
            mappingType: 1,
            mappingTypes: [
              { name: 'value to text', value: 1 },
              { name: 'range to text', value: 2 },
            ],
            maxDataPoints: 100,
            nullPointMode: 'connected',
            nullText: null,
            postfix: '',
            postfixFontSize: '50%',
            prefix: '',
            prefixFontSize: '50%',
            rangeMaps: [
              { from: 'null', text: 'N/A', to: 'null' },
            ],
            span: 3,
            sparkline: {
              fillColor: 'rgba(31, 118, 189, 0.18)',
              full: false,
              lineColor: 'rgb(31, 120, 193)',
              show: false,
            },
            targets: [
              query('A', 'sum(etcd_server_has_leader{job="$cluster"})', '', 20, 'etcd_server_has_leader'),
            ],
            thresholds: '',
            title: 'Up',
            type: 'singlestat',
            valueFontSize: '200%',
            valueMaps: [
              { op: '=', text: 'N/A', value: 'null' },
            ],
            valueName: 'avg',
          },
          newGraph + {
            id: 23,
            span: 5,
            title: 'RPC Rate',
            targets: [
              query(
                'A',
                'sum(rate(grpc_server_started_total{job="$cluster",grpc_type="unary"}[5m]))',
                'RPC Rate',
                2,
                'grpc_server_started_total'
              ) + { format: 'time_series' },
              query(
                'B',
                'sum(rate(grpc_server_handled_total{job="$cluster",grpc_type="unary",grpc_code!="OK"}[5m]))',
                'RPC Failed Rate',
                2,
                'grpc_server_handled_total'
              ) + { format: 'time_series' },
            ],
            yaxes: [labelledAxis('ops'), labelledAxis('short')],
          },
          newGraph + {
            id: 41,
            span: 4,
            stack: true,
            title: 'Active Streams',
            targets: [
              query(
                'A',
                'sum(grpc_server_started_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})',
                'Watch Streams',
                4,
                'grpc_server_handled_total'
              ),
              query(
                'B',
                'sum(grpc_server_started_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})',
                'Lease Streams',
                4,
                'grpc_server_handled_total'
              ),
            ],
            yaxes: [labelledAxis('short', ''), labelledAxis('short')],
          },
        ]) + { showTitle: false, title: 'Row' },

        // Row 2: DB size, disk sync duration, memory.
        row([
          gridGraph + {
            decimals: null,
            id: 1,
            span: 4,
            title: 'DB Size',
            targets: [
              query(
                'A',
                'etcd_debugging_mvcc_db_total_size_in_bytes{job="$cluster"}',
                '{{instance}} DB Size',
                4,
                ''
              ) + { hide: false, interval: '' },
            ],
            yaxes: [axis('bytes'), axis('short', show=false)],
          },
          gridGraph + {
            id: 3,
            pointradius: 1,
            span: 4,
            steppedLine: true,
            title: 'Disk Sync Duration',
            targets: [
              query(
                'A',
                'histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job="$cluster"}[5m])) by (instance, le))',
                '{{instance}} WAL fsync',
                4,
                'etcd_disk_wal_fsync_duration_seconds_bucket'
              ) + { hide: false },
              query(
                'B',
                'histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job="$cluster"}[5m])) by (instance, le))',
                '{{instance}} DB fsync',
                4,
                'etcd_disk_backend_commit_duration_seconds_bucket'
              ),
            ],
            yaxes: [axis('s'), axis('short', show=false)],
          },
          newGraph + {
            id: 29,
            span: 4,
            title: 'Memory',
            targets: [
              query(
                'A',
                'process_resident_memory_bytes{job="$cluster"}',
                '{{instance}} Resident Memory',
                4,
                'process_resident_memory_bytes'
              ),
            ],
            yaxes: [labelledAxis('bytes'), labelledAxis('short')],
          },
        ]),

        // Row 3: client and peer network traffic.
        row([
          newGraph + {
            fill: 5,
            id: 22,
            span: 3,
            stack: true,
            title: 'Client Traffic In',
            targets: [
              query(
                'A',
                'rate(etcd_network_client_grpc_received_bytes_total{job="$cluster"}[5m])',
                '{{instance}} Client Traffic In',
                4,
                'etcd_network_client_grpc_received_bytes_total'
              ),
            ],
            yaxes: [labelledAxis('Bps'), labelledAxis('short')],
          },
          newGraph + {
            fill: 5,
            id: 21,
            span: 3,
            stack: true,
            title: 'Client Traffic Out',
            targets: [
              query(
                'A',
                'rate(etcd_network_client_grpc_sent_bytes_total{job="$cluster"}[5m])',
                '{{instance}} Client Traffic Out',
                4,
                'etcd_network_client_grpc_sent_bytes_total'
              ),
            ],
            yaxes: [labelledAxis('Bps'), labelledAxis('short')],
          },
          newGraph + {
            id: 20,
            span: 3,
            title: 'Peer Traffic In',
            targets: [
              query(
                'A',
                'sum(rate(etcd_network_peer_received_bytes_total{job="$cluster"}[5m])) by (instance)',
                '{{instance}} Peer Traffic In',
                4,
                'etcd_network_peer_received_bytes_total'
              ),
            ],
            yaxes: [labelledAxis('Bps'), labelledAxis('short')],
          },
          gridGraph + {
            decimals: null,
            id: 16,
            span: 3,
            title: 'Peer Traffic Out',
            targets: [
              query(
                'A',
                'sum(rate(etcd_network_peer_sent_bytes_total{job="$cluster"}[5m])) by (instance)',
                '{{instance}} Peer Traffic Out',
                4,
                'etcd_network_peer_sent_bytes_total'
              ) + { hide: false, interval: '' },
            ],
            yaxes: [axis('Bps'), axis('short')],
          },
        ]),

        // Row 4: Raft proposals and leader elections.
        row([
          newGraph + {
            id: 40,
            span: 6,
            title: 'Raft Proposals',
            targets: [
              query(
                'A',
                'sum(rate(etcd_server_proposals_failed_total{job="$cluster"}[5m]))',
                'Proposal Failure Rate',
                2,
                'etcd_server_proposals_failed_total'
              ),
              query(
                'B',
                'sum(etcd_server_proposals_pending{job="$cluster"})',
                'Proposal Pending Total',
                2,
                'etcd_server_proposals_pending'
              ),
              query(
                'C',
                'sum(rate(etcd_server_proposals_committed_total{job="$cluster"}[5m]))',
                'Proposal Commit Rate',
                2,
                'etcd_server_proposals_committed_total'
              ),
              // This query has no `metric` field.
              query(
                'D',
                'sum(rate(etcd_server_proposals_applied_total{job="$cluster"}[5m]))',
                'Proposal Apply Rate',
                2
              ),
            ],
            yaxes: [labelledAxis('short', ''), labelledAxis('short')],
          },
          newGraph + {
            decimals: 0,
            id: 19,
            legend+: { alignAsTable: false, rightSide: false },
            span: 6,
            title: 'Total Leader Elections Per Day',
            targets: [
              query(
                'A',
                'changes(etcd_server_leader_changes_seen_total{job="$cluster"}[1d])',
                '{{instance}} Total Leader Elections Per Day',
                2,
                'etcd_server_leader_changes_seen_total'
              ),
            ],
            yaxes: [labelledAxis('short'), labelledAxis('short')],
          },
        ]),
      ],
      time: {
        from: 'now-15m',
        to: 'now',
      },
      timepicker: {
        now: true,
        refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
        time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
      },
      templating: {
        list: [
          // $datasource: which Prometheus data source the panels query.
          {
            current: {
              text: 'Prometheus',
              value: 'Prometheus',
            },
            hide: 0,
            label: null,
            name: 'datasource',
            options: [],
            query: 'prometheus',
            refresh: 1,
            regex: '',
            type: 'datasource',
          },
          // $cluster: populated from the `job` label values of etcd_server_has_leader.
          {
            allValue: null,
            current: {
              text: 'prod',
              value: 'prod',
            },
            datasource: '$datasource',
            hide: 0,
            includeAll: false,
            label: 'cluster',
            multi: false,
            name: 'cluster',
            options: [],
            query: 'label_values(etcd_server_has_leader, job)',
            refresh: 1,
            regex: '',
            sort: 2,
            tagValuesQuery: '',
            tags: [],
            tagsQuery: '',
            type: 'query',
            useTags: false,
          },
        ],
      },
      annotations: {
        list: [],
      },
      refresh: '10s',
      schemaVersion: 13,
      version: 215,
      links: [],
      gnetId: null,
    },
  },
}