// mixin.libsonnet: etcd monitoring mixin (Prometheus alert rules and a Grafana dashboard).
  1. {
  // Mixin configuration (hidden field, merged into any inherited `_config`).
  // `etcd_selector` is the label matcher that the alert expressions splice in
  // through the `%(etcd_selector)s` placeholder.
  _config+:: {
    etcd_selector: @'job=~".*etcd.*"',
  },
  5. prometheusAlerts+:: {
  6. groups+: [
  7. {
  8. name: 'etcd',
  9. rules: [
  // EtcdInsufficientMembers (critical): per job, the number of targets whose
  // `up` sample equals 1 has stayed below (target count + 1) / 2 for 3 minutes.
  {
    alert: 'EtcdInsufficientMembers',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).',
    },
    labels: { severity: 'critical' },
    "for": '3m',
    expr: std.format(|||
      sum(up{%(etcd_selector)s} == bool 1) by (job) < ((count(up{%(etcd_selector)s}) by (job) + 1) / 2)
    |||, $._config),
  },
  // EtcdNoLeader (critical): a member has reported
  // `etcd_server_has_leader == 0` for 1 minute.
  {
    alert: 'EtcdNoLeader',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.',
    },
    labels: { severity: 'critical' },
    "for": '1m',
    expr: std.format(|||
      etcd_server_has_leader{%(etcd_selector)s} == 0
    |||, $._config),
  },
  // EtcdHighNumberOfLeaderChanges (warning): an instance has observed more
  // than 3 leader changes in the trailing hour, sustained for 15 minutes.
  {
    alert: 'EtcdHighNumberOfLeaderChanges',
    // increase() over 1h yields a count of leader changes, which is what both
    // the threshold and the "{{ $value }} leader changes within the last hour"
    // message describe. The previous rate(...[15m]) produced a per-second
    // value, so the reported number was not a count and the threshold of 3
    // meant three changes per second.
    expr: |||
      increase(etcd_server_leader_changes_seen_total{%(etcd_selector)s}[1h]) > 3
    ||| % $._config,
    'for': '15m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour.',
    },
  },
  // EtcdHighNumberOfFailedGRPCRequests (warning): for a given
  // job/instance/service/method, more than 1% of handled gRPC requests had a
  // non-OK code over 5-minute rates, sustained for 10 minutes.
  {
    alert: 'EtcdHighNumberOfFailedGRPCRequests',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
    },
    labels: { severity: 'warning' },
    "for": '10m',
    expr: std.format(|||
      100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
      /
      sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) BY (job, instance, grpc_service, grpc_method)
      > 1
    |||, $._config),
  },
  // EtcdHighNumberOfFailedGRPCRequests (critical): same ratio as the warning
  // variant, but with a 5% threshold sustained for 5 minutes.
  {
    alert: 'EtcdHighNumberOfFailedGRPCRequests',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
    },
    labels: { severity: 'critical' },
    "for": '5m',
    expr: std.format(|||
      100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
      /
      sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) BY (job, instance, grpc_service, grpc_method)
      > 5
    |||, $._config),
  },
  // EtcdGRPCRequestsSlow (critical): the 0.99 quantile of unary gRPC handling
  // time, per job/instance/service/method, has exceeded 0.15 for 10 minutes.
  {
    alert: 'EtcdGRPCRequestsSlow',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
    labels: { severity: 'critical' },
    "for": '10m',
    expr: std.format(|||
      histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{%(etcd_selector)s, grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
      > 0.15
    |||, $._config),
  },
  // EtcdMemberCommunicationSlow (warning): the 0.99 quantile of the peer
  // round-trip-time histogram has exceeded 0.15 for 10 minutes.
  {
    alert: 'EtcdMemberCommunicationSlow',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
    labels: { severity: 'warning' },
    "for": '10m',
    expr: std.format(|||
      histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.15
    |||, $._config),
  },
  // EtcdHighNumberOfFailedProposals (warning): an instance has recorded more
  // than 5 failed proposals in the trailing hour, sustained for 15 minutes.
  {
    alert: 'EtcdHighNumberOfFailedProposals',
    // increase() over 1h yields the number of failures in the last hour, which
    // is what the message reports as "{{ $value }} proposal failures within the
    // last hour". The previous rate(...[15m]) produced a per-second value, so
    // the reported number was not a count and the threshold of 5 meant five
    // failures per second.
    expr: |||
      increase(etcd_server_proposals_failed_total{%(etcd_selector)s}[1h]) > 5
    ||| % $._config,
    'for': '15m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last hour on etcd instance {{ $labels.instance }}.',
    },
  },
  // EtcdHighFsyncDurations (warning): the 0.99 quantile of the WAL fsync
  // duration histogram has exceeded 0.5 for 10 minutes.
  {
    alert: 'EtcdHighFsyncDurations',
    expr: |||
      histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.5
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      // Message typo fixed: "fync" -> "fsync".
      message: 'Etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
  },
  // EtcdHighCommitDurations (warning): the 0.99 quantile of the backend commit
  // duration histogram has exceeded 0.25 for 10 minutes.
  {
    alert: 'EtcdHighCommitDurations',
    expr: |||
      histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.25
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      // Added the missing verb ("durations are ...") so the sentence reads
      // like the EtcdHighFsyncDurations message.
      message: 'Etcd cluster "{{ $labels.job }}": 99th percentile commit durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
  },
  // Recording rule: stores the ratio of open to maximum file descriptors as
  // `instance:fd_utilization`, the series the FdExhaustionClose alerts read.
  {
    expr: 'process_open_fds / process_max_fds',
    record: 'instance:fd_utilization',
  },
  // FdExhaustionClose (warning): a linear extrapolation of the last hour of
  // `instance:fd_utilization`, projected 4 hours ahead, exceeds 1 for 10 minutes.
  {
    alert: 'FdExhaustionClose',
    annotations: {
      message: '{{ $labels.job }} instance {{ $labels.instance }} will exhaust its file descriptors soon',
    },
    labels: { severity: 'warning' },
    "for": '10m',
    expr: std.format(|||
      predict_linear(instance:fd_utilization{%(etcd_selector)s}[1h], 3600 * 4) > 1
    |||, $._config),
  },
  // FdExhaustionClose (critical): a linear extrapolation of the last 10 minutes
  // of `instance:fd_utilization`, projected 1 hour ahead, exceeds 1 for 10 minutes.
  {
    alert: 'FdExhaustionClose',
    expr: |||
      predict_linear(instance:fd_utilization{%(etcd_selector)s}[10m], 3600) > 1
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'critical',
    },
    annotations: {
      // Uses the `message` key like every other alert in this group (this one
      // previously used `description`), so templates that render
      // `annotations.message` also show this alert's text.
      message: '{{ $labels.job }} instance {{ $labels.instance }} will exhaust its file descriptors soon',
    },
  },
  180. ],
  181. },
  182. ],
  183. },
  184. grafanaDashboards+:: {
  185. 'etcd.json': {
  186. id: 6,
  187. title: 'etcd',
  188. description: 'etcd sample Grafana dashboard with Prometheus',
  189. tags: [],
  190. style: 'dark',
  191. timezone: 'browser',
  192. editable: true,
  193. hideControls: false,
  194. sharedCrosshair: false,
  195. rows: [
  196. {
  197. collapse: false,
  198. editable: true,
  199. height: '250px',
  200. panels: [
  201. {
  202. cacheTimeout: null,
  203. colorBackground: false,
  204. colorValue: false,
  205. colors: [
  206. 'rgba(245, 54, 54, 0.9)',
  207. 'rgba(237, 129, 40, 0.89)',
  208. 'rgba(50, 172, 45, 0.97)',
  209. ],
  210. datasource: '$datasource',
  211. editable: true,
  212. 'error': false,
  213. format: 'none',
  214. gauge: {
  215. maxValue: 100,
  216. minValue: 0,
  217. show: false,
  218. thresholdLabels: false,
  219. thresholdMarkers: true,
  220. },
  221. id: 28,
  222. interval: null,
  223. isNew: true,
  224. links: [],
  225. mappingType: 1,
  226. mappingTypes: [
  227. {
  228. name: 'value to text',
  229. value: 1,
  230. },
  231. {
  232. name: 'range to text',
  233. value: 2,
  234. },
  235. ],
  236. maxDataPoints: 100,
  237. nullPointMode: 'connected',
  238. nullText: null,
  239. postfix: '',
  240. postfixFontSize: '50%',
  241. prefix: '',
  242. prefixFontSize: '50%',
  243. rangeMaps: [{
  244. from: 'null',
  245. text: 'N/A',
  246. to: 'null',
  247. }],
  248. span: 3,
  249. sparkline: {
  250. fillColor: 'rgba(31, 118, 189, 0.18)',
  251. full: false,
  252. lineColor: 'rgb(31, 120, 193)',
  253. show: false,
  254. },
  255. targets: [{
  256. expr: 'sum(etcd_server_has_leader{job="$cluster"})',
  257. intervalFactor: 2,
  258. legendFormat: '',
  259. metric: 'etcd_server_has_leader',
  260. refId: 'A',
  261. step: 20,
  262. }],
  263. thresholds: '',
  264. title: 'Up',
  265. type: 'singlestat',
  266. valueFontSize: '200%',
  267. valueMaps: [{
  268. op: '=',
  269. text: 'N/A',
  270. value: 'null',
  271. }],
  272. valueName: 'avg',
  273. },
  274. {
  275. aliasColors: {},
  276. bars: false,
  277. datasource: '$datasource',
  278. editable: true,
  279. 'error': false,
  280. fill: 0,
  281. id: 23,
  282. isNew: true,
  283. legend: {
  284. avg: false,
  285. current: false,
  286. max: false,
  287. min: false,
  288. show: false,
  289. total: false,
  290. values: false,
  291. },
  292. lines: true,
  293. linewidth: 2,
  294. links: [],
  295. nullPointMode: 'connected',
  296. percentage: false,
  297. pointradius: 5,
  298. points: false,
  299. renderer: 'flot',
  300. seriesOverrides: [],
  301. span: 5,
  302. stack: false,
  303. steppedLine: false,
  304. targets: [
  305. {
  306. expr: 'sum(rate(grpc_server_started_total{job="$cluster",grpc_type="unary"}[5m]))',
  307. format: 'time_series',
  308. intervalFactor: 2,
  309. legendFormat: 'RPC Rate',
  310. metric: 'grpc_server_started_total',
  311. refId: 'A',
  312. step: 2,
  313. },
  314. {
  315. expr: 'sum(rate(grpc_server_handled_total{job="$cluster",grpc_type="unary",grpc_code!="OK"}[5m]))',
  316. format: 'time_series',
  317. intervalFactor: 2,
  318. legendFormat: 'RPC Failed Rate',
  319. metric: 'grpc_server_handled_total',
  320. refId: 'B',
  321. step: 2,
  322. },
  323. ],
  324. thresholds: [],
  325. timeFrom: null,
  326. timeShift: null,
  327. title: 'RPC Rate',
  328. tooltip: {
  329. msResolution: false,
  330. shared: true,
  331. sort: 0,
  332. value_type: 'individual',
  333. },
  334. type: 'graph',
  335. xaxis: {
  336. mode: 'time',
  337. name: null,
  338. show: true,
  339. values: [],
  340. },
  341. yaxes: [
  342. {
  343. format: 'ops',
  344. label: null,
  345. logBase: 1,
  346. max: null,
  347. min: null,
  348. show: true,
  349. },
  350. {
  351. format: 'short',
  352. label: null,
  353. logBase: 1,
  354. max: null,
  355. min: null,
  356. show: true,
  357. },
  358. ],
  359. },
  360. {
  361. aliasColors: {},
  362. bars: false,
  363. datasource: '$datasource',
  364. editable: true,
  365. 'error': false,
  366. fill: 0,
  367. id: 41,
  368. isNew: true,
  369. legend: {
  370. avg: false,
  371. current: false,
  372. max: false,
  373. min: false,
  374. show: false,
  375. total: false,
  376. values: false,
  377. },
  378. lines: true,
  379. linewidth: 2,
  380. links: [],
  381. nullPointMode: 'connected',
  382. percentage: false,
  383. pointradius: 5,
  384. points: false,
  385. renderer: 'flot',
  386. seriesOverrides: [],
  387. span: 4,
  388. stack: true,
  389. steppedLine: false,
  390. targets: [
  391. {
  392. expr: 'sum(grpc_server_started_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})',
  393. intervalFactor: 2,
  394. legendFormat: 'Watch Streams',
  395. metric: 'grpc_server_handled_total',
  396. refId: 'A',
  397. step: 4,
  398. },
  399. {
  400. expr: 'sum(grpc_server_started_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})',
  401. intervalFactor: 2,
  402. legendFormat: 'Lease Streams',
  403. metric: 'grpc_server_handled_total',
  404. refId: 'B',
  405. step: 4,
  406. },
  407. ],
  408. thresholds: [],
  409. timeFrom: null,
  410. timeShift: null,
  411. title: 'Active Streams',
  412. tooltip: {
  413. msResolution: false,
  414. shared: true,
  415. sort: 0,
  416. value_type: 'individual',
  417. },
  418. type: 'graph',
  419. xaxis: {
  420. mode: 'time',
  421. name: null,
  422. show: true,
  423. values: [],
  424. },
  425. yaxes: [
  426. {
  427. format: 'short',
  428. label: '',
  429. logBase: 1,
  430. max: null,
  431. min: null,
  432. show: true,
  433. },
  434. {
  435. format: 'short',
  436. label: null,
  437. logBase: 1,
  438. max: null,
  439. min: null,
  440. show: true,
  441. },
  442. ],
  443. },
  444. ],
  445. showTitle: false,
  446. title: 'Row',
  447. },
  448. {
  449. collapse: false,
  450. editable: true,
  451. height: '250px',
  452. panels: [
  453. {
  454. aliasColors: {},
  455. bars: false,
  456. datasource: '$datasource',
  457. decimals: null,
  458. editable: true,
  459. 'error': false,
  460. fill: 0,
  461. grid: {},
  462. id: 1,
  463. legend: {
  464. avg: false,
  465. current: false,
  466. max: false,
  467. min: false,
  468. show: false,
  469. total: false,
  470. values: false,
  471. },
  472. lines: true,
  473. linewidth: 2,
  474. links: [],
  475. nullPointMode: 'connected',
  476. percentage: false,
  477. pointradius: 5,
  478. points: false,
  479. renderer: 'flot',
  480. seriesOverrides: [],
  481. span: 4,
  482. stack: false,
  483. steppedLine: false,
  484. targets: [{
  485. expr: 'etcd_debugging_mvcc_db_total_size_in_bytes{job="$cluster"}',
  486. hide: false,
  487. interval: '',
  488. intervalFactor: 2,
  489. legendFormat: '{{instance}} DB Size',
  490. metric: '',
  491. refId: 'A',
  492. step: 4,
  493. }],
  494. thresholds: [],
  495. timeFrom: null,
  496. timeShift: null,
  497. title: 'DB Size',
  498. tooltip: {
  499. msResolution: false,
  500. shared: true,
  501. sort: 0,
  502. value_type: 'cumulative',
  503. },
  504. type: 'graph',
  505. xaxis: {
  506. mode: 'time',
  507. name: null,
  508. show: true,
  509. values: [],
  510. },
  511. yaxes: [
  512. {
  513. format: 'bytes',
  514. logBase: 1,
  515. max: null,
  516. min: null,
  517. show: true,
  518. },
  519. {
  520. format: 'short',
  521. logBase: 1,
  522. max: null,
  523. min: null,
  524. show: false,
  525. },
  526. ],
  527. },
  528. {
  529. aliasColors: {},
  530. bars: false,
  531. datasource: '$datasource',
  532. editable: true,
  533. 'error': false,
  534. fill: 0,
  535. grid: {},
  536. id: 3,
  537. legend: {
  538. avg: false,
  539. current: false,
  540. max: false,
  541. min: false,
  542. show: false,
  543. total: false,
  544. values: false,
  545. },
  546. lines: true,
  547. linewidth: 2,
  548. links: [],
  549. nullPointMode: 'connected',
  550. percentage: false,
  551. pointradius: 1,
  552. points: false,
  553. renderer: 'flot',
  554. seriesOverrides: [],
  555. span: 4,
  556. stack: false,
  557. steppedLine: true,
  558. targets: [
  559. {
  560. expr: 'histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job="$cluster"}[5m])) by (instance, le))',
  561. hide: false,
  562. intervalFactor: 2,
  563. legendFormat: '{{instance}} WAL fsync',
  564. metric: 'etcd_disk_wal_fsync_duration_seconds_bucket',
  565. refId: 'A',
  566. step: 4,
  567. },
  568. {
  569. expr: 'histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job="$cluster"}[5m])) by (instance, le))',
  570. intervalFactor: 2,
  571. legendFormat: '{{instance}} DB fsync',
  572. metric: 'etcd_disk_backend_commit_duration_seconds_bucket',
  573. refId: 'B',
  574. step: 4,
  575. },
  576. ],
  577. thresholds: [],
  578. timeFrom: null,
  579. timeShift: null,
  580. title: 'Disk Sync Duration',
  581. tooltip: {
  582. msResolution: false,
  583. shared: true,
  584. sort: 0,
  585. value_type: 'cumulative',
  586. },
  587. type: 'graph',
  588. xaxis: {
  589. mode: 'time',
  590. name: null,
  591. show: true,
  592. values: [],
  593. },
  594. yaxes: [
  595. {
  596. format: 's',
  597. logBase: 1,
  598. max: null,
  599. min: null,
  600. show: true,
  601. },
  602. {
  603. format: 'short',
  604. logBase: 1,
  605. max: null,
  606. min: null,
  607. show: false,
  608. },
  609. ],
  610. },
  611. {
  612. aliasColors: {},
  613. bars: false,
  614. datasource: '$datasource',
  615. editable: true,
  616. 'error': false,
  617. fill: 0,
  618. id: 29,
  619. isNew: true,
  620. legend: {
  621. avg: false,
  622. current: false,
  623. max: false,
  624. min: false,
  625. show: false,
  626. total: false,
  627. values: false,
  628. },
  629. lines: true,
  630. linewidth: 2,
  631. links: [],
  632. nullPointMode: 'connected',
  633. percentage: false,
  634. pointradius: 5,
  635. points: false,
  636. renderer: 'flot',
  637. seriesOverrides: [],
  638. span: 4,
  639. stack: false,
  640. steppedLine: false,
  641. targets: [{
  642. expr: 'process_resident_memory_bytes{job="$cluster"}',
  643. intervalFactor: 2,
  644. legendFormat: '{{instance}} Resident Memory',
  645. metric: 'process_resident_memory_bytes',
  646. refId: 'A',
  647. step: 4,
  648. }],
  649. thresholds: [],
  650. timeFrom: null,
  651. timeShift: null,
  652. title: 'Memory',
  653. tooltip: {
  654. msResolution: false,
  655. shared: true,
  656. sort: 0,
  657. value_type: 'individual',
  658. },
  659. type: 'graph',
  660. xaxis: {
  661. mode: 'time',
  662. name: null,
  663. show: true,
  664. values: [],
  665. },
  666. yaxes: [
  667. {
  668. format: 'bytes',
  669. label: null,
  670. logBase: 1,
  671. max: null,
  672. min: null,
  673. show: true,
  674. },
  675. {
  676. format: 'short',
  677. label: null,
  678. logBase: 1,
  679. max: null,
  680. min: null,
  681. show: true,
  682. },
  683. ],
  684. },
  685. ],
  686. title: 'New row',
  687. },
  688. {
  689. collapse: false,
  690. editable: true,
  691. height: '250px',
  692. panels: [
  693. {
  694. aliasColors: {},
  695. bars: false,
  696. datasource: '$datasource',
  697. editable: true,
  698. 'error': false,
  699. fill: 5,
  700. id: 22,
  701. isNew: true,
  702. legend: {
  703. avg: false,
  704. current: false,
  705. max: false,
  706. min: false,
  707. show: false,
  708. total: false,
  709. values: false,
  710. },
  711. lines: true,
  712. linewidth: 2,
  713. links: [],
  714. nullPointMode: 'connected',
  715. percentage: false,
  716. pointradius: 5,
  717. points: false,
  718. renderer: 'flot',
  719. seriesOverrides: [],
  720. span: 3,
  721. stack: true,
  722. steppedLine: false,
  723. targets: [{
  724. expr: 'rate(etcd_network_client_grpc_received_bytes_total{job="$cluster"}[5m])',
  725. intervalFactor: 2,
  726. legendFormat: '{{instance}} Client Traffic In',
  727. metric: 'etcd_network_client_grpc_received_bytes_total',
  728. refId: 'A',
  729. step: 4,
  730. }],
  731. thresholds: [],
  732. timeFrom: null,
  733. timeShift: null,
  734. title: 'Client Traffic In',
  735. tooltip: {
  736. msResolution: false,
  737. shared: true,
  738. sort: 0,
  739. value_type: 'individual',
  740. },
  741. type: 'graph',
  742. xaxis: {
  743. mode: 'time',
  744. name: null,
  745. show: true,
  746. values: [],
  747. },
  748. yaxes: [
  749. {
  750. format: 'Bps',
  751. label: null,
  752. logBase: 1,
  753. max: null,
  754. min: null,
  755. show: true,
  756. },
  757. {
  758. format: 'short',
  759. label: null,
  760. logBase: 1,
  761. max: null,
  762. min: null,
  763. show: true,
  764. },
  765. ],
  766. },
  767. {
  768. aliasColors: {},
  769. bars: false,
  770. datasource: '$datasource',
  771. editable: true,
  772. 'error': false,
  773. fill: 5,
  774. id: 21,
  775. isNew: true,
  776. legend: {
  777. avg: false,
  778. current: false,
  779. max: false,
  780. min: false,
  781. show: false,
  782. total: false,
  783. values: false,
  784. },
  785. lines: true,
  786. linewidth: 2,
  787. links: [],
  788. nullPointMode: 'connected',
  789. percentage: false,
  790. pointradius: 5,
  791. points: false,
  792. renderer: 'flot',
  793. seriesOverrides: [],
  794. span: 3,
  795. stack: true,
  796. steppedLine: false,
  797. targets: [{
  798. expr: 'rate(etcd_network_client_grpc_sent_bytes_total{job="$cluster"}[5m])',
  799. intervalFactor: 2,
  800. legendFormat: '{{instance}} Client Traffic Out',
  801. metric: 'etcd_network_client_grpc_sent_bytes_total',
  802. refId: 'A',
  803. step: 4,
  804. }],
  805. thresholds: [],
  806. timeFrom: null,
  807. timeShift: null,
  808. title: 'Client Traffic Out',
  809. tooltip: {
  810. msResolution: false,
  811. shared: true,
  812. sort: 0,
  813. value_type: 'individual',
  814. },
  815. type: 'graph',
  816. xaxis: {
  817. mode: 'time',
  818. name: null,
  819. show: true,
  820. values: [],
  821. },
  822. yaxes: [
  823. {
  824. format: 'Bps',
  825. label: null,
  826. logBase: 1,
  827. max: null,
  828. min: null,
  829. show: true,
  830. },
  831. {
  832. format: 'short',
  833. label: null,
  834. logBase: 1,
  835. max: null,
  836. min: null,
  837. show: true,
  838. },
  839. ],
  840. },
  841. {
  842. aliasColors: {},
  843. bars: false,
  844. datasource: '$datasource',
  845. editable: true,
  846. 'error': false,
  847. fill: 0,
  848. id: 20,
  849. isNew: true,
  850. legend: {
  851. avg: false,
  852. current: false,
  853. max: false,
  854. min: false,
  855. show: false,
  856. total: false,
  857. values: false,
  858. },
  859. lines: true,
  860. linewidth: 2,
  861. links: [],
  862. nullPointMode: 'connected',
  863. percentage: false,
  864. pointradius: 5,
  865. points: false,
  866. renderer: 'flot',
  867. seriesOverrides: [],
  868. span: 3,
  869. stack: false,
  870. steppedLine: false,
  871. targets: [{
  872. expr: 'sum(rate(etcd_network_peer_received_bytes_total{job="$cluster"}[5m])) by (instance)',
  873. intervalFactor: 2,
  874. legendFormat: '{{instance}} Peer Traffic In',
  875. metric: 'etcd_network_peer_received_bytes_total',
  876. refId: 'A',
  877. step: 4,
  878. }],
  879. thresholds: [],
  880. timeFrom: null,
  881. timeShift: null,
  882. title: 'Peer Traffic In',
  883. tooltip: {
  884. msResolution: false,
  885. shared: true,
  886. sort: 0,
  887. value_type: 'individual',
  888. },
  889. type: 'graph',
  890. xaxis: {
  891. mode: 'time',
  892. name: null,
  893. show: true,
  894. values: [],
  895. },
  896. yaxes: [
  897. {
  898. format: 'Bps',
  899. label: null,
  900. logBase: 1,
  901. max: null,
  902. min: null,
  903. show: true,
  904. },
  905. {
  906. format: 'short',
  907. label: null,
  908. logBase: 1,
  909. max: null,
  910. min: null,
  911. show: true,
  912. },
  913. ],
  914. },
  915. {
  916. aliasColors: {},
  917. bars: false,
  918. datasource: '$datasource',
  919. decimals: null,
  920. editable: true,
  921. 'error': false,
  922. fill: 0,
  923. grid: {},
  924. id: 16,
  925. legend: {
  926. avg: false,
  927. current: false,
  928. max: false,
  929. min: false,
  930. show: false,
  931. total: false,
  932. values: false,
  933. },
  934. lines: true,
  935. linewidth: 2,
  936. links: [],
  937. nullPointMode: 'connected',
  938. percentage: false,
  939. pointradius: 5,
  940. points: false,
  941. renderer: 'flot',
  942. seriesOverrides: [],
  943. span: 3,
  944. stack: false,
  945. steppedLine: false,
  946. targets: [{
  947. expr: 'sum(rate(etcd_network_peer_sent_bytes_total{job="$cluster"}[5m])) by (instance)',
  948. hide: false,
  949. interval: '',
  950. intervalFactor: 2,
  951. legendFormat: '{{instance}} Peer Traffic Out',
  952. metric: 'etcd_network_peer_sent_bytes_total',
  953. refId: 'A',
  954. step: 4,
  955. }],
  956. thresholds: [],
  957. timeFrom: null,
  958. timeShift: null,
  959. title: 'Peer Traffic Out',
  960. tooltip: {
  961. msResolution: false,
  962. shared: true,
  963. sort: 0,
  964. value_type: 'cumulative',
  965. },
  966. type: 'graph',
  967. xaxis: {
  968. mode: 'time',
  969. name: null,
  970. show: true,
  971. values: [],
  972. },
  973. yaxes: [
  974. {
  975. format: 'Bps',
  976. logBase: 1,
  977. max: null,
  978. min: null,
  979. show: true,
  980. },
  981. {
  982. format: 'short',
  983. logBase: 1,
  984. max: null,
  985. min: null,
  986. show: true,
  987. },
  988. ],
  989. },
  990. ],
  991. title: 'New row',
  992. },
  993. {
  994. collapse: false,
  995. editable: true,
  996. height: '250px',
  997. panels: [
  998. {
  999. aliasColors: {},
  1000. bars: false,
  1001. datasource: '$datasource',
  1002. editable: true,
  1003. 'error': false,
  1004. fill: 0,
  1005. id: 40,
  1006. isNew: true,
  1007. legend: {
  1008. avg: false,
  1009. current: false,
  1010. max: false,
  1011. min: false,
  1012. show: false,
  1013. total: false,
  1014. values: false,
  1015. },
  1016. lines: true,
  1017. linewidth: 2,
  1018. links: [],
  1019. nullPointMode: 'connected',
  1020. percentage: false,
  1021. pointradius: 5,
  1022. points: false,
  1023. renderer: 'flot',
  1024. seriesOverrides: [],
  1025. span: 6,
  1026. stack: false,
  1027. steppedLine: false,
  1028. targets: [
  1029. {
  1030. expr: 'sum(rate(etcd_server_proposals_failed_total{job="$cluster"}[5m]))',
  1031. intervalFactor: 2,
  1032. legendFormat: 'Proposal Failure Rate',
  1033. metric: 'etcd_server_proposals_failed_total',
  1034. refId: 'A',
  1035. step: 2,
  1036. },
  1037. {
  1038. expr: 'sum(etcd_server_proposals_pending{job="$cluster"})',
  1039. intervalFactor: 2,
  1040. legendFormat: 'Proposal Pending Total',
  1041. metric: 'etcd_server_proposals_pending',
  1042. refId: 'B',
  1043. step: 2,
  1044. },
  1045. {
  1046. expr: 'sum(rate(etcd_server_proposals_committed_total{job="$cluster"}[5m]))',
  1047. intervalFactor: 2,
  1048. legendFormat: 'Proposal Commit Rate',
  1049. metric: 'etcd_server_proposals_committed_total',
  1050. refId: 'C',
  1051. step: 2,
  1052. },
  1053. {
  1054. expr: 'sum(rate(etcd_server_proposals_applied_total{job="$cluster"}[5m]))',
  1055. intervalFactor: 2,
  1056. legendFormat: 'Proposal Apply Rate',
  1057. refId: 'D',
  1058. step: 2,
  1059. },
  1060. ],
  1061. thresholds: [],
  1062. timeFrom: null,
  1063. timeShift: null,
  1064. title: 'Raft Proposals',
  1065. tooltip: {
  1066. msResolution: false,
  1067. shared: true,
  1068. sort: 0,
  1069. value_type: 'individual',
  1070. },
  1071. type: 'graph',
  1072. xaxis: {
  1073. mode: 'time',
  1074. name: null,
  1075. show: true,
  1076. values: [],
  1077. },
  1078. yaxes: [
  1079. {
  1080. format: 'short',
  1081. label: '',
  1082. logBase: 1,
  1083. max: null,
  1084. min: null,
  1085. show: true,
  1086. },
  1087. {
  1088. format: 'short',
  1089. label: null,
  1090. logBase: 1,
  1091. max: null,
  1092. min: null,
  1093. show: true,
  1094. },
  1095. ],
  1096. },
  1097. {
  1098. aliasColors: {},
  1099. bars: false,
  1100. datasource: '$datasource',
  1101. decimals: 0,
  1102. editable: true,
  1103. 'error': false,
  1104. fill: 0,
  1105. id: 19,
  1106. isNew: true,
  1107. legend: {
  1108. alignAsTable: false,
  1109. avg: false,
  1110. current: false,
  1111. max: false,
  1112. min: false,
  1113. rightSide: false,
  1114. show: false,
  1115. total: false,
  1116. values: false,
  1117. },
  1118. lines: true,
  1119. linewidth: 2,
  1120. links: [],
  1121. nullPointMode: 'connected',
  1122. percentage: false,
  1123. pointradius: 5,
  1124. points: false,
  1125. renderer: 'flot',
  1126. seriesOverrides: [],
  1127. span: 6,
  1128. stack: false,
  1129. steppedLine: false,
  1130. targets: [{
  1131. expr: 'changes(etcd_server_leader_changes_seen_total{job="$cluster"}[1d])',
  1132. intervalFactor: 2,
  1133. legendFormat: '{{instance}} Total Leader Elections Per Day',
  1134. metric: 'etcd_server_leader_changes_seen_total',
  1135. refId: 'A',
  1136. step: 2,
  1137. }],
  1138. thresholds: [],
  1139. timeFrom: null,
  1140. timeShift: null,
  1141. title: 'Total Leader Elections Per Day',
  1142. tooltip: {
  1143. msResolution: false,
  1144. shared: true,
  1145. sort: 0,
  1146. value_type: 'individual',
  1147. },
  1148. type: 'graph',
  1149. xaxis: {
  1150. mode: 'time',
  1151. name: null,
  1152. show: true,
  1153. values: [],
  1154. },
  1155. yaxes: [
  1156. {
  1157. format: 'short',
  1158. label: null,
  1159. logBase: 1,
  1160. max: null,
  1161. min: null,
  1162. show: true,
  1163. },
  1164. {
  1165. format: 'short',
  1166. label: null,
  1167. logBase: 1,
  1168. max: null,
  1169. min: null,
  1170. show: true,
  1171. },
  1172. ],
  1173. },
  1174. ],
  1175. title: 'New row',
  1176. },
  1177. ],
  1178. time: {
  1179. from: 'now-15m',
  1180. to: 'now',
  1181. },
  1182. timepicker: {
  1183. now: true,
  1184. refresh_intervals: [
  1185. '5s',
  1186. '10s',
  1187. '30s',
  1188. '1m',
  1189. '5m',
  1190. '15m',
  1191. '30m',
  1192. '1h',
  1193. '2h',
  1194. '1d',
  1195. ],
  1196. time_options: [
  1197. '5m',
  1198. '15m',
  1199. '1h',
  1200. '6h',
  1201. '12h',
  1202. '24h',
  1203. '2d',
  1204. '7d',
  1205. '30d',
  1206. ],
  1207. },
  1208. templating: {
  1209. list: [
  1210. {
  1211. current: {
  1212. text: 'Prometheus',
  1213. value: 'Prometheus',
  1214. },
  1215. hide: 0,
  1216. label: null,
  1217. name: 'datasource',
  1218. options: [],
  1219. query: 'prometheus',
  1220. refresh: 1,
  1221. regex: '',
  1222. type: 'datasource',
  1223. },
  1224. {
  1225. allValue: null,
  1226. current: {
  1227. text: 'prod',
  1228. value: 'prod',
  1229. },
  1230. datasource: '$datasource',
  1231. hide: 0,
  1232. includeAll: false,
  1233. label: 'cluster',
  1234. multi: false,
  1235. name: 'cluster',
  1236. options: [],
  1237. query: 'label_values(etcd_server_has_leader, job)',
  1238. refresh: 1,
  1239. regex: '',
  1240. sort: 2,
  1241. tagValuesQuery: '',
  1242. tags: [],
  1243. tagsQuery: '',
  1244. type: 'query',
  1245. useTags: false,
  1246. },
  1247. ],
  1248. },
  1249. annotations: {
  1250. list: [],
  1251. },
  1252. refresh: false,
  1253. schemaVersion: 13,
  1254. version: 215,
  1255. links: [],
  1256. gnetId: null,
  1257. },
  1258. },
  1259. }