// mixin.libsonnet (40 KB)
  1. {
  // Hidden, mergeable configuration. `etcd_selector` is the label matcher that the
  // alert expressions interpolate via %(etcd_selector)s.
  _config+:: { etcd_selector: 'job=~".*etcd.*"' },
  5. prometheusAlerts+:: {
  6. groups+: [
  7. {
  8. name: 'etcd',
  9. rules: [
  // Per job: the count of targets with up == 0 is compared against
  // (total target count / 2 - 1); must hold for 3 minutes.
  {
    alert: 'EtcdInsufficientMembers',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).',
    },
    expr: std.format(|||
      count(up{%(etcd_selector)s} == 0) by (job) > (count(up{%(etcd_selector)s}) by (job) / 2 - 1)
    |||, $._config),
    'for': '3m',
    labels: { severity: 'critical' },
  },
  // Fires for any selected instance whose etcd_server_has_leader gauge equals 0
  // for one minute.
  {
    alert: 'EtcdNoLeader',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.',
    },
    expr: std.format(|||
      etcd_server_has_leader{%(etcd_selector)s} == 0
    |||, $._config),
    'for': '1m',
    labels: { severity: 'critical' },
  },
  // Leader-election churn: more than 3 leader changes seen by an instance within
  // one hour, sustained for 15 minutes.
  {
    alert: 'EtcdHighNumberOfLeaderChanges',
    // increase() over 1h yields a count of changes, which is what the message
    // reports ("within the last hour"). The previous rate(...[15m]) is a
    // per-second average, so "> 3" meant more than 3 leader changes per second.
    expr: |||
      increase(etcd_server_leader_changes_seen_total{%(etcd_selector)s}[1h]) > 3
    ||| % $._config,
    'for': '15m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour.',
    },
  },
  // Warning tier: percentage of non-OK gRPC responses per
  // (job, instance, grpc_service, grpc_method) above 1 for 10 minutes.
  {
    alert: 'EtcdHighNumberOfFailedGRPCRequests',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
    },
    expr: std.format(|||
      100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
      /
      sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) BY (job, instance, grpc_service, grpc_method)
      > 1
    |||, $._config),
    'for': '10m',
    labels: { severity: 'warning' },
  },
  // Critical tier of the gRPC failure-ratio alert: same ratio as the warning
  // rule, threshold 5, held for 5 minutes.
  {
    alert: 'EtcdHighNumberOfFailedGRPCRequests',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
    },
    expr: std.format(|||
      100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
      /
      sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) BY (job, instance, grpc_service, grpc_method)
      > 5
    |||, $._config),
    'for': '5m',
    labels: { severity: 'critical' },
  },
  // 99th-percentile handling time of unary gRPC calls, per
  // (job, instance, grpc_service, grpc_method), above 0.15 for 10 minutes.
  {
    alert: 'EtcdGRPCRequestsSlow',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
    expr: std.format(|||
      histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{%(etcd_selector)s, grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
      > 0.15
    |||, $._config),
    'for': '10m',
    labels: { severity: 'critical' },
  },
  // Warning tier: percentage of failed HTTP requests per (job, instance, method)
  // above 1 for 10 minutes.
  {
    alert: 'EtcdHighNumberOfFailedHTTPRequests',
    expr: |||
      100 * sum(rate(etcd_http_failed_total{%(etcd_selector)s}[5m])) BY (job, instance, method)
      /
      sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m])) BY (job, instance, method)
      > 1
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      // This string is not passed through `%` formatting, so a single '%' is
      // correct; '%%' would be rendered literally in the notification.
      message: 'Etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.',
    },
  },
  // Critical tier: percentage of failed HTTP requests per (job, instance, method)
  // above 5 for 5 minutes.
  {
    alert: 'EtcdHighNumberOfFailedHTTPRequests',
    expr: |||
      100 * sum(rate(etcd_http_failed_total{%(etcd_selector)s}[5m])) BY (job, instance, method)
      /
      sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m])) BY (job, instance, method)
      > 5
    ||| % $._config,
    'for': '5m',
    labels: {
      severity: 'critical',
    },
    annotations: {
      // This string is not passed through `%` formatting, so a single '%' is
      // correct; '%%' would be rendered literally in the notification.
      message: 'Etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.',
    },
  },
  // 99th percentile of etcd_http_successful_duration_seconds above 0.15 for
  // 10 minutes.
  {
    alert: 'EtcdHTTPRequestsSlow',
    expr: |||
      histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.15
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      // The histogram is in seconds (metric name suffix), so the value carries
      // an "s" unit like the other latency alerts in this group.
      message: 'Etcd cluster "{{ $labels.job }}": HTTP requests to {{ $labels.method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
  },
  // 99th percentile of the peer round-trip-time histogram above 0.15 for
  // 10 minutes; the message names the peer via the `To` label.
  {
    alert: 'EtcdMemberCommunicationSlow',
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
    expr: std.format(|||
      histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.15
    |||, $._config),
    'for': '10m',
    labels: { severity: 'warning' },
  },
  // More than 5 failed proposals on an instance within one hour, sustained for
  // 15 minutes.
  {
    alert: 'EtcdHighNumberOfFailedProposals',
    // increase() over 1h yields a count of failures, matching the message
    // ("within the last hour"). The previous rate(...[15m]) is a per-second
    // average, so "> 5" meant more than 5 failures per second.
    expr: |||
      increase(etcd_server_proposals_failed_total{%(etcd_selector)s}[1h]) > 5
    ||| % $._config,
    'for': '15m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      message: 'Etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last hour on etcd instance {{ $labels.instance }}.',
    },
  },
  // 99th percentile of the WAL fsync duration histogram above 0.5 for 10 minutes.
  {
    alert: 'EtcdHighFsyncDurations',
    expr: |||
      histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.5
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      // Spelling corrected: "fync" -> "fsync".
      message: 'Etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
  },
  // 99th percentile of the backend commit duration histogram above 0.25 for
  // 10 minutes.
  {
    alert: 'EtcdHighCommitDurations',
    expr: |||
      histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{%(etcd_selector)s}[5m]))
      > 0.25
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'warning',
    },
    annotations: {
      // Added the missing verb so the sentence reads like the fsync alert's.
      message: 'Etcd cluster "{{ $labels.job }}": 99th percentile commit durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
    },
  },
  // Recording rule: ratio of open to maximum file descriptors, consumed by the
  // FdExhaustionClose alerts through predict_linear().
  { record: 'instance:fd_utilization', expr: 'process_open_fds / process_max_fds' },
  // Warning tier: linear extrapolation of the last hour of fd utilization,
  // projected 4 hours (3600 * 4 seconds) ahead, exceeds 1.
  {
    alert: 'FdExhaustionClose',
    annotations: {
      message: '{{ $labels.job }} instance {{ $labels.instance }} will exhaust its file descriptors soon',
    },
    expr: std.format(|||
      predict_linear(instance:fd_utilization{%(etcd_selector)s}[1h], 3600 * 4) > 1
    |||, $._config),
    'for': '10m',
    labels: { severity: 'warning' },
  },
  // Critical tier: linear extrapolation of the last 10 minutes of fd
  // utilization, projected 1 hour (3600 seconds) ahead, exceeds 1.
  {
    local text = '{{ $labels.job }} instance {{ $labels.instance }} will exhaust its file descriptors soon',

    alert: 'FdExhaustionClose',
    expr: |||
      predict_linear(instance:fd_utilization{%(etcd_selector)s}[10m], 3600) > 1
    ||| % $._config,
    'for': '10m',
    labels: {
      severity: 'critical',
    },
    annotations: {
      // Every other alert in this group uses the `message` annotation; this one
      // only set `description`. `message` is added for consistency and
      // `description` is kept so existing notification templates still work.
      message: text,
      description: text,
    },
  },
  226. ],
  227. },
  228. ],
  229. },
  230. grafanaDashboards+:: {
  231. 'etcd.json': {
  232. id: 6,
  233. title: 'etcd',
  234. description: 'etcd sample Grafana dashboard with Prometheus',
  235. tags: [],
  236. style: 'dark',
  237. timezone: 'browser',
  238. editable: true,
  239. hideControls: false,
  240. sharedCrosshair: false,
  241. rows: [
  242. {
  243. collapse: false,
  244. editable: true,
  245. height: '250px',
  246. panels: [
  // Singlestat "Up": sum of etcd_server_has_leader over the selected $cluster job.
  {
    local naText = 'N/A',
    local nullKey = 'null',
    local smallFont = '50%',

    id: 28,
    type: 'singlestat',
    title: 'Up',
    datasource: '$datasource',
    span: 3,
    isNew: true,
    editable: true,
    'error': false,
    links: [],

    targets: [{
      refId: 'A',
      expr: 'sum(etcd_server_has_leader{job="$cluster"})',
      metric: 'etcd_server_has_leader',
      legendFormat: '',
      intervalFactor: 2,
      step: 20,
    }],
    cacheTimeout: null,
    interval: null,
    maxDataPoints: 100,

    format: 'none',
    valueName: 'avg',
    valueFontSize: '200%',
    prefix: '',
    prefixFontSize: smallFont,
    postfix: '',
    postfixFontSize: smallFont,
    nullPointMode: 'connected',
    nullText: null,

    mappingType: 1,
    mappingTypes: [
      { name: 'value to text', value: 1 },
      { name: 'range to text', value: 2 },
    ],
    valueMaps: [{ op: '=', text: naText, value: nullKey }],
    rangeMaps: [{ from: nullKey, text: naText, to: nullKey }],

    thresholds: '',
    colorBackground: false,
    colorValue: false,
    colors: [
      'rgba(245, 54, 54, 0.9)',
      'rgba(237, 129, 40, 0.89)',
      'rgba(50, 172, 45, 0.97)',
    ],
    gauge: {
      show: false,
      minValue: 0,
      maxValue: 100,
      thresholdLabels: false,
      thresholdMarkers: true,
    },
    sparkline: {
      show: false,
      full: false,
      fillColor: 'rgba(31, 118, 189, 0.18)',
      lineColor: 'rgb(31, 120, 193)',
    },
  },
  // Graph "RPC Rate": 5m rate of started unary gRPC calls (A) and of unary calls
  // handled with a non-OK code (B), each summed over the $cluster job.
  {
    local axis(unit) = { format: unit, label: null, logBase: 1, max: null, min: null, show: true },
    local query(ref, promql, name, series) = {
      refId: ref,
      expr: promql,
      legendFormat: name,
      metric: series,
      format: 'time_series',
      intervalFactor: 2,
      step: 2,
    },

    id: 23,
    type: 'graph',
    title: 'RPC Rate',
    datasource: '$datasource',
    span: 5,
    isNew: true,
    editable: true,
    'error': false,
    links: [],

    targets: [
      query(
        'A',
        'sum(rate(grpc_server_started_total{job="$cluster",grpc_type="unary"}[5m]))',
        'RPC Rate',
        'grpc_server_started_total'
      ),
      query(
        'B',
        'sum(rate(grpc_server_handled_total{job="$cluster",grpc_type="unary",grpc_code!="OK"}[5m]))',
        'RPC Failed Rate',
        'grpc_server_handled_total'
      ),
    ],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 0,
    bars: false,
    points: false,
    pointradius: 5,
    stack: false,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'individual' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis('ops'), axis('short')],
  },
  // Graph "Active Streams": started minus handled bidi_stream gRPC calls for the
  // Watch and Lease services, stacked.
  {
    local axis(caption) = { format: 'short', label: caption, logBase: 1, max: null, min: null, show: true },
    // Builds the "started - handled" expression for one gRPC service.
    local openStreams(service) =
      'sum(grpc_server_started_total{job="$cluster",grpc_service="%s",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="%s",grpc_type="bidi_stream"})'
      % [service, service],
    local query(ref, service, name) = {
      refId: ref,
      expr: openStreams(service),
      legendFormat: name,
      metric: 'grpc_server_handled_total',
      intervalFactor: 2,
      step: 4,
    },

    id: 41,
    type: 'graph',
    title: 'Active Streams',
    datasource: '$datasource',
    span: 4,
    isNew: true,
    editable: true,
    'error': false,
    links: [],

    targets: [
      query('A', 'etcdserverpb.Watch', 'Watch Streams'),
      query('B', 'etcdserverpb.Lease', 'Lease Streams'),
    ],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 0,
    bars: false,
    points: false,
    pointradius: 5,
    stack: true,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'individual' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis(''), axis(null)],
  },
  490. ],
  491. showTitle: false,
  492. title: 'Row',
  493. },
  494. {
  495. collapse: false,
  496. editable: true,
  497. height: '250px',
  498. panels: [
  // Graph "DB Size": etcd_debugging_mvcc_db_total_size_in_bytes per instance,
  // left axis in bytes, right axis hidden.
  {
    local axis(unit, visible) = { format: unit, logBase: 1, max: null, min: null, show: visible },

    id: 1,
    type: 'graph',
    title: 'DB Size',
    datasource: '$datasource',
    span: 4,
    decimals: null,
    editable: true,
    'error': false,
    links: [],
    grid: {},

    targets: [{
      refId: 'A',
      expr: 'etcd_debugging_mvcc_db_total_size_in_bytes{job="$cluster"}',
      legendFormat: '{{instance}} DB Size',
      metric: '',
      hide: false,
      interval: '',
      intervalFactor: 2,
      step: 4,
    }],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 0,
    bars: false,
    points: false,
    pointradius: 5,
    stack: false,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'cumulative' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis('bytes', true), axis('short', false)],
  },
  // Graph "Disk Sync Duration": per-instance 99th percentile of the WAL fsync (A)
  // and backend commit (B) duration histograms, drawn as stepped lines.
  {
    local axis(unit, visible) = { format: unit, logBase: 1, max: null, min: null, show: visible },
    // 99th percentile over a 5m rate of the given histogram bucket series.
    local p99(bucket) =
      'histogram_quantile(0.99, sum(rate(%s{job="$cluster"}[5m])) by (instance, le))' % [bucket],
    local walBuckets = 'etcd_disk_wal_fsync_duration_seconds_bucket',
    local commitBuckets = 'etcd_disk_backend_commit_duration_seconds_bucket',

    id: 3,
    type: 'graph',
    title: 'Disk Sync Duration',
    datasource: '$datasource',
    span: 4,
    editable: true,
    'error': false,
    links: [],
    grid: {},

    targets: [
      {
        refId: 'A',
        expr: p99(walBuckets),
        legendFormat: '{{instance}} WAL fsync',
        metric: walBuckets,
        hide: false,
        intervalFactor: 2,
        step: 4,
      },
      {
        refId: 'B',
        expr: p99(commitBuckets),
        legendFormat: '{{instance}} DB fsync',
        metric: commitBuckets,
        intervalFactor: 2,
        step: 4,
      },
    ],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 0,
    bars: false,
    points: false,
    pointradius: 1,
    stack: false,
    percentage: false,
    steppedLine: true,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'cumulative' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis('s', true), axis('short', false)],
  },
  // Graph "Memory": process_resident_memory_bytes per instance of the $cluster job.
  {
    local axis(unit) = { format: unit, label: null, logBase: 1, max: null, min: null, show: true },
    local series = 'process_resident_memory_bytes',

    id: 29,
    type: 'graph',
    title: 'Memory',
    datasource: '$datasource',
    span: 4,
    isNew: true,
    editable: true,
    'error': false,
    links: [],

    targets: [{
      refId: 'A',
      expr: series + '{job="$cluster"}',
      legendFormat: '{{instance}} Resident Memory',
      metric: series,
      intervalFactor: 2,
      step: 4,
    }],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 0,
    bars: false,
    points: false,
    pointradius: 5,
    stack: false,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'individual' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis('bytes'), axis('short')],
  },
  731. ],
  732. title: 'New row',
  733. },
  734. {
  735. collapse: false,
  736. editable: true,
  737. height: '250px',
  738. panels: [
  // Graph "Client Traffic In": 5m rate of client gRPC bytes received, per series,
  // stacked and filled.
  {
    local axis(unit) = { format: unit, label: null, logBase: 1, max: null, min: null, show: true },
    local series = 'etcd_network_client_grpc_received_bytes_total',

    id: 22,
    type: 'graph',
    title: 'Client Traffic In',
    datasource: '$datasource',
    span: 3,
    isNew: true,
    editable: true,
    'error': false,
    links: [],

    targets: [{
      refId: 'A',
      expr: 'rate(%s{job="$cluster"}[5m])' % [series],
      legendFormat: '{{instance}} Client Traffic In',
      metric: series,
      intervalFactor: 2,
      step: 4,
    }],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 5,
    bars: false,
    points: false,
    pointradius: 5,
    stack: true,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'individual' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis('Bps'), axis('short')],
  },
  // Graph "Client Traffic Out": 5m rate of client gRPC bytes sent, per series,
  // stacked and filled.
  {
    local axis(unit) = { format: unit, label: null, logBase: 1, max: null, min: null, show: true },
    local series = 'etcd_network_client_grpc_sent_bytes_total',

    id: 21,
    type: 'graph',
    title: 'Client Traffic Out',
    datasource: '$datasource',
    span: 3,
    isNew: true,
    editable: true,
    'error': false,
    links: [],

    targets: [{
      refId: 'A',
      expr: 'rate(%s{job="$cluster"}[5m])' % [series],
      legendFormat: '{{instance}} Client Traffic Out',
      metric: series,
      intervalFactor: 2,
      step: 4,
    }],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 5,
    bars: false,
    points: false,
    pointradius: 5,
    stack: true,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'individual' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis('Bps'), axis('short')],
  },
  // Graph "Peer Traffic In": 5m rate of peer bytes received, summed by instance.
  {
    local axis(unit) = { format: unit, label: null, logBase: 1, max: null, min: null, show: true },
    local series = 'etcd_network_peer_received_bytes_total',

    id: 20,
    type: 'graph',
    title: 'Peer Traffic In',
    datasource: '$datasource',
    span: 3,
    isNew: true,
    editable: true,
    'error': false,
    links: [],

    targets: [{
      refId: 'A',
      expr: 'sum(rate(%s{job="$cluster"}[5m])) by (instance)' % [series],
      legendFormat: '{{instance}} Peer Traffic In',
      metric: series,
      intervalFactor: 2,
      step: 4,
    }],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 0,
    bars: false,
    points: false,
    pointradius: 5,
    stack: false,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'individual' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis('Bps'), axis('short')],
  },
  // Graph "Peer Traffic Out": 5m rate of peer bytes sent, summed by instance.
  {
    local axis(unit) = { format: unit, logBase: 1, max: null, min: null, show: true },
    local series = 'etcd_network_peer_sent_bytes_total',

    id: 16,
    type: 'graph',
    title: 'Peer Traffic Out',
    datasource: '$datasource',
    span: 3,
    decimals: null,
    editable: true,
    'error': false,
    links: [],
    grid: {},

    targets: [{
      refId: 'A',
      expr: 'sum(rate(%s{job="$cluster"}[5m])) by (instance)' % [series],
      legendFormat: '{{instance}} Peer Traffic Out',
      metric: series,
      hide: false,
      interval: '',
      intervalFactor: 2,
      step: 4,
    }],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 0,
    bars: false,
    points: false,
    pointradius: 5,
    stack: false,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'cumulative' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis('Bps'), axis('short')],
  },
  1036. ],
  1037. title: 'New row',
  1038. },
  1039. {
  1040. collapse: false,
  1041. editable: true,
  1042. height: '250px',
  1043. panels: [
  // Graph "Raft Proposals": cluster-wide sums of the proposal failure rate (A),
  // pending proposals (B), commit rate (C) and apply rate (D).
  {
    local axis(caption) = { format: 'short', label: caption, logBase: 1, max: null, min: null, show: true },
    // Fields shared by all four targets; `metric` is added only where the
    // original target carried one (target D has none).
    local query(ref, promql, name) = {
      refId: ref,
      expr: promql,
      legendFormat: name,
      intervalFactor: 2,
      step: 2,
    },

    id: 40,
    type: 'graph',
    title: 'Raft Proposals',
    datasource: '$datasource',
    span: 6,
    isNew: true,
    editable: true,
    'error': false,
    links: [],

    targets: [
      query('A', 'sum(rate(etcd_server_proposals_failed_total{job="$cluster"}[5m]))', 'Proposal Failure Rate')
      + { metric: 'etcd_server_proposals_failed_total' },
      query('B', 'sum(etcd_server_proposals_pending{job="$cluster"})', 'Proposal Pending Total')
      + { metric: 'etcd_server_proposals_pending' },
      query('C', 'sum(rate(etcd_server_proposals_committed_total{job="$cluster"}[5m]))', 'Proposal Commit Rate')
      + { metric: 'etcd_server_proposals_committed_total' },
      query('D', 'sum(rate(etcd_server_proposals_applied_total{job="$cluster"}[5m]))', 'Proposal Apply Rate'),
    ],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 0,
    bars: false,
    points: false,
    pointradius: 5,
    stack: false,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: { [flag]: false for flag in ['avg', 'current', 'max', 'min', 'show', 'total', 'values'] },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'individual' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis(''), axis(null)],
  },
  // Graph "Total Leader Elections Per Day": changes() of the leader-changes
  // counter over a 1d window, per series, shown with 0 decimals.
  {
    local axis(unit) = { format: unit, label: null, logBase: 1, max: null, min: null, show: true },
    local series = 'etcd_server_leader_changes_seen_total',
    local caption = 'Total Leader Elections Per Day',

    id: 19,
    type: 'graph',
    title: caption,
    datasource: '$datasource',
    span: 6,
    decimals: 0,
    isNew: true,
    editable: true,
    'error': false,
    links: [],

    targets: [{
      refId: 'A',
      expr: 'changes(%s{job="$cluster"}[1d])' % [series],
      legendFormat: '{{instance}} ' + caption,
      metric: series,
      intervalFactor: 2,
      step: 2,
    }],
    timeFrom: null,
    timeShift: null,

    renderer: 'flot',
    lines: true,
    linewidth: 2,
    fill: 0,
    bars: false,
    points: false,
    pointradius: 5,
    stack: false,
    percentage: false,
    steppedLine: false,
    nullPointMode: 'connected',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    legend: {
      [flag]: false
      for flag in ['alignAsTable', 'avg', 'current', 'max', 'min', 'rightSide', 'show', 'total', 'values']
    },
    tooltip: { msResolution: false, shared: true, sort: 0, value_type: 'individual' },
    xaxis: { mode: 'time', name: null, show: true, values: [] },
    yaxes: [axis('short'), axis('short')],
  },
  1220. ],
  1221. title: 'New row',
  1222. },
  1223. ],
  1224. time: {
  1225. from: 'now-15m',
  1226. to: 'now',
  1227. },
  // Time picker: selectable auto-refresh intervals and quick time ranges.
  timepicker: {
    now: true,
    refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
    time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
  },
  // Dashboard variables: $datasource (a Prometheus datasource picker) and
  // $cluster (job label values taken from etcd_server_has_leader).
  templating: {
    // Grafana stores the current selection as a text/value pair; both are the
    // same string for the two variables below.
    local selected(choice) = { text: choice, value: choice },

    list: [
      {
        name: 'datasource',
        type: 'datasource',
        label: null,
        query: 'prometheus',
        regex: '',
        current: selected('Prometheus'),
        options: [],
        hide: 0,
        refresh: 1,
      },
      {
        name: 'cluster',
        type: 'query',
        label: 'cluster',
        datasource: '$datasource',
        query: 'label_values(etcd_server_has_leader, job)',
        regex: '',
        current: selected('prod'),
        options: [],
        allValue: null,
        includeAll: false,
        multi: false,
        hide: 0,
        refresh: 1,
        sort: 2,
        useTags: false,
        tags: [],
        tagsQuery: '',
        tagValuesQuery: '',
      },
    ],
  },
  1295. annotations: {
  1296. list: [],
  1297. },
  1298. refresh: false,
  1299. schemaVersion: 13,
  1300. version: 215,
  1301. links: [],
  1302. gnetId: null,
  1303. },
  1304. },
  1305. }