metrics.go 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. // Copyright 2018 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package main
  15. import (
  16. "fmt"
  17. "io/ioutil"
  18. "net/http"
  19. "sort"
  20. "strings"
  21. "time"
  22. "go.etcd.io/etcd/pkg/transport"
  23. "go.uber.org/zap"
  24. )
  25. func fetchMetrics(ep string) (lines []string, err error) {
  26. tr, err := transport.NewTimeoutTransport(transport.TLSInfo{}, time.Second, time.Second, time.Second)
  27. if err != nil {
  28. return nil, err
  29. }
  30. cli := &http.Client{Transport: tr}
  31. resp, err := cli.Get(ep)
  32. if err != nil {
  33. return nil, err
  34. }
  35. defer resp.Body.Close()
  36. b, rerr := ioutil.ReadAll(resp.Body)
  37. if rerr != nil {
  38. return nil, rerr
  39. }
  40. lines = strings.Split(string(b), "\n")
  41. return lines, nil
  42. }
  43. func getMetrics(ep string) (m metricSlice) {
  44. lines, err := fetchMetrics(ep)
  45. if err != nil {
  46. lg.Panic("failed to fetch metrics", zap.Error(err))
  47. }
  48. mss := parse(lines)
  49. sort.Sort(metricSlice(mss))
  50. return mss
  51. }
  52. func (mss metricSlice) String() (s string) {
  53. ver := "unknown"
  54. for i, v := range mss {
  55. if strings.HasPrefix(v.name, "etcd_server_version") {
  56. ver = v.metrics[0]
  57. }
  58. s += v.String()
  59. if i != len(mss)-1 {
  60. s += "\n\n"
  61. }
  62. }
  63. return "# server version: " + ver + "\n\n" + s
  64. }
  65. type metricSlice []metric
  66. func (mss metricSlice) Len() int {
  67. return len(mss)
  68. }
  69. func (mss metricSlice) Less(i, j int) bool {
  70. return mss[i].name < mss[j].name
  71. }
  72. func (mss metricSlice) Swap(i, j int) {
  73. mss[i], mss[j] = mss[j], mss[i]
  74. }
  75. type metric struct {
  76. // raw data for debugging purposes
  77. raw []string
  78. // metrics name
  79. name string
  80. // metrics description
  81. desc string
  82. // metrics type
  83. tp string
  84. // aggregates of "grpc_server_handled_total"
  85. grpcCodes []string
  86. // keep fist 1 and last 4 if histogram or summary
  87. // otherwise, keep only 1
  88. metrics []string
  89. }
  90. func (m metric) String() (s string) {
  91. s += fmt.Sprintf("# name: %q\n", m.name)
  92. s += fmt.Sprintf("# description: %q\n", m.desc)
  93. s += fmt.Sprintf("# type: %q\n", m.tp)
  94. if len(m.grpcCodes) > 0 {
  95. s += "# gRPC codes: \n"
  96. for _, c := range m.grpcCodes {
  97. s += fmt.Sprintf("# - %q\n", c)
  98. }
  99. }
  100. s += strings.Join(m.metrics, "\n")
  101. return s
  102. }
  103. func parse(lines []string) (mss []metric) {
  104. m := metric{raw: make([]string, 0), metrics: make([]string, 0)}
  105. for _, line := range lines {
  106. if strings.HasPrefix(line, "# HELP ") {
  107. // add previous metric and initialize
  108. if m.name != "" {
  109. mss = append(mss, m)
  110. }
  111. m = metric{raw: make([]string, 0), metrics: make([]string, 0)}
  112. m.raw = append(m.raw, line)
  113. ss := strings.Split(strings.Replace(line, "# HELP ", "", 1), " ")
  114. m.name, m.desc = ss[0], strings.Join(ss[1:], " ")
  115. continue
  116. }
  117. if strings.HasPrefix(line, "# TYPE ") {
  118. m.raw = append(m.raw, line)
  119. m.tp = strings.Split(strings.Replace(line, "# TYPE "+m.tp, "", 1), " ")[1]
  120. continue
  121. }
  122. m.raw = append(m.raw, line)
  123. m.metrics = append(m.metrics, strings.Split(line, " ")[0])
  124. }
  125. if m.name != "" {
  126. mss = append(mss, m)
  127. }
  128. // aggregate
  129. for i := range mss {
  130. /*
  131. munge data for:
  132. etcd_network_active_peers{Local="c6c9b5143b47d146",Remote="fbdddd08d7e1608b"}
  133. etcd_network_peer_sent_bytes_total{To="c6c9b5143b47d146"}
  134. etcd_network_peer_received_bytes_total{From="0"}
  135. etcd_network_peer_received_bytes_total{From="fd422379fda50e48"}
  136. etcd_network_peer_round_trip_time_seconds_bucket{To="91bc3c398fb3c146",le="0.0001"}
  137. etcd_network_peer_round_trip_time_seconds_bucket{To="fd422379fda50e48",le="0.8192"}
  138. etcd_network_peer_round_trip_time_seconds_bucket{To="fd422379fda50e48",le="+Inf"}
  139. etcd_network_peer_round_trip_time_seconds_sum{To="fd422379fda50e48"}
  140. etcd_network_peer_round_trip_time_seconds_count{To="fd422379fda50e48"}
  141. */
  142. if mss[i].name == "etcd_network_active_peers" {
  143. mss[i].metrics = []string{`etcd_network_active_peers{Local="LOCAL_NODE_ID",Remote="REMOTE_PEER_NODE_ID"}`}
  144. }
  145. if mss[i].name == "etcd_network_peer_sent_bytes_total" {
  146. mss[i].metrics = []string{`etcd_network_peer_sent_bytes_total{To="REMOTE_PEER_NODE_ID"}`}
  147. }
  148. if mss[i].name == "etcd_network_peer_received_bytes_total" {
  149. mss[i].metrics = []string{`etcd_network_peer_received_bytes_total{From="REMOTE_PEER_NODE_ID"}`}
  150. }
  151. if mss[i].tp == "histogram" || mss[i].tp == "summary" {
  152. if mss[i].name == "etcd_network_peer_round_trip_time_seconds" {
  153. for j := range mss[i].metrics {
  154. l := mss[i].metrics[j]
  155. if strings.Contains(l, `To="`) && strings.Contains(l, `le="`) {
  156. k1 := strings.Index(l, `To="`)
  157. k2 := strings.Index(l, `",le="`)
  158. mss[i].metrics[j] = l[:k1+4] + "REMOTE_PEER_NODE_ID" + l[k2:]
  159. }
  160. if strings.HasPrefix(l, "etcd_network_peer_round_trip_time_seconds_sum") {
  161. mss[i].metrics[j] = `etcd_network_peer_round_trip_time_seconds_sum{To="REMOTE_PEER_NODE_ID"}`
  162. }
  163. if strings.HasPrefix(l, "etcd_network_peer_round_trip_time_seconds_count") {
  164. mss[i].metrics[j] = `etcd_network_peer_round_trip_time_seconds_count{To="REMOTE_PEER_NODE_ID"}`
  165. }
  166. }
  167. mss[i].metrics = aggSort(mss[i].metrics)
  168. }
  169. }
  170. // aggregate gRPC RPC metrics
  171. if mss[i].name == "grpc_server_handled_total" {
  172. pfx := `grpc_server_handled_total{grpc_code="`
  173. codes, metrics := make(map[string]struct{}), make(map[string]struct{})
  174. for _, v := range mss[i].metrics {
  175. v2 := strings.Replace(v, pfx, "", 1)
  176. idx := strings.Index(v2, `",grpc_method="`)
  177. code := v2[:idx]
  178. v2 = v2[idx:]
  179. codes[code] = struct{}{}
  180. v2 = pfx + "CODE" + v2
  181. metrics[v2] = struct{}{}
  182. }
  183. mss[i].grpcCodes = sortMap(codes)
  184. mss[i].metrics = sortMap(metrics)
  185. }
  186. }
  187. return mss
  188. }