summary.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577
  1. // Copyright 2014 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package prometheus
  14. import (
  15. "fmt"
  16. "math"
  17. "sort"
  18. "sync"
  19. "time"
  20. "github.com/beorn7/perks/quantile"
  21. "github.com/golang/protobuf/proto"
  22. dto "github.com/prometheus/client_model/go"
  23. )
  // quantileLabel is the label name under which a summary exposes the rank of
  // each quantile. It is reserved: summaries refuse it as a user-supplied label
  // name.
  const (
  	quantileLabel = "quantile"
  )
  27. // A Summary captures individual observations from an event or sample stream and
  28. // summarizes them in a manner similar to traditional summary statistics: 1. sum
  29. // of observations, 2. observation count, 3. rank estimations.
  30. //
  31. // A typical use-case is the observation of request latencies. By default, a
  32. // Summary provides the median, the 90th and the 99th percentile of the latency
  33. // as rank estimations. However, the default behavior will change in the
  34. // upcoming v0.10 of the library. There will be no rank estiamtions at all by
  35. // default. For a sane transition, it is recommended to set the desired rank
  36. // estimations explicitly.
  37. //
  38. // Note that the rank estimations cannot be aggregated in a meaningful way with
  39. // the Prometheus query language (i.e. you cannot average or add them). If you
  40. // need aggregatable quantiles (e.g. you want the 99th percentile latency of all
  41. // queries served across all instances of a service), consider the Histogram
  42. // metric type. See the Prometheus documentation for more details.
  43. //
  44. // To create Summary instances, use NewSummary.
  45. type Summary interface {
  46. Metric
  47. Collector
  48. // Observe adds a single observation to the summary.
  49. Observe(float64)
  50. }
  51. // DefObjectives are the default Summary quantile values.
  52. //
  53. // Deprecated: DefObjectives will not be used as the default objectives in
  54. // v0.10 of the library. The default Summary will have no quantiles then.
  55. var (
  56. DefObjectives = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}
  57. errQuantileLabelNotAllowed = fmt.Errorf(
  58. "%q is not allowed as label name in summaries", quantileLabel,
  59. )
  60. )
  // Defaults applied to SummaryOpts fields that are left at their zero value.
  const (
  	// DefMaxAge is how long an observation stays relevant by default.
  	DefMaxAge = 10 * time.Minute

  	// DefAgeBuckets is the default number of buckets used to calculate
  	// the age of observations.
  	DefAgeBuckets = 5

  	// DefBufCap is the standard size of the buffer that collects Summary
  	// observations.
  	DefBufCap = 500
  )
  72. // SummaryOpts bundles the options for creating a Summary metric. It is
  73. // mandatory to set Name and Help to a non-empty string. While all other fields
  74. // are optional and can safely be left at their zero value, it is recommended to
  75. // explicitly set the Objectives field to the desired value as the default value
  76. // will change in the upcoming v0.10 of the library.
  77. type SummaryOpts struct {
  78. // Namespace, Subsystem, and Name are components of the fully-qualified
  79. // name of the Summary (created by joining these components with
  80. // "_"). Only Name is mandatory, the others merely help structuring the
  81. // name. Note that the fully-qualified name of the Summary must be a
  82. // valid Prometheus metric name.
  83. Namespace string
  84. Subsystem string
  85. Name string
  86. // Help provides information about this Summary. Mandatory!
  87. //
  88. // Metrics with the same fully-qualified name must have the same Help
  89. // string.
  90. Help string
  91. // ConstLabels are used to attach fixed labels to this
  92. // Summary. Summaries with the same fully-qualified name must have the
  93. // same label names in their ConstLabels.
  94. //
  95. // Note that in most cases, labels have a value that varies during the
  96. // lifetime of a process. Those labels are usually managed with a
  97. // SummaryVec. ConstLabels serve only special purposes. One is for the
  98. // special case where the value of a label does not change during the
  99. // lifetime of a process, e.g. if the revision of the running binary is
  100. // put into a label. Another, more advanced purpose is if more than one
  101. // Collector needs to collect Summaries with the same fully-qualified
  102. // name. In that case, those Summaries must differ in the values of
  103. // their ConstLabels. See the Collector examples.
  104. //
  105. // If the value of a label never changes (not even between binaries),
  106. // that label most likely should not be a label at all (but part of the
  107. // metric name).
  108. ConstLabels Labels
  109. // Objectives defines the quantile rank estimates with their respective
  110. // absolute error. If Objectives[q] = e, then the value reported for q
  111. // will be the φ-quantile value for some φ between q-e and q+e. The
  112. // default value is DefObjectives. It is used if Objectives is left at
  113. // its zero value (i.e. nil). To create a Summary without Objectives,
  114. // set it to an empty map (i.e. map[float64]float64{}).
  115. //
  116. // Deprecated: Note that the current value of DefObjectives is
  117. // deprecated. It will be replaced by an empty map in v0.10 of the
  118. // library. Please explicitly set Objectives to the desired value.
  119. Objectives map[float64]float64
  120. // MaxAge defines the duration for which an observation stays relevant
  121. // for the summary. Must be positive. The default value is DefMaxAge.
  122. MaxAge time.Duration
  123. // AgeBuckets is the number of buckets used to exclude observations that
  124. // are older than MaxAge from the summary. A higher number has a
  125. // resource penalty, so only increase it if the higher resolution is
  126. // really required. For very high observation rates, you might want to
  127. // reduce the number of age buckets. With only one age bucket, you will
  128. // effectively see a complete reset of the summary each time MaxAge has
  129. // passed. The default value is DefAgeBuckets.
  130. AgeBuckets uint32
  131. // BufCap defines the default sample stream buffer size. The default
  132. // value of DefBufCap should suffice for most uses. If there is a need
  133. // to increase the value, a multiple of 500 is recommended (because that
  134. // is the internal buffer size of the underlying package
  135. // "github.com/bmizerany/perks/quantile").
  136. BufCap uint32
  137. }
  138. // Caveat about the sliding-window decay algorithm: the Merge method of
  139. // perks/quantile is actually not working as advertised - and it might be
  140. // unfixable, as the underlying algorithm is apparently not capable of merging
  141. // summaries in the first place. To avoid using Merge, we are currently adding
  142. // observations to _each_ age bucket, i.e. the effort to add a sample is
  143. // essentially multiplied by the number of age buckets. When rotating age
  144. // buckets, we empty the previous head stream. On scrape time, we simply take
  145. // the quantiles from the head stream (no merging required). Result: More effort
  146. // on observation time, less effort on scrape time, which is exactly the
  147. // opposite of what we try to accomplish, but at least the results are correct.
  148. //
  149. // The quite elegant previous contraption to merge the age buckets efficiently
  150. // on scrape time (see code up to commit 6b9530d72ea715f0ba612c0120e6e09fbf1d49d0)
  151. // can't be used anymore.
  152. // NewSummary creates a new Summary based on the provided SummaryOpts.
  153. func NewSummary(opts SummaryOpts) Summary {
  154. return newSummary(
  155. NewDesc(
  156. BuildFQName(opts.Namespace, opts.Subsystem, opts.Name),
  157. opts.Help,
  158. nil,
  159. opts.ConstLabels,
  160. ),
  161. opts,
  162. )
  163. }
  164. func newSummary(desc *Desc, opts SummaryOpts, labelValues ...string) Summary {
  165. if len(desc.variableLabels) != len(labelValues) {
  166. panic(errInconsistentCardinality)
  167. }
  168. for _, n := range desc.variableLabels {
  169. if n == quantileLabel {
  170. panic(errQuantileLabelNotAllowed)
  171. }
  172. }
  173. for _, lp := range desc.constLabelPairs {
  174. if lp.GetName() == quantileLabel {
  175. panic(errQuantileLabelNotAllowed)
  176. }
  177. }
  178. if opts.Objectives == nil {
  179. opts.Objectives = DefObjectives
  180. }
  181. if opts.MaxAge < 0 {
  182. panic(fmt.Errorf("illegal max age MaxAge=%v", opts.MaxAge))
  183. }
  184. if opts.MaxAge == 0 {
  185. opts.MaxAge = DefMaxAge
  186. }
  187. if opts.AgeBuckets == 0 {
  188. opts.AgeBuckets = DefAgeBuckets
  189. }
  190. if opts.BufCap == 0 {
  191. opts.BufCap = DefBufCap
  192. }
  193. s := &summary{
  194. desc: desc,
  195. objectives: opts.Objectives,
  196. sortedObjectives: make([]float64, 0, len(opts.Objectives)),
  197. labelPairs: makeLabelPairs(desc, labelValues),
  198. hotBuf: make([]float64, 0, opts.BufCap),
  199. coldBuf: make([]float64, 0, opts.BufCap),
  200. streamDuration: opts.MaxAge / time.Duration(opts.AgeBuckets),
  201. }
  202. s.headStreamExpTime = time.Now().Add(s.streamDuration)
  203. s.hotBufExpTime = s.headStreamExpTime
  204. for i := uint32(0); i < opts.AgeBuckets; i++ {
  205. s.streams = append(s.streams, s.newStream())
  206. }
  207. s.headStream = s.streams[0]
  208. for qu := range s.objectives {
  209. s.sortedObjectives = append(s.sortedObjectives, qu)
  210. }
  211. sort.Float64s(s.sortedObjectives)
  212. s.init(s) // Init self-collection.
  213. return s
  214. }
  215. type summary struct {
  216. selfCollector
  217. bufMtx sync.Mutex // Protects hotBuf and hotBufExpTime.
  218. mtx sync.Mutex // Protects every other moving part.
  219. // Lock bufMtx before mtx if both are needed.
  220. desc *Desc
  221. objectives map[float64]float64
  222. sortedObjectives []float64
  223. labelPairs []*dto.LabelPair
  224. sum float64
  225. cnt uint64
  226. hotBuf, coldBuf []float64
  227. streams []*quantile.Stream
  228. streamDuration time.Duration
  229. headStream *quantile.Stream
  230. headStreamIdx int
  231. headStreamExpTime, hotBufExpTime time.Time
  232. }
  233. func (s *summary) Desc() *Desc {
  234. return s.desc
  235. }
  236. func (s *summary) Observe(v float64) {
  237. s.bufMtx.Lock()
  238. defer s.bufMtx.Unlock()
  239. now := time.Now()
  240. if now.After(s.hotBufExpTime) {
  241. s.asyncFlush(now)
  242. }
  243. s.hotBuf = append(s.hotBuf, v)
  244. if len(s.hotBuf) == cap(s.hotBuf) {
  245. s.asyncFlush(now)
  246. }
  247. }
  248. func (s *summary) Write(out *dto.Metric) error {
  249. sum := &dto.Summary{}
  250. qs := make([]*dto.Quantile, 0, len(s.objectives))
  251. s.bufMtx.Lock()
  252. s.mtx.Lock()
  253. // Swap bufs even if hotBuf is empty to set new hotBufExpTime.
  254. s.swapBufs(time.Now())
  255. s.bufMtx.Unlock()
  256. s.flushColdBuf()
  257. sum.SampleCount = proto.Uint64(s.cnt)
  258. sum.SampleSum = proto.Float64(s.sum)
  259. for _, rank := range s.sortedObjectives {
  260. var q float64
  261. if s.headStream.Count() == 0 {
  262. q = math.NaN()
  263. } else {
  264. q = s.headStream.Query(rank)
  265. }
  266. qs = append(qs, &dto.Quantile{
  267. Quantile: proto.Float64(rank),
  268. Value: proto.Float64(q),
  269. })
  270. }
  271. s.mtx.Unlock()
  272. if len(qs) > 0 {
  273. sort.Sort(quantSort(qs))
  274. }
  275. sum.Quantile = qs
  276. out.Summary = sum
  277. out.Label = s.labelPairs
  278. return nil
  279. }
  280. func (s *summary) newStream() *quantile.Stream {
  281. return quantile.NewTargeted(s.objectives)
  282. }
  283. // asyncFlush needs bufMtx locked.
  284. func (s *summary) asyncFlush(now time.Time) {
  285. s.mtx.Lock()
  286. s.swapBufs(now)
  287. // Unblock the original goroutine that was responsible for the mutation
  288. // that triggered the compaction. But hold onto the global non-buffer
  289. // state mutex until the operation finishes.
  290. go func() {
  291. s.flushColdBuf()
  292. s.mtx.Unlock()
  293. }()
  294. }
  295. // rotateStreams needs mtx AND bufMtx locked.
  296. func (s *summary) maybeRotateStreams() {
  297. for !s.hotBufExpTime.Equal(s.headStreamExpTime) {
  298. s.headStream.Reset()
  299. s.headStreamIdx++
  300. if s.headStreamIdx >= len(s.streams) {
  301. s.headStreamIdx = 0
  302. }
  303. s.headStream = s.streams[s.headStreamIdx]
  304. s.headStreamExpTime = s.headStreamExpTime.Add(s.streamDuration)
  305. }
  306. }
  307. // flushColdBuf needs mtx locked.
  308. func (s *summary) flushColdBuf() {
  309. for _, v := range s.coldBuf {
  310. for _, stream := range s.streams {
  311. stream.Insert(v)
  312. }
  313. s.cnt++
  314. s.sum += v
  315. }
  316. s.coldBuf = s.coldBuf[0:0]
  317. s.maybeRotateStreams()
  318. }
  319. // swapBufs needs mtx AND bufMtx locked, coldBuf must be empty.
  320. func (s *summary) swapBufs(now time.Time) {
  321. if len(s.coldBuf) != 0 {
  322. panic("coldBuf is not empty")
  323. }
  324. s.hotBuf, s.coldBuf = s.coldBuf, s.hotBuf
  325. // hotBuf is now empty and gets new expiration set.
  326. for now.After(s.hotBufExpTime) {
  327. s.hotBufExpTime = s.hotBufExpTime.Add(s.streamDuration)
  328. }
  329. }
  330. type quantSort []*dto.Quantile
  331. func (s quantSort) Len() int {
  332. return len(s)
  333. }
  334. func (s quantSort) Swap(i, j int) {
  335. s[i], s[j] = s[j], s[i]
  336. }
  337. func (s quantSort) Less(i, j int) bool {
  338. return s[i].GetQuantile() < s[j].GetQuantile()
  339. }
  340. // SummaryVec is a Collector that bundles a set of Summaries that all share the
  341. // same Desc, but have different values for their variable labels. This is used
  342. // if you want to count the same thing partitioned by various dimensions
  343. // (e.g. HTTP request latencies, partitioned by status code and method). Create
  344. // instances with NewSummaryVec.
  345. type SummaryVec struct {
  346. *metricVec
  347. }
  348. // NewSummaryVec creates a new SummaryVec based on the provided SummaryOpts and
  349. // partitioned by the given label names.
  350. func NewSummaryVec(opts SummaryOpts, labelNames []string) *SummaryVec {
  351. desc := NewDesc(
  352. BuildFQName(opts.Namespace, opts.Subsystem, opts.Name),
  353. opts.Help,
  354. labelNames,
  355. opts.ConstLabels,
  356. )
  357. return &SummaryVec{
  358. metricVec: newMetricVec(desc, func(lvs ...string) Metric {
  359. return newSummary(desc, opts, lvs...)
  360. }),
  361. }
  362. }
  363. // GetMetricWithLabelValues returns the Summary for the given slice of label
  364. // values (same order as the VariableLabels in Desc). If that combination of
  365. // label values is accessed for the first time, a new Summary is created.
  366. //
  367. // It is possible to call this method without using the returned Summary to only
  368. // create the new Summary but leave it at its starting value, a Summary without
  369. // any observations.
  370. //
  371. // Keeping the Summary for later use is possible (and should be considered if
  372. // performance is critical), but keep in mind that Reset, DeleteLabelValues and
  373. // Delete can be used to delete the Summary from the SummaryVec. In that case, the
  374. // Summary will still exist, but it will not be exported anymore, even if a
  375. // Summary with the same label values is created later. See also the CounterVec
  376. // example.
  377. //
  378. // An error is returned if the number of label values is not the same as the
  379. // number of VariableLabels in Desc.
  380. //
  381. // Note that for more than one label value, this method is prone to mistakes
  382. // caused by an incorrect order of arguments. Consider GetMetricWith(Labels) as
  383. // an alternative to avoid that type of mistake. For higher label numbers, the
  384. // latter has a much more readable (albeit more verbose) syntax, but it comes
  385. // with a performance overhead (for creating and processing the Labels map).
  386. // See also the GaugeVec example.
  387. func (m *SummaryVec) GetMetricWithLabelValues(lvs ...string) (Observer, error) {
  388. metric, err := m.metricVec.getMetricWithLabelValues(lvs...)
  389. if metric != nil {
  390. return metric.(Observer), err
  391. }
  392. return nil, err
  393. }
  394. // GetMetricWith returns the Summary for the given Labels map (the label names
  395. // must match those of the VariableLabels in Desc). If that label map is
  396. // accessed for the first time, a new Summary is created. Implications of
  397. // creating a Summary without using it and keeping the Summary for later use are
  398. // the same as for GetMetricWithLabelValues.
  399. //
  400. // An error is returned if the number and names of the Labels are inconsistent
  401. // with those of the VariableLabels in Desc.
  402. //
  403. // This method is used for the same purpose as
  404. // GetMetricWithLabelValues(...string). See there for pros and cons of the two
  405. // methods.
  406. func (m *SummaryVec) GetMetricWith(labels Labels) (Observer, error) {
  407. metric, err := m.metricVec.getMetricWith(labels)
  408. if metric != nil {
  409. return metric.(Observer), err
  410. }
  411. return nil, err
  412. }
  413. // WithLabelValues works as GetMetricWithLabelValues, but panics where
  414. // GetMetricWithLabelValues would have returned an error. By not returning an
  415. // error, WithLabelValues allows shortcuts like
  416. // myVec.WithLabelValues("404", "GET").Observe(42.21)
  417. func (m *SummaryVec) WithLabelValues(lvs ...string) Observer {
  418. return m.metricVec.withLabelValues(lvs...).(Observer)
  419. }
  420. // With works as GetMetricWith, but panics where GetMetricWithLabels would have
  421. // returned an error. By not returning an error, With allows shortcuts like
  422. // myVec.With(Labels{"code": "404", "method": "GET"}).Observe(42.21)
  423. func (m *SummaryVec) With(labels Labels) Observer {
  424. return m.metricVec.with(labels).(Observer)
  425. }
  426. type constSummary struct {
  427. desc *Desc
  428. count uint64
  429. sum float64
  430. quantiles map[float64]float64
  431. labelPairs []*dto.LabelPair
  432. }
  433. func (s *constSummary) Desc() *Desc {
  434. return s.desc
  435. }
  436. func (s *constSummary) Write(out *dto.Metric) error {
  437. sum := &dto.Summary{}
  438. qs := make([]*dto.Quantile, 0, len(s.quantiles))
  439. sum.SampleCount = proto.Uint64(s.count)
  440. sum.SampleSum = proto.Float64(s.sum)
  441. for rank, q := range s.quantiles {
  442. qs = append(qs, &dto.Quantile{
  443. Quantile: proto.Float64(rank),
  444. Value: proto.Float64(q),
  445. })
  446. }
  447. if len(qs) > 0 {
  448. sort.Sort(quantSort(qs))
  449. }
  450. sum.Quantile = qs
  451. out.Summary = sum
  452. out.Label = s.labelPairs
  453. return nil
  454. }
  455. // NewConstSummary returns a metric representing a Prometheus summary with fixed
  456. // values for the count, sum, and quantiles. As those parameters cannot be
  457. // changed, the returned value does not implement the Summary interface (but
  458. // only the Metric interface). Users of this package will not have much use for
  459. // it in regular operations. However, when implementing custom Collectors, it is
  460. // useful as a throw-away metric that is generated on the fly to send it to
  461. // Prometheus in the Collect method.
  462. //
  463. // quantiles maps ranks to quantile values. For example, a median latency of
  464. // 0.23s and a 99th percentile latency of 0.56s would be expressed as:
  465. // map[float64]float64{0.5: 0.23, 0.99: 0.56}
  466. //
  467. // NewConstSummary returns an error if the length of labelValues is not
  468. // consistent with the variable labels in Desc.
  469. func NewConstSummary(
  470. desc *Desc,
  471. count uint64,
  472. sum float64,
  473. quantiles map[float64]float64,
  474. labelValues ...string,
  475. ) (Metric, error) {
  476. if err := validateLabelValues(labelValues, len(desc.variableLabels)); err != nil {
  477. return nil, err
  478. }
  479. return &constSummary{
  480. desc: desc,
  481. count: count,
  482. sum: sum,
  483. quantiles: quantiles,
  484. labelPairs: makeLabelPairs(desc, labelValues),
  485. }, nil
  486. }
  487. // MustNewConstSummary is a version of NewConstSummary that panics where
  488. // NewConstMetric would have returned an error.
  489. func MustNewConstSummary(
  490. desc *Desc,
  491. count uint64,
  492. sum float64,
  493. quantiles map[float64]float64,
  494. labelValues ...string,
  495. ) Metric {
  496. m, err := NewConstSummary(desc, count, sum, quantiles, labelValues...)
  497. if err != nil {
  498. panic(err)
  499. }
  500. return m
  501. }