checks.go 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package main
  15. import (
  16. "fmt"
  17. "reflect"
  18. "strings"
  19. "time"
  20. "golang.org/x/net/context"
  21. )
  22. const (
  23. retries = 7
  24. stabilizationPeriod = 3 * time.Second
  25. )
  26. type Checker interface {
  27. // Check returns an error if the system fails a consistency check.
  28. Check() error
  29. }
  30. type hashAndRevGetter interface {
  31. getRevisionHash() (revs map[string]int64, hashes map[string]int64, err error)
  32. }
  33. type hashChecker struct {
  34. hrg hashAndRevGetter
  35. }
  36. func newHashChecker(hrg hashAndRevGetter) Checker { return &hashChecker{hrg} }
  37. const leaseCheckerTimeout = 10 * time.Second
  38. func (hc *hashChecker) checkRevAndHashes() (err error) {
  39. // retries in case of transient failure or etcd nodes have not stablized yet.
  40. var (
  41. revsStable bool
  42. hashesStable bool
  43. )
  44. for i := 0; i < retries; i++ {
  45. revsStable, err = hc.areRevisonsStable()
  46. if err != nil || !revsStable {
  47. continue
  48. }
  49. hashesStable, err = hc.areHashesStable()
  50. if err != nil || !hashesStable {
  51. continue
  52. }
  53. // hashes must be stable at this point
  54. return nil
  55. }
  56. if err != nil {
  57. return err
  58. }
  59. if !revsStable || !hashesStable {
  60. return fmt.Errorf("checkRevAndHashes detects inconsistency: [revisions stable %v] [hashes stable %v]", revsStable, hashesStable)
  61. }
  62. return err
  63. }
  64. func (hc *hashChecker) areRevisonsStable() (rv bool, err error) {
  65. var preRevs map[string]int64
  66. for i := 0; i < 2; i++ {
  67. revs, _, err := hc.hrg.getRevisionHash()
  68. if err != nil {
  69. return false, err
  70. }
  71. _, sameRev := getSameValue(revs)
  72. if !sameRev {
  73. plog.Printf("current revisions are not consistent: revisions [revisions: %v]", revs)
  74. return false, nil
  75. }
  76. // sleep for N seconds. after that, check to make sure that revisions don't change
  77. if i == 0 {
  78. preRevs = revs
  79. time.Sleep(stabilizationPeriod)
  80. } else if !reflect.DeepEqual(revs, preRevs) {
  81. // use map comparison logic found in http://stackoverflow.com/questions/18208394/testing-equivalence-of-maps-golang
  82. plog.Printf("revisions are not stable: [current revisions: %v] [previous revisions: %v]", revs, preRevs)
  83. return false, nil
  84. }
  85. }
  86. plog.Printf("revisions are stable: revisions [revisions: %v]", preRevs)
  87. return true, nil
  88. }
  89. func (hc *hashChecker) areHashesStable() (rv bool, err error) {
  90. var prevHashes map[string]int64
  91. for i := 0; i < 2; i++ {
  92. revs, hashes, err := hc.hrg.getRevisionHash()
  93. if err != nil {
  94. return false, err
  95. }
  96. _, sameRev := getSameValue(revs)
  97. _, sameHashes := getSameValue(hashes)
  98. if !sameRev || !sameHashes {
  99. plog.Printf("hashes are not stable: revisions [revisions: %v] and hashes [hashes: %v]", revs, hashes)
  100. return false, nil
  101. }
  102. // sleep for N seconds. after that, check to make sure that the hashes and revisions don't change
  103. if i == 0 {
  104. time.Sleep(stabilizationPeriod)
  105. prevHashes = hashes
  106. } else if !reflect.DeepEqual(hashes, prevHashes) {
  107. // use map comparison logic found in http://stackoverflow.com/questions/18208394/testing-equivalence-of-maps-golang
  108. plog.Printf("hashes are not stable: [current hashes: %v] [previous hashes: %v]", hashes, prevHashes)
  109. return false, nil
  110. }
  111. }
  112. plog.Printf("hashes are stable: hashes [hashes: %v]", prevHashes)
  113. return true, nil
  114. }
  115. func (hc *hashChecker) Check() error {
  116. return hc.checkRevAndHashes()
  117. }
  118. type leaseChecker struct {
  119. leaseStressers []Stresser
  120. }
  121. func newLeaseChecker(leaseStressers []Stresser) Checker { return &leaseChecker{leaseStressers} }
  122. func (lc *leaseChecker) Check() error {
  123. plog.Info("lease stresser invariant check...")
  124. errc := make(chan error)
  125. for _, ls := range lc.leaseStressers {
  126. go func(s Stresser) { errc <- lc.checkInvariant(s) }(ls)
  127. }
  128. var errs []error
  129. for i := 0; i < len(lc.leaseStressers); i++ {
  130. if err := <-errc; err != nil {
  131. errs = append(errs, err)
  132. }
  133. }
  134. if len(errs) == 0 {
  135. return nil
  136. }
  137. return fmt.Errorf("lease stresser encounters error: (%v)", fromErrsToString(errs))
  138. }
  139. func fromErrsToString(errs []error) string {
  140. stringArr := make([]string, len(errs))
  141. for i, err := range errs {
  142. stringArr[i] = err.Error()
  143. }
  144. return strings.Join(stringArr, ",")
  145. }
  146. func (lc *leaseChecker) checkInvariant(lStresser Stresser) error {
  147. ls := lStresser.(*leaseStresser)
  148. if err := checkLeasesExpired(ls); err != nil {
  149. return err
  150. }
  151. if err := checkLeasesAlive(ls); err != nil {
  152. return err
  153. }
  154. return checkShortLivedLeases(ls)
  155. }
  156. func checkLeasesExpired(ls *leaseStresser) error {
  157. plog.Infof("revoked leases %v", ls.revokedLeases.getLeasesMap())
  158. return checkLeases(true, ls, ls.revokedLeases.getLeasesMap())
  159. }
  160. func checkLeasesAlive(ls *leaseStresser) error {
  161. plog.Infof("alive leases %v", ls.aliveLeases.getLeasesMap())
  162. return checkLeases(false, ls, ls.aliveLeases.getLeasesMap())
  163. }
  164. // checkShortLivedLeases() verifies that the short lived leases are indeed being deleted.
  165. func checkShortLivedLeases(ls *leaseStresser) error {
  166. plog.Infof("short lived leases %v", ls.shortLivedLeases.getLeasesMap())
  167. return checkLeases(true, ls, ls.shortLivedLeases.getLeasesMap())
  168. }
  169. func checkLeases(expired bool, ls *leaseStresser, leases map[int64]time.Time) error {
  170. ctx, cancel := context.WithTimeout(context.Background(), leaseCheckerTimeout)
  171. defer cancel()
  172. for leaseID := range leases {
  173. keysExpired, err := ls.hasKeysAttachedToLeaseExpired(ctx, leaseID)
  174. if err != nil {
  175. plog.Errorf("hasKeysAttachedToLeaseExpired error: (%v)", err)
  176. return err
  177. }
  178. leaseExpired, err := ls.hasLeaseExpired(ctx, leaseID)
  179. if err != nil {
  180. plog.Errorf("hasLeaseExpired error: (%v)", err)
  181. return err
  182. }
  183. if leaseExpired != keysExpired {
  184. return fmt.Errorf("lease %v expiration mismatch (lease expired=%v, keys expired=%v)", leaseID, leaseExpired, keysExpired)
  185. }
  186. if leaseExpired != expired {
  187. return fmt.Errorf("lease %v expected expired=%v, got %v", leaseID, expired, leaseExpired)
  188. }
  189. }
  190. return nil
  191. }
  192. type compositeChecker struct {
  193. checkers []Checker
  194. }
  195. func newCompositeChecker(checkers []Checker) Checker {
  196. return &compositeChecker{checkers}
  197. }
  198. func (cchecker *compositeChecker) Check() error {
  199. for _, checker := range cchecker.checkers {
  200. if err := checker.Check(); err != nil {
  201. return err
  202. }
  203. }
  204. return nil
  205. }
  206. type noChecker struct{}
  207. func newNoChecker() Checker { return &noChecker{} }
  208. func (nc *noChecker) Check() error { return nil }