corrupt.go 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. package etcdserver
  15. import (
  16. "context"
  17. "time"
  18. "github.com/coreos/etcd/clientv3"
  19. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  20. "github.com/coreos/etcd/mvcc"
  21. "github.com/coreos/etcd/pkg/types"
  22. )
  23. func (s *EtcdServer) monitorKVHash() {
  24. t := s.Cfg.CorruptCheckTime
  25. if t == 0 {
  26. return
  27. }
  28. plog.Infof("enabled corruption checking with %s interval", t)
  29. for {
  30. select {
  31. case <-s.stopping:
  32. return
  33. case <-time.After(t):
  34. }
  35. if !s.isLeader() {
  36. continue
  37. }
  38. if err := s.checkHashKV(); err != nil {
  39. plog.Debugf("check hash kv failed %v", err)
  40. }
  41. }
  42. }
  43. func (s *EtcdServer) checkHashKV() error {
  44. h, rev, crev, err := s.kv.HashByRev(0)
  45. if err != nil {
  46. plog.Fatalf("failed to hash kv store (%v)", err)
  47. }
  48. resps := s.getPeerHashKVs(rev)
  49. ctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout())
  50. err = s.linearizableReadNotify(ctx)
  51. cancel()
  52. if err != nil {
  53. return err
  54. }
  55. h2, rev2, crev2, err := s.kv.HashByRev(0)
  56. if err != nil {
  57. plog.Warningf("failed to hash kv store (%v)", err)
  58. return err
  59. }
  60. alarmed := false
  61. mismatch := func(id uint64) {
  62. if alarmed {
  63. return
  64. }
  65. alarmed = true
  66. a := &pb.AlarmRequest{
  67. MemberID: uint64(id),
  68. Action: pb.AlarmRequest_ACTIVATE,
  69. Alarm: pb.AlarmType_CORRUPT,
  70. }
  71. s.goAttach(func() {
  72. s.raftRequest(s.ctx, pb.InternalRaftRequest{Alarm: a})
  73. })
  74. }
  75. if h2 != h && rev2 == rev && crev == crev2 {
  76. plog.Warningf("mismatched hashes %d and %d for revision %d", h, h2, rev)
  77. mismatch(uint64(s.ID()))
  78. }
  79. for _, resp := range resps {
  80. id := resp.Header.MemberId
  81. // leader expects follower's latest revision less than or equal to leader's
  82. if resp.Header.Revision > rev2 {
  83. plog.Warningf(
  84. "revision %d from member %v, expected at most %d",
  85. resp.Header.Revision,
  86. types.ID(id),
  87. rev2)
  88. mismatch(id)
  89. }
  90. // leader expects follower's latest compact revision less than or equal to leader's
  91. if resp.CompactRevision > crev2 {
  92. plog.Warningf(
  93. "compact revision %d from member %v, expected at most %d",
  94. resp.CompactRevision,
  95. types.ID(id),
  96. crev2,
  97. )
  98. mismatch(id)
  99. }
  100. // follower's compact revision is leader's old one, then hashes must match
  101. if resp.CompactRevision == crev && resp.Hash != h {
  102. plog.Warningf(
  103. "hash %d at revision %d from member %v, expected hash %d",
  104. resp.Hash,
  105. rev,
  106. types.ID(id),
  107. h,
  108. )
  109. mismatch(id)
  110. }
  111. }
  112. return nil
  113. }
  114. func (s *EtcdServer) getPeerHashKVs(rev int64) (resps []*clientv3.HashKVResponse) {
  115. for _, m := range s.cluster.Members() {
  116. if m.ID == s.ID() {
  117. continue
  118. }
  119. cli, cerr := clientv3.New(clientv3.Config{
  120. DialTimeout: s.Cfg.ReqTimeout(),
  121. Endpoints: m.PeerURLs,
  122. })
  123. if cerr != nil {
  124. plog.Warningf("%s failed to create client to peer %s for hash checking (%q)", s.ID(), types.ID(m.ID), cerr.Error())
  125. continue
  126. }
  127. respsLen := len(resps)
  128. for _, c := range cli.Endpoints() {
  129. ctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout())
  130. resp, herr := cli.HashKV(ctx, c, rev)
  131. cancel()
  132. if herr == nil {
  133. cerr = herr
  134. resps = append(resps, resp)
  135. break
  136. }
  137. }
  138. cli.Close()
  139. if respsLen == len(resps) {
  140. plog.Warningf("%s failed to hash kv for peer %s (%v)", s.ID(), types.ID(m.ID), cerr)
  141. }
  142. }
  143. return resps
  144. }
  145. type applierV3Corrupt struct {
  146. applierV3
  147. }
  148. func newApplierV3Corrupt(a applierV3) *applierV3Corrupt { return &applierV3Corrupt{a} }
  149. func (a *applierV3Corrupt) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, error) {
  150. return nil, ErrCorrupt
  151. }
  152. func (a *applierV3Corrupt) Range(txn mvcc.TxnRead, p *pb.RangeRequest) (*pb.RangeResponse, error) {
  153. return nil, ErrCorrupt
  154. }
  155. func (a *applierV3Corrupt) DeleteRange(txn mvcc.TxnWrite, p *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
  156. return nil, ErrCorrupt
  157. }
  158. func (a *applierV3Corrupt) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
  159. return nil, ErrCorrupt
  160. }
  161. func (a *applierV3Corrupt) Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, error) {
  162. return nil, nil, ErrCorrupt
  163. }
  164. func (a *applierV3Corrupt) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
  165. return nil, ErrCorrupt
  166. }
  167. func (a *applierV3Corrupt) LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
  168. return nil, ErrCorrupt
  169. }