rawnode.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package raft
  15. import (
  16. "errors"
  17. pb "go.etcd.io/etcd/raft/raftpb"
  18. "go.etcd.io/etcd/raft/tracker"
  19. )
  20. // ErrStepLocalMsg is returned when try to step a local raft message
  21. var ErrStepLocalMsg = errors.New("raft: cannot step raft local message")
  22. // ErrStepPeerNotFound is returned when try to step a response message
  23. // but there is no peer found in raft.prs for that node.
  24. var ErrStepPeerNotFound = errors.New("raft: cannot step as peer not found")
  25. // RawNode is a thread-unsafe Node.
  26. // The methods of this struct correspond to the methods of Node and are described
  27. // more fully there.
  28. type RawNode struct {
  29. raft *raft
  30. prevSoftSt *SoftState
  31. prevHardSt pb.HardState
  32. }
  33. // NewRawNode returns a new RawNode given configuration and a list of raft peers.
  34. func NewRawNode(config *Config, peers []Peer) (*RawNode, error) {
  35. if config.ID == 0 {
  36. panic("config.ID must not be zero")
  37. }
  38. r := newRaft(config)
  39. rn := &RawNode{
  40. raft: r,
  41. }
  42. if err := rn.init(peers); err != nil {
  43. return nil, err
  44. }
  45. return rn, nil
  46. }
  47. func (rn *RawNode) init(peers []Peer) error {
  48. r := rn.raft
  49. lastIndex, err := rn.raft.raftLog.storage.LastIndex()
  50. if err != nil {
  51. return err
  52. }
  53. // If the log is empty, this is a new RawNode (like StartNode); otherwise it's
  54. // restoring an existing RawNode (like RestartNode).
  55. // TODO(bdarnell): rethink RawNode initialization and whether the application needs
  56. // to be able to tell us when it expects the RawNode to exist.
  57. if lastIndex == 0 {
  58. rn.raft.becomeFollower(1, None)
  59. ents := make([]pb.Entry, len(peers))
  60. for i, peer := range peers {
  61. cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context}
  62. data, err := cc.Marshal()
  63. if err != nil {
  64. return err
  65. }
  66. ents[i] = pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: uint64(i + 1), Data: data}
  67. }
  68. rn.raft.raftLog.append(ents...)
  69. // Now apply them, mainly so that the application can call Campaign
  70. // immediately after StartNode in tests. Note that these nodes will
  71. // be added to raft twice: here and when the application's Ready
  72. // loop calls ApplyConfChange. The calls to addNode must come after
  73. // all calls to raftLog.append so progress.next is set after these
  74. // bootstrapping entries (it is an error if we try to append these
  75. // entries since they have already been committed).
  76. // We do not set raftLog.applied so the application will be able
  77. // to observe all conf changes via Ready.CommittedEntries.
  78. //
  79. // TODO(bdarnell): These entries are still unstable; do we need to preserve
  80. // the invariant that committed < unstable?
  81. r.raftLog.committed = uint64(len(ents))
  82. for _, peer := range peers {
  83. r.applyConfChange(pb.ConfChange{NodeID: peer.ID, Type: pb.ConfChangeAddNode})
  84. }
  85. }
  86. // Set the initial hard and soft states after performing all initialization.
  87. rn.prevSoftSt = r.softState()
  88. if lastIndex == 0 {
  89. rn.prevHardSt = emptyState
  90. } else {
  91. rn.prevHardSt = r.hardState()
  92. }
  93. return nil
  94. }
  95. // Tick advances the internal logical clock by a single tick.
  96. func (rn *RawNode) Tick() {
  97. rn.raft.tick()
  98. }
  99. // TickQuiesced advances the internal logical clock by a single tick without
  100. // performing any other state machine processing. It allows the caller to avoid
  101. // periodic heartbeats and elections when all of the peers in a Raft group are
  102. // known to be at the same state. Expected usage is to periodically invoke Tick
  103. // or TickQuiesced depending on whether the group is "active" or "quiesced".
  104. //
  105. // WARNING: Be very careful about using this method as it subverts the Raft
  106. // state machine. You should probably be using Tick instead.
  107. func (rn *RawNode) TickQuiesced() {
  108. rn.raft.electionElapsed++
  109. }
  110. // Campaign causes this RawNode to transition to candidate state.
  111. func (rn *RawNode) Campaign() error {
  112. return rn.raft.Step(pb.Message{
  113. Type: pb.MsgHup,
  114. })
  115. }
  116. // Propose proposes data be appended to the raft log.
  117. func (rn *RawNode) Propose(data []byte) error {
  118. return rn.raft.Step(pb.Message{
  119. Type: pb.MsgProp,
  120. From: rn.raft.id,
  121. Entries: []pb.Entry{
  122. {Data: data},
  123. }})
  124. }
  125. // ProposeConfChange proposes a config change.
  126. func (rn *RawNode) ProposeConfChange(cc pb.ConfChange) error {
  127. data, err := cc.Marshal()
  128. if err != nil {
  129. return err
  130. }
  131. return rn.raft.Step(pb.Message{
  132. Type: pb.MsgProp,
  133. Entries: []pb.Entry{
  134. {Type: pb.EntryConfChange, Data: data},
  135. },
  136. })
  137. }
  138. // ApplyConfChange applies a config change to the local node.
  139. func (rn *RawNode) ApplyConfChange(cc pb.ConfChange) *pb.ConfState {
  140. cs := rn.raft.applyConfChange(cc)
  141. return &cs
  142. }
  143. // Step advances the state machine using the given message.
  144. func (rn *RawNode) Step(m pb.Message) error {
  145. // ignore unexpected local messages receiving over network
  146. if IsLocalMsg(m.Type) {
  147. return ErrStepLocalMsg
  148. }
  149. if pr := rn.raft.prs.Progress[m.From]; pr != nil || !IsResponseMsg(m.Type) {
  150. return rn.raft.Step(m)
  151. }
  152. return ErrStepPeerNotFound
  153. }
  154. // Ready returns the outstanding work that the application needs to handle. This
  155. // includes appending and applying entries or a snapshot, updating the HardState,
  156. // and sending messages. Ready() is a read-only operation, that is, it does not
  157. // require the caller to actually handle the result. Typically, a caller will
  158. // want to handle the Ready and must pass the Ready to Advance *after* having
  159. // done so. While a Ready is being handled, the RawNode must not be used for
  160. // operations that may alter its state. For example, it is illegal to call
  161. // Ready, followed by Step, followed by Advance.
  162. func (rn *RawNode) Ready() Ready {
  163. rd := rn.newReady()
  164. return rd
  165. }
  166. func (rn *RawNode) newReady() Ready {
  167. return newReady(rn.raft, rn.prevSoftSt, rn.prevHardSt)
  168. }
  169. // acceptReady is called when the consumer of the RawNode has decided to go
  170. // ahead and handle a Ready. Nothing must alter the state of the RawNode between
  171. // this call and the prior call to Ready().
  172. func (rn *RawNode) acceptReady(rd Ready) {
  173. if rd.SoftState != nil {
  174. rn.prevSoftSt = rd.SoftState
  175. }
  176. if len(rd.ReadStates) != 0 {
  177. rn.raft.readStates = nil
  178. }
  179. rn.raft.msgs = nil
  180. }
  181. // commitReady is called when the consumer of the RawNode has successfully
  182. // handled a Ready (having previously called acceptReady).
  183. func (rn *RawNode) commitReady(rd Ready) {
  184. if !IsEmptyHardState(rd.HardState) {
  185. rn.prevHardSt = rd.HardState
  186. }
  187. // If entries were applied (or a snapshot), update our cursor for
  188. // the next Ready. Note that if the current HardState contains a
  189. // new Commit index, this does not mean that we're also applying
  190. // all of the new entries due to commit pagination by size.
  191. if index := rd.appliedCursor(); index > 0 {
  192. rn.raft.raftLog.appliedTo(index)
  193. }
  194. rn.raft.reduceUncommittedSize(rd.CommittedEntries)
  195. if len(rd.Entries) > 0 {
  196. e := rd.Entries[len(rd.Entries)-1]
  197. rn.raft.raftLog.stableTo(e.Index, e.Term)
  198. }
  199. if !IsEmptySnap(rd.Snapshot) {
  200. rn.raft.raftLog.stableSnapTo(rd.Snapshot.Metadata.Index)
  201. }
  202. }
  203. // HasReady called when RawNode user need to check if any Ready pending.
  204. // Checking logic in this method should be consistent with Ready.containsUpdates().
  205. func (rn *RawNode) HasReady() bool {
  206. r := rn.raft
  207. if !r.softState().equal(rn.prevSoftSt) {
  208. return true
  209. }
  210. if hardSt := r.hardState(); !IsEmptyHardState(hardSt) && !isHardStateEqual(hardSt, rn.prevHardSt) {
  211. return true
  212. }
  213. if r.raftLog.unstable.snapshot != nil && !IsEmptySnap(*r.raftLog.unstable.snapshot) {
  214. return true
  215. }
  216. if len(r.msgs) > 0 || len(r.raftLog.unstableEntries()) > 0 || r.raftLog.hasNextEnts() {
  217. return true
  218. }
  219. if len(r.readStates) != 0 {
  220. return true
  221. }
  222. return false
  223. }
  224. // Advance notifies the RawNode that the application has applied and saved progress in the
  225. // last Ready results.
  226. func (rn *RawNode) Advance(rd Ready) {
  227. // Advance combines accept and commit. Callers can't mutate the RawNode
  228. // between the call to Ready and the matching call to Advance, or the work
  229. // done in acceptReady will clobber potentially newer data that has not been
  230. // emitted in a Ready yet.
  231. rn.acceptReady(rd)
  232. rn.commitReady(rd)
  233. }
  234. // Status returns the current status of the given group. This allocates, see
  235. // BasicStatus and WithProgress for allocation-friendlier choices.
  236. func (rn *RawNode) Status() Status {
  237. status := getStatus(rn.raft)
  238. return status
  239. }
  240. // BasicStatus returns a BasicStatus. Notably this does not contain the
  241. // Progress map; see WithProgress for an allocation-free way to inspect it.
  242. func (rn *RawNode) BasicStatus() BasicStatus {
  243. return getBasicStatus(rn.raft)
  244. }
  245. // ProgressType indicates the type of replica a Progress corresponds to.
  246. type ProgressType byte
  247. const (
  248. // ProgressTypePeer accompanies a Progress for a regular peer replica.
  249. ProgressTypePeer ProgressType = iota
  250. // ProgressTypeLearner accompanies a Progress for a learner replica.
  251. ProgressTypeLearner
  252. )
  253. // WithProgress is a helper to introspect the Progress for this node and its
  254. // peers.
  255. func (rn *RawNode) WithProgress(visitor func(id uint64, typ ProgressType, pr tracker.Progress)) {
  256. rn.raft.prs.Visit(func(id uint64, pr *tracker.Progress) {
  257. typ := ProgressTypePeer
  258. if pr.IsLearner {
  259. typ = ProgressTypeLearner
  260. }
  261. p := *pr
  262. p.Inflights = nil
  263. visitor(id, typ, p)
  264. })
  265. }
  266. // ReportUnreachable reports the given node is not reachable for the last send.
  267. func (rn *RawNode) ReportUnreachable(id uint64) {
  268. _ = rn.raft.Step(pb.Message{Type: pb.MsgUnreachable, From: id})
  269. }
  270. // ReportSnapshot reports the status of the sent snapshot.
  271. func (rn *RawNode) ReportSnapshot(id uint64, status SnapshotStatus) {
  272. rej := status == SnapshotFailure
  273. _ = rn.raft.Step(pb.Message{Type: pb.MsgSnapStatus, From: id, Reject: rej})
  274. }
  275. // TransferLeader tries to transfer leadership to the given transferee.
  276. func (rn *RawNode) TransferLeader(transferee uint64) {
  277. _ = rn.raft.Step(pb.Message{Type: pb.MsgTransferLeader, From: transferee})
  278. }
  279. // ReadIndex requests a read state. The read state will be set in ready.
  280. // Read State has a read index. Once the application advances further than the read
  281. // index, any linearizable read requests issued before the read request can be
  282. // processed safely. The read state will have the same rctx attached.
  283. func (rn *RawNode) ReadIndex(rctx []byte) {
  284. _ = rn.raft.Step(pb.Message{Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: rctx}}})
  285. }