// raft.proto
  // This schema uses proto2 syntax: singular fields carry an explicit
  // `optional` label.
  syntax = "proto2";

  // All types declared in this file belong to the raftpb package.
  package raftpb;

  // Provides the (gogoproto.*) extensions used by the options below and by the
  // per-field options throughout this file.
  import "gogoproto/gogo.proto";

  // Generate Marshal/MarshalTo methods for every message.
  option (gogoproto.marshaler_all) = true;
  // Generate a Size method for every message.
  option (gogoproto.sizer_all) = true;
  // Generate an Unmarshal method for every message.
  option (gogoproto.unmarshaler_all) = true;
  // Do not generate Get* accessor methods.
  option (gogoproto.goproto_getters_all) = false;
  // Do not prefix generated Go enum constants with the enum type name.
  option (gogoproto.goproto_enum_prefix_all) = false;
  // EntryType is the type of an Entry (see Entry.Type).
  enum EntryType {
    // First declared value, hence the proto2 default when Type is unset.
    EntryNormal = 0;
    // Corresponds to pb.ConfChange.
    EntryConfChange = 1;
    // Corresponds to pb.ConfChangeV2.
    EntryConfChangeV2 = 2;
  }
  // Entry pairs a Term and an Index with a typed, opaque Data payload.
  //
  // NOTE(review): Term and Index are declared ahead of Type even though Type
  // owns field number 1. Keep this declaration order: both uint64 fields must
  // be 64-bit aligned for atomic operations, which presumably depends on their
  // position in the generated struct. Confirm before reordering.
  message Entry {
    // Must be 64-bit aligned for atomic operations.
    optional uint64 Term = 2 [(gogoproto.nullable) = false];
    // Must be 64-bit aligned for atomic operations.
    optional uint64 Index = 3 [(gogoproto.nullable) = false];
    // Selects how Data is to be interpreted; see EntryType.
    optional EntryType Type = 1 [(gogoproto.nullable) = false];
    // Opaque payload bytes.
    optional bytes Data = 4;
  }
  // SnapshotMetadata groups a ConfState with an index and a term. It is
  // embedded in Snapshot.
  //
  // All fields are (gogoproto.nullable) = false, i.e. generated as plain values
  // rather than pointers.
  message SnapshotMetadata {
    optional ConfState conf_state = 1 [(gogoproto.nullable) = false];
    // NOTE(review): presumably the log position the snapshot covers; confirm.
    optional uint64 index = 2 [(gogoproto.nullable) = false];
    optional uint64 term = 3 [(gogoproto.nullable) = false];
  }
  // Snapshot bundles an opaque data blob with its SnapshotMetadata.
  message Snapshot {
    // Opaque snapshot bytes.
    optional bytes data = 1;
    // Always present in generated code (non-nullable value field).
    optional SnapshotMetadata metadata = 2 [(gogoproto.nullable) = false];
  }
  // MessageType enumerates the possible values of Message.type.
  //
  // The numeric values are what is written on the wire: never renumber or
  // reuse them; only append new values.
  enum MessageType {
    // First declared value, hence the proto2 default when type is unset.
    MsgHup = 0;
    MsgBeat = 1;
    MsgProp = 2;
    MsgApp = 3;
    MsgAppResp = 4;
    MsgVote = 5;
    MsgVoteResp = 6;
    MsgSnap = 7;
    MsgHeartbeat = 8;
    MsgHeartbeatResp = 9;
    MsgUnreachable = 10;
    MsgSnapStatus = 11;
    MsgCheckQuorum = 12;
    MsgTransferLeader = 13;
    MsgTimeoutNow = 14;
    MsgReadIndex = 15;
    MsgReadIndexResp = 16;
    MsgPreVote = 17;
    MsgPreVoteResp = 18;
  }
  // Message is the envelope whose meaning is selected by `type`.
  //
  // Every field except `context` is (gogoproto.nullable) = false, i.e.
  // generated as a plain value rather than a pointer.
  message Message {
    // Discriminator; see MessageType.
    optional MessageType type = 1 [(gogoproto.nullable) = false];
    // NOTE(review): to/from are presumably node IDs; confirm against callers.
    optional uint64 to = 2 [(gogoproto.nullable) = false];
    optional uint64 from = 3 [(gogoproto.nullable) = false];
    optional uint64 term = 4 [(gogoproto.nullable) = false];
    optional uint64 logTerm = 5 [(gogoproto.nullable) = false];
    optional uint64 index = 6 [(gogoproto.nullable) = false];
    // Zero or more entries carried by this message.
    repeated Entry entries = 7 [(gogoproto.nullable) = false];
    optional uint64 commit = 8 [(gogoproto.nullable) = false];
    optional Snapshot snapshot = 9 [(gogoproto.nullable) = false];
    optional bool reject = 10 [(gogoproto.nullable) = false];
    optional uint64 rejectHint = 11 [(gogoproto.nullable) = false];
    // Opaque bytes; the only nullable field of this message.
    optional bytes context = 12;
  }
  // HardState groups three uint64 values: term, vote and commit.
  //
  // All fields are (gogoproto.nullable) = false, i.e. generated as plain values
  // rather than pointers.
  message HardState {
    optional uint64 term = 1 [(gogoproto.nullable) = false];
    optional uint64 vote = 2 [(gogoproto.nullable) = false];
    optional uint64 commit = 3 [(gogoproto.nullable) = false];
  }
  // ConfChangeTransition specifies the behavior of a configuration change with
  // respect to joint consensus.
  enum ConfChangeTransition {
    // Automatically use the simple protocol if possible, otherwise fall back
    // to ConfChangeJointImplicit. Most applications will want to use this.
    ConfChangeTransitionAuto = 0;
    // Use joint consensus unconditionally, and transition out of it
    // automatically (by proposing a zero configuration change).
    //
    // This option is suitable for applications that want to minimize the time
    // spent in the joint configuration and do not store the joint
    // configuration in the state machine (outside of InitialState).
    ConfChangeTransitionJointImplicit = 1;
    // Use joint consensus and remain in the joint configuration until the
    // application proposes a no-op configuration change. This is suitable for
    // applications that want to explicitly control the transitions, for
    // example to use a custom payload (via the Context field).
    ConfChangeTransitionJointExplicit = 2;
  }
  // ConfState describes a membership configuration: the incoming config
  // (voters, learners) plus, while the configuration is joint, the outgoing
  // voters and the learners that are still pending.
  message ConfState {
    // The voters in the incoming config. (If the configuration is not joint,
    // then the outgoing config is empty).
    repeated uint64 voters = 1;
    // The learners in the incoming config.
    repeated uint64 learners = 2;
    // The voters in the outgoing config.
    repeated uint64 voters_outgoing = 3;
    // The nodes that will become learners when the outgoing config is removed.
    // These nodes are necessarily currently in voters_outgoing (or they would
    // have been added to the incoming config right away).
    repeated uint64 learners_next = 4;
    // If set, the config is joint and Raft will automatically transition into
    // the final config (i.e. remove the outgoing config) when this is safe.
    optional bool auto_leave = 5 [(gogoproto.nullable) = false];
  }
  // ConfChangeType is the operation carried by a ConfChange or a
  // ConfChangeSingle (see their `type` fields).
  enum ConfChangeType {
    // First declared value, hence the proto2 default when type is unset.
    ConfChangeAddNode = 0;
    ConfChangeRemoveNode = 1;
    ConfChangeUpdateNode = 2;
    ConfChangeAddLearnerNode = 3;
  }
  // ConfChange carries one configuration change operation (`type`) for one node
  // (`node_id`), plus an opaque context.
  //
  // Field numbers do not follow declaration order (`id` is field 1 but is
  // declared last); the numbers are the wire contract and must not change.
  message ConfChange {
    optional ConfChangeType type = 2 [(gogoproto.nullable) = false];
    // Generated Go field is named NodeID (via gogoproto.customname).
    optional uint64 node_id = 3
        [(gogoproto.nullable) = false, (gogoproto.customname) = "NodeID"];
    optional bytes context = 4;
    // NB: this is used only by etcd to thread through a unique identifier.
    // Ideally it should really use the Context instead. No counterpart to
    // this field exists in ConfChangeV2.
    //
    // Generated Go field is named ID (via gogoproto.customname).
    optional uint64 id = 1
        [(gogoproto.nullable) = false, (gogoproto.customname) = "ID"];
  }
  // ConfChangeSingle is an individual configuration change operation. Multiple
  // such operations can be carried out atomically via a ConfChangeV2.
  message ConfChangeSingle {
    // The operation to perform; see ConfChangeType.
    optional ConfChangeType type = 1 [(gogoproto.nullable) = false];
    // Generated Go field is named NodeID (via gogoproto.customname).
    optional uint64 node_id = 2
        [(gogoproto.nullable) = false, (gogoproto.customname) = "NodeID"];
  }
  // ConfChangeV2 messages initiate configuration changes. They support both the
  // simple "one at a time" membership change protocol and full Joint Consensus
  // allowing for arbitrary changes in membership.
  //
  // The supplied context is treated as an opaque payload and can be used to
  // attach an action on the state machine to the application of the config
  // change proposal. Note that contrary to Joint Consensus as outlined in the
  // Raft paper[1], configuration changes become active when they are *applied*
  // to the state machine (not when they are appended to the log).
  //
  // The simple protocol can be used whenever only a single change is made.
  //
  // Non-simple changes require the use of Joint Consensus, for which two
  // configuration changes are run. The first configuration change specifies
  // the desired changes and transitions the Raft group into the joint
  // configuration, in which quorum requires a majority of both the pre-changes
  // and post-changes configuration. Joint Consensus avoids entering fragile
  // intermediate configurations that could compromise survivability. For
  // example, without the use of Joint Consensus and running across three
  // availability zones with a replication factor of three, it is not possible
  // to replace a voter without entering an intermediate configuration that
  // does not survive the outage of one availability zone.
  //
  // The provided ConfChangeTransition specifies how (and whether) Joint
  // Consensus is used, and assigns the task of leaving the joint configuration
  // either to Raft or the application. Leaving the joint configuration is
  // accomplished by proposing a ConfChangeV2 in which no field other than
  // (optionally) the Context field is populated.
  //
  // For details on Raft membership changes, see:
  //
  // [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
  message ConfChangeV2 {
    // How (and whether) Joint Consensus is used; see ConfChangeTransition.
    optional ConfChangeTransition transition = 1
        [(gogoproto.nullable) = false];
    // The individual operations to apply together.
    repeated ConfChangeSingle changes = 2 [(gogoproto.nullable) = false];
    // Opaque payload (the "Context field" referred to above).
    optional bytes context = 3;
  }