log.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package raft
  15. import (
  16. "fmt"
  17. "log"
  18. pb "github.com/coreos/etcd/raft/raftpb"
  19. )
  20. type raftLog struct {
  21. // storage contains all stable entries since the last snapshot.
  22. storage Storage
  23. // unstable contains all unstable entries and snapshot.
  24. // they will be saved into storage.
  25. unstable unstable
  26. // committed is the highest log position that is known to be in
  27. // stable storage on a quorum of nodes.
  28. // Invariant: committed < unstable
  29. committed uint64
  30. // applied is the highest log position that the application has
  31. // been instructed to apply to its state machine.
  32. // Invariant: applied <= committed
  33. applied uint64
  34. }
  35. // newLog returns log using the given storage. It recovers the log to the state
  36. // that it just commits and applies the latest snapshot.
  37. func newLog(storage Storage) *raftLog {
  38. if storage == nil {
  39. log.Panic("storage must not be nil")
  40. }
  41. log := &raftLog{
  42. storage: storage,
  43. }
  44. firstIndex, err := storage.FirstIndex()
  45. if err != nil {
  46. panic(err) // TODO(bdarnell)
  47. }
  48. lastIndex, err := storage.LastIndex()
  49. if err != nil {
  50. panic(err) // TODO(bdarnell)
  51. }
  52. log.unstable.offset = lastIndex + 1
  53. // Initialize our committed and applied pointers to the time of the last compaction.
  54. log.committed = firstIndex - 1
  55. log.applied = firstIndex - 1
  56. return log
  57. }
  58. func (l *raftLog) String() string {
  59. return fmt.Sprintf("committed=%d, applied=%d, unstable.offset=%d, len(unstable.Entries)=%d", l.committed, l.applied, l.unstable.offset, len(l.unstable.entries))
  60. }
  61. // maybeAppend returns (0, false) if the entries cannot be appended. Otherwise,
  62. // it returns (last index of new entries, true).
  63. func (l *raftLog) maybeAppend(index, logTerm, committed uint64, ents ...pb.Entry) (lastnewi uint64, ok bool) {
  64. lastnewi = index + uint64(len(ents))
  65. if l.matchTerm(index, logTerm) {
  66. ci := l.findConflict(ents)
  67. switch {
  68. case ci == 0:
  69. case ci <= l.committed:
  70. log.Panicf("entry %d conflict with committed entry [committed(%d)]", ci, l.committed)
  71. default:
  72. offset := index + 1
  73. l.append(ents[ci-offset:]...)
  74. }
  75. l.commitTo(min(committed, lastnewi))
  76. return lastnewi, true
  77. }
  78. return 0, false
  79. }
  80. func (l *raftLog) append(ents ...pb.Entry) uint64 {
  81. if len(ents) == 0 {
  82. return l.lastIndex()
  83. }
  84. if after := ents[0].Index - 1; after < l.committed {
  85. log.Panicf("after(%d) is out of range [committed(%d)]", after, l.committed)
  86. }
  87. l.unstable.truncateAndAppend(ents)
  88. return l.lastIndex()
  89. }
  90. // findConflict finds the index of the conflict.
  91. // It returns the first pair of conflicting entries between the existing
  92. // entries and the given entries, if there are any.
  93. // If there is no conflicting entries, and the existing entries contains
  94. // all the given entries, zero will be returned.
  95. // If there is no conflicting entries, but the given entries contains new
  96. // entries, the index of the first new entry will be returned.
  97. // An entry is considered to be conflicting if it has the same index but
  98. // a different term.
  99. // The first entry MUST have an index equal to the argument 'from'.
  100. // The index of the given entries MUST be continuously increasing.
  101. func (l *raftLog) findConflict(ents []pb.Entry) uint64 {
  102. for _, ne := range ents {
  103. if !l.matchTerm(ne.Index, ne.Term) {
  104. if ne.Index <= l.lastIndex() {
  105. log.Printf("raftlog: found conflict at index %d [existing term: %d, conflicting term: %d]",
  106. ne.Index, l.term(ne.Index), ne.Term)
  107. }
  108. return ne.Index
  109. }
  110. }
  111. return 0
  112. }
  113. func (l *raftLog) unstableEntries() []pb.Entry {
  114. if len(l.unstable.entries) == 0 {
  115. return nil
  116. }
  117. return l.unstable.entries
  118. }
  119. // nextEnts returns all the available entries for execution.
  120. // If applied is smaller than the index of snapshot, it returns all committed
  121. // entries after the index of snapshot.
  122. func (l *raftLog) nextEnts() (ents []pb.Entry) {
  123. off := max(l.applied+1, l.firstIndex())
  124. if l.committed+1 > off {
  125. return l.slice(off, l.committed+1)
  126. }
  127. return nil
  128. }
  129. func (l *raftLog) snapshot() (pb.Snapshot, error) {
  130. if l.unstable.snapshot != nil {
  131. return *l.unstable.snapshot, nil
  132. }
  133. return l.storage.Snapshot()
  134. }
  135. func (l *raftLog) firstIndex() uint64 {
  136. if i, ok := l.unstable.maybeFirstIndex(); ok {
  137. return i
  138. }
  139. index, err := l.storage.FirstIndex()
  140. if err != nil {
  141. panic(err) // TODO(bdarnell)
  142. }
  143. return index
  144. }
  145. func (l *raftLog) lastIndex() uint64 {
  146. if i, ok := l.unstable.maybeLastIndex(); ok {
  147. return i
  148. }
  149. i, err := l.storage.LastIndex()
  150. if err != nil {
  151. panic(err) // TODO(bdarnell)
  152. }
  153. return i
  154. }
  155. func (l *raftLog) commitTo(tocommit uint64) {
  156. // never decrease commit
  157. if l.committed < tocommit {
  158. if l.lastIndex() < tocommit {
  159. log.Panicf("tocommit(%d) is out of range [lastIndex(%d)]", tocommit, l.lastIndex())
  160. }
  161. l.committed = tocommit
  162. }
  163. }
  164. func (l *raftLog) appliedTo(i uint64) {
  165. if i == 0 {
  166. return
  167. }
  168. if l.committed < i || i < l.applied {
  169. log.Panicf("applied(%d) is out of range [prevApplied(%d), committed(%d)]", i, l.applied, l.committed)
  170. }
  171. l.applied = i
  172. }
  173. func (l *raftLog) stableTo(i, t uint64) { l.unstable.stableTo(i, t) }
  174. func (l *raftLog) stableSnapTo(i uint64) { l.unstable.stableSnapTo(i) }
  175. func (l *raftLog) lastTerm() uint64 { return l.term(l.lastIndex()) }
  176. func (l *raftLog) term(i uint64) uint64 {
  177. // the valid term range is [index of dummy entry, last index]
  178. dummyIndex := l.firstIndex() - 1
  179. if i < dummyIndex || i > l.lastIndex() {
  180. return 0
  181. }
  182. if t, ok := l.unstable.maybeTerm(i); ok {
  183. return t
  184. }
  185. t, err := l.storage.Term(i)
  186. if err == nil {
  187. return t
  188. }
  189. if err == ErrCompacted {
  190. return 0
  191. }
  192. panic(err) // TODO(bdarnell)
  193. }
  194. func (l *raftLog) entries(i uint64) []pb.Entry {
  195. if i > l.lastIndex() {
  196. return nil
  197. }
  198. return l.slice(i, l.lastIndex()+1)
  199. }
  200. // allEntries returns all entries in the log.
  201. func (l *raftLog) allEntries() []pb.Entry { return l.entries(l.firstIndex()) }
  202. // isUpToDate determines if the given (lastIndex,term) log is more up-to-date
  203. // by comparing the index and term of the last entries in the existing logs.
  204. // If the logs have last entries with different terms, then the log with the
  205. // later term is more up-to-date. If the logs end with the same term, then
  206. // whichever log has the larger lastIndex is more up-to-date. If the logs are
  207. // the same, the given log is up-to-date.
  208. func (l *raftLog) isUpToDate(lasti, term uint64) bool {
  209. return term > l.lastTerm() || (term == l.lastTerm() && lasti >= l.lastIndex())
  210. }
  211. func (l *raftLog) matchTerm(i, term uint64) bool { return l.term(i) == term }
  212. func (l *raftLog) maybeCommit(maxIndex, term uint64) bool {
  213. if maxIndex > l.committed && l.term(maxIndex) == term {
  214. l.commitTo(maxIndex)
  215. return true
  216. }
  217. return false
  218. }
  219. func (l *raftLog) restore(s pb.Snapshot) {
  220. log.Printf("raftlog: log [%s] starts to restore snapshot [index: %d, term: %d]", l, s.Metadata.Index, s.Metadata.Term)
  221. l.committed = s.Metadata.Index
  222. l.unstable.restore(s)
  223. }
  224. // slice returns a slice of log entries from lo through hi-1, inclusive.
  225. func (l *raftLog) slice(lo uint64, hi uint64) []pb.Entry {
  226. l.mustCheckOutOfBounds(lo, hi)
  227. if lo == hi {
  228. return nil
  229. }
  230. var ents []pb.Entry
  231. if lo < l.unstable.offset {
  232. storedEnts, err := l.storage.Entries(lo, min(hi, l.unstable.offset))
  233. if err == ErrCompacted {
  234. // This should never fail because it has been checked before.
  235. log.Panicf("entries[%d:%d) from storage is out of bound", lo, min(hi, l.unstable.offset))
  236. } else if err == ErrUnavailable {
  237. log.Panicf("entries[%d:%d) is unavailable from storage", lo, min(hi, l.unstable.offset))
  238. } else if err != nil {
  239. panic(err) // TODO(bdarnell)
  240. }
  241. ents = storedEnts
  242. }
  243. if hi > l.unstable.offset {
  244. unstable := l.unstable.slice(max(lo, l.unstable.offset), hi)
  245. if len(ents) > 0 {
  246. ents = append([]pb.Entry{}, ents...)
  247. ents = append(ents, unstable...)
  248. } else {
  249. ents = unstable
  250. }
  251. }
  252. return ents
  253. }
  254. // l.firstIndex <= lo <= hi <= l.firstIndex + len(l.entries)
  255. func (l *raftLog) mustCheckOutOfBounds(lo, hi uint64) {
  256. if lo > hi {
  257. log.Panicf("raft: invalid slice %d > %d", lo, hi)
  258. }
  259. length := l.lastIndex() - l.firstIndex() + 1
  260. if lo < l.firstIndex() || hi > l.firstIndex()+length {
  261. log.Panicf("raft: slice[%d,%d) out of bound [%d,%d]", lo, hi, l.firstIndex(), l.lastIndex())
  262. }
  263. }