storage.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package raft
  15. import (
  16. "errors"
  17. "sync"
  18. pb "go.etcd.io/etcd/raft/raftpb"
  19. )
  20. // ErrCompacted is returned by Storage.Entries/Compact when a requested
  21. // index is unavailable because it predates the last snapshot.
  22. var ErrCompacted = errors.New("requested index is unavailable due to compaction")
  23. // ErrSnapOutOfDate is returned by Storage.CreateSnapshot when a requested
  24. // index is older than the existing snapshot.
  25. var ErrSnapOutOfDate = errors.New("requested index is older than the existing snapshot")
  26. // ErrUnavailable is returned by Storage interface when the requested log entries
  27. // are unavailable.
  28. var ErrUnavailable = errors.New("requested entry at index is unavailable")
  29. // ErrSnapshotTemporarilyUnavailable is returned by the Storage interface when the required
  30. // snapshot is temporarily unavailable.
  31. var ErrSnapshotTemporarilyUnavailable = errors.New("snapshot is temporarily unavailable")
  32. // Storage is an interface that may be implemented by the application
  33. // to retrieve log entries from storage.
  34. //
  35. // If any Storage method returns an error, the raft instance will
  36. // become inoperable and refuse to participate in elections; the
  37. // application is responsible for cleanup and recovery in this case.
  38. type Storage interface {
  39. // TODO(tbg): split this into two interfaces, LogStorage and StateStorage.
  40. // InitialState returns the saved HardState and ConfState information.
  41. InitialState() (pb.HardState, pb.ConfState, error)
  42. // Entries returns a slice of log entries in the range [lo,hi).
  43. // MaxSize limits the total size of the log entries returned, but
  44. // Entries returns at least one entry if any.
  45. Entries(lo, hi, maxSize uint64) ([]pb.Entry, error)
  46. // Term returns the term of entry i, which must be in the range
  47. // [FirstIndex()-1, LastIndex()]. The term of the entry before
  48. // FirstIndex is retained for matching purposes even though the
  49. // rest of that entry may not be available.
  50. Term(i uint64) (uint64, error)
  51. // LastIndex returns the index of the last entry in the log.
  52. LastIndex() (uint64, error)
  53. // FirstIndex returns the index of the first log entry that is
  54. // possibly available via Entries (older entries have been incorporated
  55. // into the latest Snapshot; if storage only contains the dummy entry the
  56. // first log entry is not available).
  57. FirstIndex() (uint64, error)
  58. // Snapshot returns the most recent snapshot.
  59. // If snapshot is temporarily unavailable, it should return ErrSnapshotTemporarilyUnavailable,
  60. // so raft state machine could know that Storage needs some time to prepare
  61. // snapshot and call Snapshot later.
  62. Snapshot() (pb.Snapshot, error)
  63. }
  64. // MemoryStorage implements the Storage interface backed by an
  65. // in-memory array.
  66. type MemoryStorage struct {
  67. // Protects access to all fields. Most methods of MemoryStorage are
  68. // run on the raft goroutine, but Append() is run on an application
  69. // goroutine.
  70. sync.Mutex
  71. hardState pb.HardState
  72. snapshot pb.Snapshot
  73. // ents[i] has raft log position i+snapshot.Metadata.Index
  74. ents []pb.Entry
  75. }
  76. // NewMemoryStorage creates an empty MemoryStorage.
  77. func NewMemoryStorage() *MemoryStorage {
  78. return &MemoryStorage{
  79. // When starting from scratch populate the list with a dummy entry at term zero.
  80. ents: make([]pb.Entry, 1),
  81. }
  82. }
  83. // InitialState implements the Storage interface.
  84. func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) {
  85. return ms.hardState, ms.snapshot.Metadata.ConfState, nil
  86. }
  87. // SetHardState saves the current HardState.
  88. func (ms *MemoryStorage) SetHardState(st pb.HardState) error {
  89. ms.Lock()
  90. defer ms.Unlock()
  91. ms.hardState = st
  92. return nil
  93. }
  94. // Entries implements the Storage interface.
  95. func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) {
  96. ms.Lock()
  97. defer ms.Unlock()
  98. offset := ms.ents[0].Index
  99. if lo <= offset {
  100. return nil, ErrCompacted
  101. }
  102. if hi > ms.lastIndex()+1 {
  103. raftLogger.Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex())
  104. }
  105. // only contains dummy entries.
  106. if len(ms.ents) == 1 {
  107. return nil, ErrUnavailable
  108. }
  109. ents := ms.ents[lo-offset : hi-offset]
  110. return limitSize(ents, maxSize), nil
  111. }
  112. // Term implements the Storage interface.
  113. func (ms *MemoryStorage) Term(i uint64) (uint64, error) {
  114. ms.Lock()
  115. defer ms.Unlock()
  116. offset := ms.ents[0].Index
  117. if i < offset {
  118. return 0, ErrCompacted
  119. }
  120. if int(i-offset) >= len(ms.ents) {
  121. return 0, ErrUnavailable
  122. }
  123. return ms.ents[i-offset].Term, nil
  124. }
  125. // LastIndex implements the Storage interface.
  126. func (ms *MemoryStorage) LastIndex() (uint64, error) {
  127. ms.Lock()
  128. defer ms.Unlock()
  129. return ms.lastIndex(), nil
  130. }
  131. func (ms *MemoryStorage) lastIndex() uint64 {
  132. return ms.ents[0].Index + uint64(len(ms.ents)) - 1
  133. }
  134. // FirstIndex implements the Storage interface.
  135. func (ms *MemoryStorage) FirstIndex() (uint64, error) {
  136. ms.Lock()
  137. defer ms.Unlock()
  138. return ms.firstIndex(), nil
  139. }
  140. func (ms *MemoryStorage) firstIndex() uint64 {
  141. return ms.ents[0].Index + 1
  142. }
  143. // Snapshot implements the Storage interface.
  144. func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) {
  145. ms.Lock()
  146. defer ms.Unlock()
  147. return ms.snapshot, nil
  148. }
  149. // ApplySnapshot overwrites the contents of this Storage object with
  150. // those of the given snapshot.
  151. func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error {
  152. ms.Lock()
  153. defer ms.Unlock()
  154. //handle check for old snapshot being applied
  155. msIndex := ms.snapshot.Metadata.Index
  156. snapIndex := snap.Metadata.Index
  157. if msIndex >= snapIndex {
  158. return ErrSnapOutOfDate
  159. }
  160. ms.snapshot = snap
  161. ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}}
  162. return nil
  163. }
  164. // CreateSnapshot makes a snapshot which can be retrieved with Snapshot() and
  165. // can be used to reconstruct the state at that point.
  166. // If any configuration changes have been made since the last compaction,
  167. // the result of the last ApplyConfChange must be passed in.
  168. func (ms *MemoryStorage) CreateSnapshot(i uint64, cs *pb.ConfState, data []byte) (pb.Snapshot, error) {
  169. ms.Lock()
  170. defer ms.Unlock()
  171. if i <= ms.snapshot.Metadata.Index {
  172. return pb.Snapshot{}, ErrSnapOutOfDate
  173. }
  174. offset := ms.ents[0].Index
  175. if i > ms.lastIndex() {
  176. raftLogger.Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex())
  177. }
  178. ms.snapshot.Metadata.Index = i
  179. ms.snapshot.Metadata.Term = ms.ents[i-offset].Term
  180. if cs != nil {
  181. ms.snapshot.Metadata.ConfState = *cs
  182. }
  183. ms.snapshot.Data = data
  184. return ms.snapshot, nil
  185. }
  186. // Compact discards all log entries prior to compactIndex.
  187. // It is the application's responsibility to not attempt to compact an index
  188. // greater than raftLog.applied.
  189. func (ms *MemoryStorage) Compact(compactIndex uint64) error {
  190. ms.Lock()
  191. defer ms.Unlock()
  192. offset := ms.ents[0].Index
  193. if compactIndex <= offset {
  194. return ErrCompacted
  195. }
  196. if compactIndex > ms.lastIndex() {
  197. raftLogger.Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex())
  198. }
  199. i := compactIndex - offset
  200. ents := make([]pb.Entry, 1, 1+uint64(len(ms.ents))-i)
  201. ents[0].Index = ms.ents[i].Index
  202. ents[0].Term = ms.ents[i].Term
  203. ents = append(ents, ms.ents[i+1:]...)
  204. ms.ents = ents
  205. return nil
  206. }
  207. // Append the new entries to storage.
  208. // TODO (xiangli): ensure the entries are continuous and
  209. // entries[0].Index > ms.entries[0].Index
  210. func (ms *MemoryStorage) Append(entries []pb.Entry) error {
  211. if len(entries) == 0 {
  212. return nil
  213. }
  214. ms.Lock()
  215. defer ms.Unlock()
  216. first := ms.firstIndex()
  217. last := entries[0].Index + uint64(len(entries)) - 1
  218. // shortcut if there is no new entry.
  219. if last < first {
  220. return nil
  221. }
  222. // truncate compacted entries
  223. if first > entries[0].Index {
  224. entries = entries[first-entries[0].Index:]
  225. }
  226. offset := entries[0].Index - ms.ents[0].Index
  227. switch {
  228. case uint64(len(ms.ents)) > offset:
  229. ms.ents = append([]pb.Entry{}, ms.ents[:offset]...)
  230. ms.ents = append(ms.ents, entries...)
  231. case uint64(len(ms.ents)) == offset:
  232. ms.ents = append(ms.ents, entries...)
  233. default:
  234. raftLogger.Panicf("missing log entry [last: %d, append at: %d]",
  235. ms.lastIndex(), entries[0].Index)
  236. }
  237. return nil
  238. }