123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271 |
- // Copyright 2015 The etcd Authors
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package raft
- import (
- "errors"
- "sync"
- pb "github.com/coreos/etcd/raft/raftpb"
- )
- // ErrCompacted is returned by Storage.Entries/Compact when a requested
- // index is unavailable because it predates the last snapshot.
- var ErrCompacted = errors.New("requested index is unavailable due to compaction")
- // ErrSnapOutOfDate is returned by Storage.CreateSnapshot when a requested
- // index is older than the existing snapshot.
- var ErrSnapOutOfDate = errors.New("requested index is older than the existing snapshot")
- // ErrUnavailable is returned by Storage interface when the requested log entries
- // are unavailable.
- var ErrUnavailable = errors.New("requested entry at index is unavailable")
- // ErrSnapshotTemporarilyUnavailable is returned by the Storage interface when the required
- // snapshot is temporarily unavailable.
- var ErrSnapshotTemporarilyUnavailable = errors.New("snapshot is temporarily unavailable")
- // Storage is an interface that may be implemented by the application
- // to retrieve log entries from storage.
- //
- // If any Storage method returns an error, the raft instance will
- // become inoperable and refuse to participate in elections; the
- // application is responsible for cleanup and recovery in this case.
- type Storage interface {
- // InitialState returns the saved HardState and ConfState information.
- InitialState() (pb.HardState, pb.ConfState, error)
- // Entries returns a slice of log entries in the range [lo,hi).
- // MaxSize limits the total size of the log entries returned, but
- // Entries returns at least one entry if any.
- Entries(lo, hi, maxSize uint64) ([]pb.Entry, error)
- // Term returns the term of entry i, which must be in the range
- // [FirstIndex()-1, LastIndex()]. The term of the entry before
- // FirstIndex is retained for matching purposes even though the
- // rest of that entry may not be available.
- Term(i uint64) (uint64, error)
- // LastIndex returns the index of the last entry in the log.
- LastIndex() (uint64, error)
- // FirstIndex returns the index of the first log entry that is
- // possibly available via Entries (older entries have been incorporated
- // into the latest Snapshot; if storage only contains the dummy entry the
- // first log entry is not available).
- FirstIndex() (uint64, error)
- // Snapshot returns the most recent snapshot.
- // If snapshot is temporarily unavailable, it should return ErrSnapshotTemporarilyUnavailable,
- // so raft state machine could know that Storage needs some time to prepare
- // snapshot and call Snapshot later.
- Snapshot() (pb.Snapshot, error)
- }
- // MemoryStorage implements the Storage interface backed by an
- // in-memory array.
- type MemoryStorage struct {
- // Protects access to all fields. Most methods of MemoryStorage are
- // run on the raft goroutine, but Append() is run on an application
- // goroutine.
- sync.Mutex
- hardState pb.HardState
- snapshot pb.Snapshot
- // ents[i] has raft log position i+snapshot.Metadata.Index
- ents []pb.Entry
- }
- // NewMemoryStorage creates an empty MemoryStorage.
- func NewMemoryStorage() *MemoryStorage {
- return &MemoryStorage{
- // When starting from scratch populate the list with a dummy entry at term zero.
- ents: make([]pb.Entry, 1),
- }
- }
- // InitialState implements the Storage interface.
- func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) {
- return ms.hardState, ms.snapshot.Metadata.ConfState, nil
- }
- // SetHardState saves the current HardState.
- func (ms *MemoryStorage) SetHardState(st pb.HardState) error {
- ms.Lock()
- defer ms.Unlock()
- ms.hardState = st
- return nil
- }
- // Entries implements the Storage interface.
- func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) {
- ms.Lock()
- defer ms.Unlock()
- offset := ms.ents[0].Index
- if lo <= offset {
- return nil, ErrCompacted
- }
- if hi > ms.lastIndex()+1 {
- raftLogger.Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex())
- }
- // only contains dummy entries.
- if len(ms.ents) == 1 {
- return nil, ErrUnavailable
- }
- ents := ms.ents[lo-offset : hi-offset]
- return limitSize(ents, maxSize), nil
- }
- // Term implements the Storage interface.
- func (ms *MemoryStorage) Term(i uint64) (uint64, error) {
- ms.Lock()
- defer ms.Unlock()
- offset := ms.ents[0].Index
- if i < offset {
- return 0, ErrCompacted
- }
- if int(i-offset) >= len(ms.ents) {
- return 0, ErrUnavailable
- }
- return ms.ents[i-offset].Term, nil
- }
- // LastIndex implements the Storage interface.
- func (ms *MemoryStorage) LastIndex() (uint64, error) {
- ms.Lock()
- defer ms.Unlock()
- return ms.lastIndex(), nil
- }
- func (ms *MemoryStorage) lastIndex() uint64 {
- return ms.ents[0].Index + uint64(len(ms.ents)) - 1
- }
- // FirstIndex implements the Storage interface.
- func (ms *MemoryStorage) FirstIndex() (uint64, error) {
- ms.Lock()
- defer ms.Unlock()
- return ms.firstIndex(), nil
- }
- func (ms *MemoryStorage) firstIndex() uint64 {
- return ms.ents[0].Index + 1
- }
- // Snapshot implements the Storage interface.
- func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) {
- ms.Lock()
- defer ms.Unlock()
- return ms.snapshot, nil
- }
- // ApplySnapshot overwrites the contents of this Storage object with
- // those of the given snapshot.
- func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error {
- ms.Lock()
- defer ms.Unlock()
- //handle check for old snapshot being applied
- msIndex := ms.snapshot.Metadata.Index
- snapIndex := snap.Metadata.Index
- if msIndex >= snapIndex {
- return ErrSnapOutOfDate
- }
- ms.snapshot = snap
- ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}}
- return nil
- }
- // CreateSnapshot makes a snapshot which can be retrieved with Snapshot() and
- // can be used to reconstruct the state at that point.
- // If any configuration changes have been made since the last compaction,
- // the result of the last ApplyConfChange must be passed in.
- func (ms *MemoryStorage) CreateSnapshot(i uint64, cs *pb.ConfState, data []byte) (pb.Snapshot, error) {
- ms.Lock()
- defer ms.Unlock()
- if i <= ms.snapshot.Metadata.Index {
- return pb.Snapshot{}, ErrSnapOutOfDate
- }
- offset := ms.ents[0].Index
- if i > ms.lastIndex() {
- raftLogger.Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex())
- }
- ms.snapshot.Metadata.Index = i
- ms.snapshot.Metadata.Term = ms.ents[i-offset].Term
- if cs != nil {
- ms.snapshot.Metadata.ConfState = *cs
- }
- ms.snapshot.Data = data
- return ms.snapshot, nil
- }
- // Compact discards all log entries prior to compactIndex.
- // It is the application's responsibility to not attempt to compact an index
- // greater than raftLog.applied.
- func (ms *MemoryStorage) Compact(compactIndex uint64) error {
- ms.Lock()
- defer ms.Unlock()
- offset := ms.ents[0].Index
- if compactIndex <= offset {
- return ErrCompacted
- }
- if compactIndex > ms.lastIndex() {
- raftLogger.Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex())
- }
- i := compactIndex - offset
- ents := make([]pb.Entry, 1, 1+uint64(len(ms.ents))-i)
- ents[0].Index = ms.ents[i].Index
- ents[0].Term = ms.ents[i].Term
- ents = append(ents, ms.ents[i+1:]...)
- ms.ents = ents
- return nil
- }
- // Append the new entries to storage.
- // TODO (xiangli): ensure the entries are continuous and
- // entries[0].Index > ms.entries[0].Index
- func (ms *MemoryStorage) Append(entries []pb.Entry) error {
- if len(entries) == 0 {
- return nil
- }
- ms.Lock()
- defer ms.Unlock()
- first := ms.firstIndex()
- last := entries[0].Index + uint64(len(entries)) - 1
- // shortcut if there is no new entry.
- if last < first {
- return nil
- }
- // truncate compacted entries
- if first > entries[0].Index {
- entries = entries[first-entries[0].Index:]
- }
- offset := entries[0].Index - ms.ents[0].Index
- switch {
- case uint64(len(ms.ents)) > offset:
- ms.ents = append([]pb.Entry{}, ms.ents[:offset]...)
- ms.ents = append(ms.ents, entries...)
- case uint64(len(ms.ents)) == offset:
- ms.ents = append(ms.ents, entries...)
- default:
- raftLogger.Panicf("missing log entry [last: %d, append at: %d]",
- ms.lastIndex(), entries[0].Index)
- }
- return nil
- }
|