|
@@ -38,6 +38,7 @@ const (
|
|
|
entryType
|
|
entryType
|
|
|
stateType
|
|
stateType
|
|
|
crcType
|
|
crcType
|
|
|
|
|
+ snapshotType
|
|
|
|
|
|
|
|
// the owner can make/remove files inside the directory
|
|
// the owner can make/remove files inside the directory
|
|
|
privateDirMode = 0700
|
|
privateDirMode = 0700
|
|
@@ -47,6 +48,8 @@ var (
|
|
|
ErrMetadataConflict = errors.New("wal: conflicting metadata found")
|
|
ErrMetadataConflict = errors.New("wal: conflicting metadata found")
|
|
|
ErrFileNotFound = errors.New("wal: file not found")
|
|
ErrFileNotFound = errors.New("wal: file not found")
|
|
|
ErrCRCMismatch = errors.New("wal: crc mismatch")
|
|
ErrCRCMismatch = errors.New("wal: crc mismatch")
|
|
|
|
|
+ ErrSnapshotMismatch = errors.New("wal: snapshot mismatch")
|
|
|
|
|
+ ErrSnapshotNotFound = errors.New("wal: snapshot not found")
|
|
|
crcTable = crc32.MakeTable(crc32.Castagnoli)
|
|
crcTable = crc32.MakeTable(crc32.Castagnoli)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
@@ -60,8 +63,8 @@ type WAL struct {
|
|
|
metadata []byte // metadata recorded at the head of each WAL
|
|
metadata []byte // metadata recorded at the head of each WAL
|
|
|
state raftpb.HardState // hardstate recorded at the head of WAL
|
|
state raftpb.HardState // hardstate recorded at the head of WAL
|
|
|
|
|
|
|
|
- ri uint64 // index of entry to start reading
|
|
|
|
|
- decoder *decoder // decoder to decode records
|
|
|
|
|
|
|
+ start walpb.Snapshot // snapshot to start reading
|
|
|
|
|
+ decoder *decoder // decoder to decode records
|
|
|
|
|
|
|
|
f *os.File // underlay file opened for appending, sync
|
|
f *os.File // underlay file opened for appending, sync
|
|
|
seq uint64 // sequence of the wal file currently used for writes
|
|
seq uint64 // sequence of the wal file currently used for writes
|
|
@@ -116,23 +119,23 @@ func Create(dirpath string, metadata []byte) (*WAL, error) {
|
|
|
return w, nil
|
|
return w, nil
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// Open opens the WAL at the given index.
|
|
|
|
|
-// The index SHOULD have been previously committed to the WAL, or the following
|
|
|
|
|
|
|
+// Open opens the WAL at the given snap.
|
|
|
|
|
+// The snap SHOULD have been previously saved to the WAL, or the following
|
|
|
// ReadAll will fail.
|
|
// ReadAll will fail.
|
|
|
-// The returned WAL is ready to read and the first record will be the given
|
|
|
|
|
-// index. The WAL cannot be appended to before reading out all of its
|
|
|
|
|
|
|
+// The returned WAL is ready to read and the first record will be the one after
|
|
|
|
|
+// the given snap. The WAL cannot be appended to before reading out all of its
|
|
|
// previous records.
|
|
// previous records.
|
|
|
-func Open(dirpath string, index uint64) (*WAL, error) {
|
|
|
|
|
- return openAtIndex(dirpath, index, true)
|
|
|
|
|
|
|
+func Open(dirpath string, snap walpb.Snapshot) (*WAL, error) {
|
|
|
|
|
+ return openAtIndex(dirpath, snap, true)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// OpenNotInUse only opens the wal files that are not in use.
|
|
// OpenNotInUse only opens the wal files that are not in use.
|
|
|
// Other than that, it is similar to Open.
|
|
// Other than that, it is similar to Open.
|
|
|
-func OpenNotInUse(dirpath string, index uint64) (*WAL, error) {
|
|
|
|
|
- return openAtIndex(dirpath, index, false)
|
|
|
|
|
|
|
+func OpenNotInUse(dirpath string, snap walpb.Snapshot) (*WAL, error) {
|
|
|
|
|
+ return openAtIndex(dirpath, snap, false)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-func openAtIndex(dirpath string, index uint64, all bool) (*WAL, error) {
|
|
|
|
|
|
|
+func openAtIndex(dirpath string, snap walpb.Snapshot, all bool) (*WAL, error) {
|
|
|
names, err := fileutil.ReadDir(dirpath)
|
|
names, err := fileutil.ReadDir(dirpath)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
return nil, err
|
|
return nil, err
|
|
@@ -142,7 +145,7 @@ func openAtIndex(dirpath string, index uint64, all bool) (*WAL, error) {
|
|
|
return nil, ErrFileNotFound
|
|
return nil, ErrFileNotFound
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- nameIndex, ok := searchIndex(names, index)
|
|
|
|
|
|
|
+ nameIndex, ok := searchIndex(names, snap.Index)
|
|
|
if !ok || !isValidSeq(names[nameIndex:]) {
|
|
if !ok || !isValidSeq(names[nameIndex:]) {
|
|
|
return nil, ErrFileNotFound
|
|
return nil, ErrFileNotFound
|
|
|
}
|
|
}
|
|
@@ -189,7 +192,7 @@ func openAtIndex(dirpath string, index uint64, all bool) (*WAL, error) {
|
|
|
// create a WAL ready for reading
|
|
// create a WAL ready for reading
|
|
|
w := &WAL{
|
|
w := &WAL{
|
|
|
dir: dirpath,
|
|
dir: dirpath,
|
|
|
- ri: index,
|
|
|
|
|
|
|
+ start: snap,
|
|
|
decoder: newDecoder(rc),
|
|
decoder: newDecoder(rc),
|
|
|
|
|
|
|
|
f: f,
|
|
f: f,
|
|
@@ -200,18 +203,23 @@ func openAtIndex(dirpath string, index uint64, all bool) (*WAL, error) {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// ReadAll reads out all records of the current WAL.
|
|
// ReadAll reads out all records of the current WAL.
|
|
|
-// If it cannot read out the expected entry, it will return ErrIndexNotFound.
|
|
|
|
|
|
|
+// If it cannot read out the expected snap, it will return ErrSnapshotNotFound.
|
|
|
|
|
+// If the loaded snap doesn't match the expected one, it will return
|
|
|
|
|
+// ErrSnapshotMismatch.
|
|
|
|
|
+// TODO: detect not-last-snap error.
|
|
|
|
|
+// TODO: maybe loosen the checking of match.
|
|
|
// After ReadAll, the WAL will be ready for appending new records.
|
|
// After ReadAll, the WAL will be ready for appending new records.
|
|
|
func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.Entry, err error) {
|
|
func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.Entry, err error) {
|
|
|
rec := &walpb.Record{}
|
|
rec := &walpb.Record{}
|
|
|
decoder := w.decoder
|
|
decoder := w.decoder
|
|
|
|
|
|
|
|
|
|
+ var match bool
|
|
|
for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) {
|
|
for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) {
|
|
|
switch rec.Type {
|
|
switch rec.Type {
|
|
|
case entryType:
|
|
case entryType:
|
|
|
e := mustUnmarshalEntry(rec.Data)
|
|
e := mustUnmarshalEntry(rec.Data)
|
|
|
- if e.Index >= w.ri {
|
|
|
|
|
- ents = append(ents[:e.Index-w.ri], e)
|
|
|
|
|
|
|
+ if e.Index > w.start.Index {
|
|
|
|
|
+ ents = append(ents[:e.Index-w.start.Index-1], e)
|
|
|
}
|
|
}
|
|
|
w.enti = e.Index
|
|
w.enti = e.Index
|
|
|
case stateType:
|
|
case stateType:
|
|
@@ -231,6 +239,16 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
|
|
|
return nil, state, nil, ErrCRCMismatch
|
|
return nil, state, nil, ErrCRCMismatch
|
|
|
}
|
|
}
|
|
|
decoder.updateCRC(rec.Crc)
|
|
decoder.updateCRC(rec.Crc)
|
|
|
|
|
+ case snapshotType:
|
|
|
|
|
+ var snap walpb.Snapshot
|
|
|
|
|
+ pbutil.MustUnmarshal(&snap, rec.Data)
|
|
|
|
|
+ if snap.Index == w.start.Index {
|
|
|
|
|
+ if snap.Term != w.start.Term {
|
|
|
|
|
+ state.Reset()
|
|
|
|
|
+ return nil, state, nil, ErrSnapshotMismatch
|
|
|
|
|
+ }
|
|
|
|
|
+ match = true
|
|
|
|
|
+ }
|
|
|
default:
|
|
default:
|
|
|
state.Reset()
|
|
state.Reset()
|
|
|
return nil, state, nil, fmt.Errorf("unexpected block type %d", rec.Type)
|
|
return nil, state, nil, fmt.Errorf("unexpected block type %d", rec.Type)
|
|
@@ -240,10 +258,14 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
|
|
|
state.Reset()
|
|
state.Reset()
|
|
|
return nil, state, nil, err
|
|
return nil, state, nil, err
|
|
|
}
|
|
}
|
|
|
|
|
+ if !match {
|
|
|
|
|
+ state.Reset()
|
|
|
|
|
+ return nil, state, nil, ErrSnapshotNotFound
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
// close decoder, disable reading
|
|
// close decoder, disable reading
|
|
|
w.decoder.close()
|
|
w.decoder.close()
|
|
|
- w.ri = 0
|
|
|
|
|
|
|
+ w.start = walpb.Snapshot{}
|
|
|
|
|
|
|
|
w.metadata = metadata
|
|
w.metadata = metadata
|
|
|
// create encoder (chain crc with the decoder), enable appending
|
|
// create encoder (chain crc with the decoder), enable appending
|
|
@@ -374,6 +396,19 @@ func (w *WAL) Save(st raftpb.HardState, ents []raftpb.Entry) error {
|
|
|
return w.sync()
|
|
return w.sync()
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+func (w *WAL) SaveSnapshot(e walpb.Snapshot) error {
|
|
|
|
|
+ b := pbutil.MustMarshal(&e)
|
|
|
|
|
+ rec := &walpb.Record{Type: snapshotType, Data: b}
|
|
|
|
|
+ if err := w.encoder.encode(rec); err != nil {
|
|
|
|
|
+ return err
|
|
|
|
|
+ }
|
|
|
|
|
+ // update enti only when snapshot is ahead of last index
|
|
|
|
|
+ if w.enti < e.Index {
|
|
|
|
|
+ w.enti = e.Index
|
|
|
|
|
+ }
|
|
|
|
|
+ return w.sync()
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
func (w *WAL) saveCrc(prevCrc uint32) error {
|
|
func (w *WAL) saveCrc(prevCrc uint32) error {
|
|
|
return w.encoder.encode(&walpb.Record{Type: crcType, Crc: prevCrc})
|
|
return w.encoder.encode(&walpb.Record{Type: crcType, Crc: prevCrc})
|
|
|
}
|
|
}
|