|
@@ -57,6 +57,7 @@ var (
|
|
|
ErrCRCMismatch = errors.New("wal: crc mismatch")
|
|
ErrCRCMismatch = errors.New("wal: crc mismatch")
|
|
|
ErrSnapshotMismatch = errors.New("wal: snapshot mismatch")
|
|
ErrSnapshotMismatch = errors.New("wal: snapshot mismatch")
|
|
|
ErrSnapshotNotFound = errors.New("wal: snapshot not found")
|
|
ErrSnapshotNotFound = errors.New("wal: snapshot not found")
|
|
|
|
|
+ ErrZeroTrailer = errors.New("wal: zero trailer")
|
|
|
crcTable = crc32.MakeTable(crc32.Castagnoli)
|
|
crcTable = crc32.MakeTable(crc32.Castagnoli)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
@@ -72,7 +73,7 @@ type WAL struct {
|
|
|
|
|
|
|
|
start walpb.Snapshot // snapshot to start reading
|
|
start walpb.Snapshot // snapshot to start reading
|
|
|
decoder *decoder // decoder to decode records
|
|
decoder *decoder // decoder to decode records
|
|
|
- readClose io.Closer // closer for decode reader
|
|
|
|
|
|
|
+ readClose func() error // closer for decode reader
|
|
|
|
|
|
|
|
mu sync.Mutex
|
|
mu sync.Mutex
|
|
|
enti uint64 // index of the last entry saved to the wal
|
|
enti uint64 // index of the last entry saved to the wal
|
|
@@ -93,10 +94,16 @@ func Create(dirpath string, metadata []byte) (*WAL, error) {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
p := path.Join(dirpath, walName(0, 0))
|
|
p := path.Join(dirpath, walName(0, 0))
|
|
|
- f, err := fileutil.LockFile(p, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0600)
|
|
|
|
|
|
|
+ f, err := fileutil.LockFile(p, os.O_WRONLY|os.O_CREATE, 0600)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
return nil, err
|
|
return nil, err
|
|
|
}
|
|
}
|
|
|
|
|
+ if _, err := f.Seek(0, os.SEEK_END); err != nil {
|
|
|
|
|
+ return nil, err
|
|
|
|
|
+ }
|
|
|
|
|
+ if err := fileutil.Preallocate(f.File, segmentSizeBytes, true); err != nil {
|
|
|
|
|
+ return nil, err
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
w := &WAL{
|
|
w := &WAL{
|
|
|
dir: dirpath,
|
|
dir: dirpath,
|
|
@@ -149,13 +156,14 @@ func openAtIndex(dirpath string, snap walpb.Snapshot, write bool) (*WAL, error)
|
|
|
|
|
|
|
|
// open the wal files
|
|
// open the wal files
|
|
|
rcs := make([]io.ReadCloser, 0)
|
|
rcs := make([]io.ReadCloser, 0)
|
|
|
|
|
+ rs := make([]io.Reader, 0)
|
|
|
ls := make([]*fileutil.LockedFile, 0)
|
|
ls := make([]*fileutil.LockedFile, 0)
|
|
|
for _, name := range names[nameIndex:] {
|
|
for _, name := range names[nameIndex:] {
|
|
|
p := path.Join(dirpath, name)
|
|
p := path.Join(dirpath, name)
|
|
|
if write {
|
|
if write {
|
|
|
l, err := fileutil.TryLockFile(p, os.O_RDWR, 0600)
|
|
l, err := fileutil.TryLockFile(p, os.O_RDWR, 0600)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- MultiReadCloser(rcs...).Close()
|
|
|
|
|
|
|
+ closeAll(rcs...)
|
|
|
return nil, err
|
|
return nil, err
|
|
|
}
|
|
}
|
|
|
ls = append(ls, l)
|
|
ls = append(ls, l)
|
|
@@ -163,37 +171,38 @@ func openAtIndex(dirpath string, snap walpb.Snapshot, write bool) (*WAL, error)
|
|
|
} else {
|
|
} else {
|
|
|
rf, err := os.OpenFile(p, os.O_RDONLY, 0600)
|
|
rf, err := os.OpenFile(p, os.O_RDONLY, 0600)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
|
|
+ closeAll(rcs...)
|
|
|
return nil, err
|
|
return nil, err
|
|
|
}
|
|
}
|
|
|
ls = append(ls, nil)
|
|
ls = append(ls, nil)
|
|
|
rcs = append(rcs, rf)
|
|
rcs = append(rcs, rf)
|
|
|
}
|
|
}
|
|
|
|
|
+ rs = append(rs, rcs[len(rcs)-1])
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- rc := MultiReadCloser(rcs...)
|
|
|
|
|
- c := rc
|
|
|
|
|
- if write {
|
|
|
|
|
- // write reuses the file descriptors from read; don't close so
|
|
|
|
|
- // WAL can append without dropping the file lock
|
|
|
|
|
- c = nil
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ closer := func() error { return closeAll(rcs...) }
|
|
|
|
|
|
|
|
// create a WAL ready for reading
|
|
// create a WAL ready for reading
|
|
|
w := &WAL{
|
|
w := &WAL{
|
|
|
dir: dirpath,
|
|
dir: dirpath,
|
|
|
start: snap,
|
|
start: snap,
|
|
|
- decoder: newDecoder(rc),
|
|
|
|
|
- readClose: c,
|
|
|
|
|
|
|
+ decoder: newDecoder(rs...),
|
|
|
|
|
+ readClose: closer,
|
|
|
locks: ls,
|
|
locks: ls,
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if write {
|
|
if write {
|
|
|
|
|
+ // write reuses the file descriptors from read; don't close so
|
|
|
|
|
+ // WAL can append without dropping the file lock
|
|
|
|
|
+ w.readClose = nil
|
|
|
|
|
+
|
|
|
if _, _, err := parseWalName(path.Base(w.tail().Name())); err != nil {
|
|
if _, _, err := parseWalName(path.Base(w.tail().Name())); err != nil {
|
|
|
- rc.Close()
|
|
|
|
|
|
|
+ closer()
|
|
|
return nil, err
|
|
return nil, err
|
|
|
}
|
|
}
|
|
|
|
|
+ // don't resize file for preallocation in case tail is corrupted
|
|
|
if err := fileutil.Preallocate(w.tail().File, segmentSizeBytes, false); err != nil {
|
|
if err := fileutil.Preallocate(w.tail().File, segmentSizeBytes, false); err != nil {
|
|
|
- rc.Close()
|
|
|
|
|
|
|
+ closer()
|
|
|
plog.Errorf("failed to allocate space when creating new wal file (%v)", err)
|
|
plog.Errorf("failed to allocate space when creating new wal file (%v)", err)
|
|
|
return nil, err
|
|
return nil, err
|
|
|
}
|
|
}
|
|
@@ -261,6 +270,9 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ if err == ErrZeroTrailer {
|
|
|
|
|
+ err = io.EOF
|
|
|
|
|
+ }
|
|
|
switch w.tail() {
|
|
switch w.tail() {
|
|
|
case nil:
|
|
case nil:
|
|
|
// We do not have to read out all entries in read mode.
|
|
// We do not have to read out all entries in read mode.
|
|
@@ -285,7 +297,7 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
|
|
|
|
|
|
|
|
// close decoder, disable reading
|
|
// close decoder, disable reading
|
|
|
if w.readClose != nil {
|
|
if w.readClose != nil {
|
|
|
- w.readClose.Close()
|
|
|
|
|
|
|
+ w.readClose()
|
|
|
w.readClose = nil
|
|
w.readClose = nil
|
|
|
}
|
|
}
|
|
|
w.start = walpb.Snapshot{}
|
|
w.start = walpb.Snapshot{}
|
|
@@ -294,6 +306,7 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
|
|
|
|
|
|
|
|
if w.tail() != nil {
|
|
if w.tail() != nil {
|
|
|
// create encoder (chain crc with the decoder), enable appending
|
|
// create encoder (chain crc with the decoder), enable appending
|
|
|
|
|
+ _, err = w.tail().Seek(w.decoder.lastOffset(), os.SEEK_SET)
|
|
|
w.encoder = newEncoder(w.tail(), w.decoder.lastCRC())
|
|
w.encoder = newEncoder(w.tail(), w.decoder.lastCRC())
|
|
|
lastIndexSaved.Set(float64(w.enti))
|
|
lastIndexSaved.Set(float64(w.enti))
|
|
|
}
|
|
}
|
|
@@ -306,7 +319,14 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
|
|
|
// cut first creates a temp wal file and writes necessary headers into it.
|
|
// cut first creates a temp wal file and writes necessary headers into it.
|
|
|
// Then cut atomically rename temp wal file to a wal file.
|
|
// Then cut atomically rename temp wal file to a wal file.
|
|
|
func (w *WAL) cut() error {
|
|
func (w *WAL) cut() error {
|
|
|
- // close old wal file
|
|
|
|
|
|
|
+ // close old wal file; truncate to avoid wasting space if an early cut
|
|
|
|
|
+ off, serr := w.tail().Seek(0, os.SEEK_CUR)
|
|
|
|
|
+ if serr != nil {
|
|
|
|
|
+ return serr
|
|
|
|
|
+ }
|
|
|
|
|
+ if err := w.tail().Truncate(off); err != nil {
|
|
|
|
|
+ return err
|
|
|
|
|
+ }
|
|
|
if err := w.sync(); err != nil {
|
|
if err := w.sync(); err != nil {
|
|
|
return err
|
|
return err
|
|
|
}
|
|
}
|
|
@@ -342,15 +362,19 @@ func (w *WAL) cut() error {
|
|
|
}
|
|
}
|
|
|
newTail.Close()
|
|
newTail.Close()
|
|
|
|
|
|
|
|
- if newTail, err = fileutil.LockFile(fpath, os.O_WRONLY|os.O_APPEND, 0600); err != nil {
|
|
|
|
|
|
|
+ if newTail, err = fileutil.LockFile(fpath, os.O_WRONLY, 0600); err != nil {
|
|
|
return err
|
|
return err
|
|
|
}
|
|
}
|
|
|
|
|
+ if _, err = newTail.Seek(0, os.SEEK_END); err != nil {
|
|
|
|
|
+ return err
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
w.locks[len(w.locks)-1] = newTail
|
|
w.locks[len(w.locks)-1] = newTail
|
|
|
|
|
|
|
|
prevCrc = w.encoder.crc.Sum32()
|
|
prevCrc = w.encoder.crc.Sum32()
|
|
|
w.encoder = newEncoder(w.tail(), prevCrc)
|
|
w.encoder = newEncoder(w.tail(), prevCrc)
|
|
|
|
|
|
|
|
- if err = fileutil.Preallocate(w.tail().File, segmentSizeBytes, false); err != nil {
|
|
|
|
|
|
|
+ if err = fileutil.Preallocate(w.tail().File, segmentSizeBytes, true); err != nil {
|
|
|
plog.Errorf("failed to allocate space when creating new wal file (%v)", err)
|
|
plog.Errorf("failed to allocate space when creating new wal file (%v)", err)
|
|
|
return err
|
|
return err
|
|
|
}
|
|
}
|
|
@@ -478,11 +502,11 @@ func (w *WAL) Save(st raftpb.HardState, ents []raftpb.Entry) error {
|
|
|
return err
|
|
return err
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- fstat, err := w.tail().Stat()
|
|
|
|
|
|
|
+ curOff, err := w.tail().Seek(0, os.SEEK_CUR)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
return err
|
|
return err
|
|
|
}
|
|
}
|
|
|
- if fstat.Size() < segmentSizeBytes {
|
|
|
|
|
|
|
+ if curOff < segmentSizeBytes {
|
|
|
if mustSync {
|
|
if mustSync {
|
|
|
return w.sync()
|
|
return w.sync()
|
|
|
}
|
|
}
|
|
@@ -544,3 +568,12 @@ func mustSync(st, prevst raftpb.HardState, entsnum int) bool {
|
|
|
}
|
|
}
|
|
|
return false
|
|
return false
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+func closeAll(rcs ...io.ReadCloser) error {
|
|
|
|
|
+ for _, f := range rcs {
|
|
|
|
|
+ if err := f.Close(); err != nil {
|
|
|
|
|
+ return err
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return nil
|
|
|
|
|
+}
|