Ver Fonte

wal: Improve cleanup for robustness and debuggability

Rename the WAL directory with a '.broken.<timestamp>' suffix instead of deleting it, and call cleanup from a deferred function when perr is non-nil.
jcoutin há 6 anos atrás
pai
commit
f7f7e9c762
1 ficheiros alterados com 18 adições e 8 exclusões
  1. 18 8
      wal/wal.go

+ 18 - 8
wal/wal.go

@@ -184,6 +184,13 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
+	var perr error
+	defer func() {
+		if perr != nil {
+			w.cleanupWAL(lg)
+		}
+	}()
+
 	// directory was renamed; sync parent dir to persist rename
 	// directory was renamed; sync parent dir to persist rename
 	pdir, perr := fileutil.OpenDir(filepath.Dir(w.dir))
 	pdir, perr := fileutil.OpenDir(filepath.Dir(w.dir))
 	if perr != nil {
 	if perr != nil {
@@ -195,7 +202,6 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) {
 				zap.Error(perr),
 				zap.Error(perr),
 			)
 			)
 		}
 		}
-		w.cleanupWAL(lg)
 		return nil, perr
 		return nil, perr
 	}
 	}
 	if perr = fileutil.Fsync(pdir); perr != nil {
 	if perr = fileutil.Fsync(pdir); perr != nil {
@@ -207,7 +213,6 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) {
 				zap.Error(perr),
 				zap.Error(perr),
 			)
 			)
 		}
 		}
-		w.cleanupWAL(lg)
 		return nil, perr
 		return nil, perr
 	}
 	}
 	if perr = pdir.Close(); perr != nil {
 	if perr = pdir.Close(); perr != nil {
@@ -219,7 +224,6 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) {
 				zap.Error(perr),
 				zap.Error(perr),
 			)
 			)
 		}
 		}
-		w.cleanupWAL(lg)
 		return nil, perr
 		return nil, perr
 	}
 	}
 
 
@@ -230,16 +234,22 @@ func (w *WAL) cleanupWAL(lg *zap.Logger) {
 	var err error
 	var err error
 	if err = w.Close(); err != nil {
 	if err = w.Close(); err != nil {
 		if lg != nil {
 		if lg != nil {
-			lg.Panic("failed to cleanup WAL", zap.Error(err))
+			lg.Panic("failed to closeup WAL during cleanup", zap.Error(err))
 		} else {
 		} else {
-			plog.Panicf("failed to cleanup WAL: %v", err)
+			plog.Panicf("failed to closeup WAL during cleanup: %v", err)
 		}
 		}
 	}
 	}
-	if err = os.RemoveAll(w.dir); err != nil {
+	brokenDirName := fmt.Sprintf("%s.broken.%v", w.dir, time.Now().Format("20060102.150405.999999"))
+	if err = os.Rename(w.dir, brokenDirName); err != nil {
 		if lg != nil {
 		if lg != nil {
-			lg.Panic("failed to cleanup WAL", zap.Error(err))
+			lg.Panic(
+				"failed to rename WAL during cleanup",
+				zap.Error(err),
+				zap.String("source-path", w.dir),
+				zap.String("rename-path", brokenDirName),
+			)
 		} else {
 		} else {
-			plog.Panicf("failed to cleanup WAL: %v", err)
+			plog.Panicf("failed to rename WAL during cleanup: %v", err)
 		}
 		}
 	}
 	}
 }
 }