|
|
@@ -275,6 +275,18 @@ func (c *caseUntilSnapshot) Inject(clus *Cluster) error {
|
|
|
|
|
|
for i := 0; i < retries; i++ {
|
|
|
lastRev, err = clus.maxRev()
|
|
|
+ if lastRev == 0 {
|
|
|
+ clus.lg.Info(
|
|
|
+ "trigger snapshot RETRY",
|
|
|
+ zap.Int("retries", i),
|
|
|
+ zap.Int64("etcd-snapshot-count", snapshotCount),
|
|
|
+ zap.Int64("start-revision", startRev),
|
|
|
+ zap.Error(err),
|
|
|
+ )
|
|
|
+ time.Sleep(3 * time.Second)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
// If the number of proposals committed is bigger than snapshot count,
|
|
|
// a new snapshot should have been created.
|
|
|
diff := lastRev - startRev
|
|
|
@@ -292,12 +304,8 @@ func (c *caseUntilSnapshot) Inject(clus *Cluster) error {
|
|
|
return nil
|
|
|
}
|
|
|
|
|
|
- dur := time.Second
|
|
|
- if diff < 0 || err != nil {
|
|
|
- dur = 3 * time.Second
|
|
|
- }
|
|
|
clus.lg.Info(
|
|
|
- "trigger snapshot PROGRESS",
|
|
|
+ "trigger snapshot RETRY",
|
|
|
zap.Int("retries", i),
|
|
|
zap.Int64("committed-entries", diff),
|
|
|
zap.Int64("etcd-snapshot-count", snapshotCount),
|
|
|
@@ -306,7 +314,10 @@ func (c *caseUntilSnapshot) Inject(clus *Cluster) error {
|
|
|
zap.Duration("took", time.Since(now)),
|
|
|
zap.Error(err),
|
|
|
)
|
|
|
- time.Sleep(dur)
|
|
|
+ time.Sleep(time.Second)
|
|
|
+ if err != nil {
|
|
|
+ time.Sleep(2 * time.Second)
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
return fmt.Errorf("cluster too slow: only %d commits in %d retries", lastRev-startRev, retries)
|