Browse Source

mvcc/backend: hold 'readTx.Lock' until completing bolt.Tx reset

Fix https://github.com/coreos/etcd/issues/7526.

When resetting `bolt.Tx` in `defrag` and `batchTxBuffered.commit`
operation, we do not hold `readTx` lock, so the inflight range
requests can trigger panic in `mvcc.Range` paths. This fixes by
moving mutexes out and hold it while resetting the `readTx`.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
Gyu-Ho Lee 8 years ago
parent
commit
26abd25cd3
2 changed files with 20 additions and 3 deletions
  1. 16 3
      mvcc/backend/backend.go
  2. 4 0
      mvcc/backend/batch_tx.go

+ 16 - 3
mvcc/backend/backend.go

@@ -247,7 +247,11 @@ func (b *backend) defrag() error {
 	b.mu.Lock()
 	b.mu.Lock()
 	defer b.mu.Unlock()
 	defer b.mu.Unlock()
 
 
-	b.batchTx.commit(true)
+	// block concurrent read requests while resetting tx
+	b.readTx.mu.Lock()
+	defer b.readTx.mu.Unlock()
+
+	b.batchTx.unsafeCommit(true)
 	b.batchTx.tx = nil
 	b.batchTx.tx = nil
 
 
 	tmpdb, err := bolt.Open(b.db.Path()+".tmp", 0600, boltOpenOptions)
 	tmpdb, err := bolt.Open(b.db.Path()+".tmp", 0600, boltOpenOptions)
@@ -288,6 +292,10 @@ func (b *backend) defrag() error {
 		plog.Fatalf("cannot begin tx (%s)", err)
 		plog.Fatalf("cannot begin tx (%s)", err)
 	}
 	}
 
 
+	b.readTx.buf.reset()
+	b.readTx.tx = b.unsafeBegin(false)
+	atomic.StoreInt64(&b.size, b.readTx.tx.Size())
+
 	return nil
 	return nil
 }
 }
 
 
@@ -345,12 +353,17 @@ func defragdb(odb, tmpdb *bolt.DB, limit int) error {
 
 
 func (b *backend) begin(write bool) *bolt.Tx {
 func (b *backend) begin(write bool) *bolt.Tx {
 	b.mu.RLock()
 	b.mu.RLock()
+	tx := b.unsafeBegin(write)
+	b.mu.RUnlock()
+	atomic.StoreInt64(&b.size, tx.Size())
+	return tx
+}
+
+func (b *backend) unsafeBegin(write bool) *bolt.Tx {
 	tx, err := b.db.Begin(write)
 	tx, err := b.db.Begin(write)
 	if err != nil {
 	if err != nil {
 		plog.Fatalf("cannot begin tx (%s)", err)
 		plog.Fatalf("cannot begin tx (%s)", err)
 	}
 	}
-	b.mu.RUnlock()
-	atomic.StoreInt64(&b.size, tx.Size())
 	return tx
 	return tx
 }
 }
 
 

+ 4 - 0
mvcc/backend/batch_tx.go

@@ -244,6 +244,10 @@ func (t *batchTxBuffered) commit(stop bool) {
 	// all read txs must be closed to acquire boltdb commit rwlock
 	// all read txs must be closed to acquire boltdb commit rwlock
 	t.backend.readTx.mu.Lock()
 	t.backend.readTx.mu.Lock()
 	defer t.backend.readTx.mu.Unlock()
 	defer t.backend.readTx.mu.Unlock()
+	t.unsafeCommit(stop)
+}
+
+func (t *batchTxBuffered) unsafeCommit(stop bool) {
 	if t.backend.readTx.tx != nil {
 	if t.backend.readTx.tx != nil {
 		if err := t.backend.readTx.tx.Rollback(); err != nil {
 		if err := t.backend.readTx.tx.Rollback(); err != nil {
 			plog.Fatalf("cannot rollback tx (%s)", err)
 			plog.Fatalf("cannot rollback tx (%s)", err)