Browse Source

Merge pull request #6487 from heyitsanthony/watch-stress

clientv3: process closed watcherStreams in watcherGrpcStream run loop
Anthony Romano 9 years ago
parent
commit
27c500d8d0
2 changed files with 87 additions and 19 deletions
  1. 60 0
      clientv3/integration/watch_test.go
  2. 27 19
      clientv3/watch.go

+ 60 - 0
clientv3/integration/watch_test.go

@@ -759,3 +759,63 @@ func TestWatchCancelOnServer(t *testing.T) {
 		t.Fatalf("expected 0 watchers, got %q", watchers)
 	}
 }
+
+// TestWatchOverlapContextCancel stresses the watcher stream teardown path by
+// creating/canceling watchers to ensure that new watchers are not taken down
+// by a torn down watch stream. The sort of race that's being detected:
+//     1. create w1 using a cancelable ctx with %v as "ctx"
+//     2. cancel ctx
+//     3. watcher client begins tearing down watcher grpc stream since no more watchers
+//     4. start creating watcher w2 using a new "ctx" (not canceled), attaches to old grpc stream
+//     5. watcher client finishes tearing down stream on "ctx"
+//     6. w2 comes back canceled
+func TestWatchOverlapContextCancel(t *testing.T) {
+	defer testutil.AfterTest(t)
+	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
+	defer clus.Terminate(t)
+
+	cli := clus.RandClient()
+	if _, err := cli.Put(context.TODO(), "abc", "def"); err != nil {
+		t.Fatal(err)
+	}
+
+	// each unique context "%v" has a unique grpc stream
+	n := 100
+	ctxs, ctxc := make([]context.Context, 5), make([]chan struct{}, 5)
+	for i := range ctxs {
+		// make "%v" unique
+		ctxs[i] = context.WithValue(context.TODO(), "key", i)
+		// limits the maximum number of outstanding watchers per stream
+		ctxc[i] = make(chan struct{}, 2)
+	}
+	ch := make(chan struct{}, n)
+	// issue concurrent watches with cancel
+	for i := 0; i < n; i++ {
+		go func() {
+			defer func() { ch <- struct{}{} }()
+			idx := rand.Intn(len(ctxs))
+			ctx, cancel := context.WithCancel(ctxs[idx])
+			ctxc[idx] <- struct{}{}
+			// wch shadows nothing; the completion channel "ch" above stays visible
+			wch := cli.Watch(ctx, "abc", clientv3.WithRev(1))
+			if _, ok := <-wch; !ok {
+				// t.Fatalf must not be called outside the test goroutine
+				// (testing.T.FailNow documentation); record the failure and
+				// fall through so this goroutine still signals completion.
+				t.Errorf("unexpected closed channel")
+			}
+			// randomize how cancel overlaps with watch creation
+			if rand.Intn(2) == 0 {
+				<-ctxc[idx]
+				cancel()
+			} else {
+				cancel()
+				<-ctxc[idx]
+			}
+		}()
+	}
+	// join on watches
+	for i := 0; i < n; i++ {
+		select {
+		case <-ch:
+		case <-time.After(time.Second):
+			t.Fatalf("timed out waiting for completed watch")
+		}
+	}
+}

+ 27 - 19
clientv3/watch.go

@@ -131,6 +131,8 @@ type watchGrpcStream struct {
 	donec chan struct{}
 	// errc transmits errors from grpc Recv to the watch stream reconn logic
 	errc chan error
+	// closingc gets the watcherStream of closing watchers
+	closingc chan *watcherStream
 
 	// the error that closed the watch stream
 	closeErr error
@@ -203,11 +205,12 @@ func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
 		cancel:  cancel,
 		streams: make(map[int64]*watcherStream),
 
-		respc: make(chan *pb.WatchResponse),
-		reqc:  make(chan *watchRequest),
-		stopc: make(chan struct{}),
-		donec: make(chan struct{}),
-		errc:  make(chan error, 1),
+		respc:    make(chan *pb.WatchResponse),
+		reqc:     make(chan *watchRequest),
+		stopc:    make(chan struct{}),
+		donec:    make(chan struct{}),
+		errc:     make(chan error, 1),
+		closingc: make(chan *watcherStream),
 	}
 	go wgs.run()
 	return wgs
@@ -268,7 +271,6 @@ func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) Watch
 	case reqc <- wr:
 		ok = true
 	case <-wr.ctx.Done():
-		wgs.stopIfEmpty()
 	case <-donec:
 		if wgs.closeErr != nil {
 			closeCh <- WatchResponse{closeErr: wgs.closeErr}
@@ -378,15 +380,19 @@ func (w *watchGrpcStream) addStream(resp *pb.WatchResponse, pendingReq *watchReq
 	go w.serveStream(ws)
 }
 
-// closeStream closes the watcher resources and removes it
-func (w *watchGrpcStream) closeStream(ws *watcherStream) bool {
+// closeStream closes the watcher's subscriber channels, removes it from the
+// stream registry, and reports whether it was the last watcher on the stream
+// (the caller uses the returned bool to decide when to tear down the stream).
+func (w *watchGrpcStream) closeStream(ws *watcherStream) bool {
 	w.mu.Lock()
 	// cancels request stream; subscriber receives nil channel
 	close(ws.initReq.retc)
 	// close subscriber's channel
 	close(ws.outc)
 	delete(w.streams, ws.id)
+	empty := len(w.streams) == 0
+	if empty && w.stopc != nil {
+		w.stopc = nil
+	}
 	w.mu.Unlock()
+	return empty
 }
 
 // run is the root of the goroutines for managing a watcher client
@@ -491,6 +497,10 @@ func (w *watchGrpcStream) run() {
 			cancelSet = make(map[int64]struct{})
 		case <-stopc:
 			return
+		case ws := <-w.closingc:
+			if w.closeStream(ws) {
+				return
+			}
 		}
 
 		// send failed; queue for retry
@@ -553,6 +563,15 @@ func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
 
 // serveStream forwards watch responses from run() to the subscriber
 func (w *watchGrpcStream) serveStream(ws *watcherStream) {
+	defer func() {
+		// signal that this watcherStream is finished
+		select {
+		case w.closingc <- ws:
+		case <-w.donec:
+			w.closeStream(ws)
+		}
+	}()
+
 	var closeErr error
 	emptyWr := &WatchResponse{}
 	wrs := []*WatchResponse{}
@@ -641,20 +660,9 @@ func (w *watchGrpcStream) serveStream(ws *watcherStream) {
 		}
 	}
 
-	w.closeStream(ws)
-	w.stopIfEmpty()
 	// lazily send cancel message if events on missing id
 }
 
-func (wgs *watchGrpcStream) stopIfEmpty() {
-	wgs.mu.Lock()
-	if len(wgs.streams) == 0 && wgs.stopc != nil {
-		close(wgs.stopc)
-		wgs.stopc = nil
-	}
-	wgs.mu.Unlock()
-}
-
 func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
 	ws, rerr := w.resume()
 	if rerr != nil {