@@ -15,253 +15,246 @@
 package grpcproxy
 
 import (
-	"io"
 	"sync"
 
 	"golang.org/x/net/context"
+	"golang.org/x/time/rate"
+	"google.golang.org/grpc/metadata"
 
 	"github.com/coreos/etcd/clientv3"
 	"github.com/coreos/etcd/etcdserver/api/v3rpc"
+	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
 	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
 )
 
 type watchProxy struct {
 	cw  clientv3.Watcher
-	wgs watchergroups
+	ctx context.Context
 
-	mu           sync.Mutex
-	nextStreamID int64
+	ranges *watchRanges
 
-	ctx context.Context
+	// retryLimiter controls the create watch retry rate on lost leaders.
+	retryLimiter *rate.Limiter
+
+	// mu protects leaderc updates.
+	mu      sync.RWMutex
+	leaderc chan struct{}
+
+	// wg waits until all outstanding watch servers quit.
+	wg sync.WaitGroup
 }
 
+const (
+	lostLeaderKey  = "__lostleader" // watched to detect leader loss
+	retryPerSecond = 10
+)
+
 func NewWatchProxy(c *clientv3.Client) pb.WatchServer {
 	wp := &watchProxy{
-		cw: c.Watcher,
-		wgs: watchergroups{
-			cw:        c.Watcher,
-			groups:    make(map[watchRange]*watcherGroup),
-			idToGroup: make(map[receiverID]*watcherGroup),
-			proxyCtx:  c.Ctx(),
-		},
-		ctx: c.Ctx(),
+		cw:           c.Watcher,
+		ctx:          clientv3.WithRequireLeader(c.Ctx()),
+		retryLimiter: rate.NewLimiter(rate.Limit(retryPerSecond), retryPerSecond),
+		leaderc:      make(chan struct{}),
 	}
+	wp.ranges = newWatchRanges(wp)
 	go func() {
+		// a new stream without opening any watchers won't catch
+		// a lost leader event, so have a special watch to monitor it
+		rev := int64((uint64(1) << 63) - 2)
+		for wp.ctx.Err() == nil {
+			wch := wp.cw.Watch(wp.ctx, lostLeaderKey, clientv3.WithRev(rev))
+			for range wch {
+			}
+			wp.mu.Lock()
+			close(wp.leaderc)
+			wp.leaderc = make(chan struct{})
+			wp.mu.Unlock()
+			wp.retryLimiter.Wait(wp.ctx)
+		}
+		wp.mu.Lock()
 		<-wp.ctx.Done()
-		wp.wgs.stop()
+		wp.mu.Unlock()
+		wp.wg.Wait()
+		wp.ranges.stop()
 	}()
 	return wp
 }
 
 func (wp *watchProxy) Watch(stream pb.Watch_WatchServer) (err error) {
 	wp.mu.Lock()
-	wp.nextStreamID++
-	sid := wp.nextStreamID
+	select {
+	case <-wp.ctx.Done():
+		wp.mu.Unlock()
+		return
+	default:
+		wp.wg.Add(1)
+	}
 	wp.mu.Unlock()
 
-	ctx, cancel := context.WithCancel(wp.ctx)
-	sws := serverWatchStream{
-		cw:       wp.cw,
-		groups:   &wp.wgs,
-		singles:  make(map[int64]*watcherSingle),
-		inGroups: make(map[int64]struct{}),
+	ctx, cancel := context.WithCancel(stream.Context())
+	wps := &watchProxyStream{
+		ranges:   wp.ranges,
+		watchers: make(map[int64]*watcher),
+		stream:   stream,
+		watchCh:  make(chan *pb.WatchResponse, 1024),
+		ctx:      ctx,
+		cancel:   cancel,
+	}
 
-		id:         sid,
-		gRPCStream: stream,
+	var leaderc <-chan struct{}
+	if md, ok := metadata.FromContext(stream.Context()); ok {
+		v := md[rpctypes.MetadataRequireLeaderKey]
+		if len(v) > 0 && v[0] == rpctypes.MetadataHasLeader {
+			leaderc = wp.lostLeaderNotify()
+		}
+	}
 
-		watchCh: make(chan *pb.WatchResponse, 1024),
+	// post to stopc => terminate server stream; can't use a waitgroup
+	// since all goroutines will only terminate after Watch() exits.
+	stopc := make(chan struct{}, 3)
+	go func() {
+		defer func() { stopc <- struct{}{} }()
+		wps.recvLoop()
+	}()
+	go func() {
+		defer func() { stopc <- struct{}{} }()
+		wps.sendLoop()
+	}()
+	if leaderc != nil {
+		go func() {
+			defer func() { stopc <- struct{}{} }()
+			select {
+			case <-leaderc:
+			case <-ctx.Done():
+			}
+		}()
+	}
+
+	<-stopc
+	// recv/send may only shutdown after function exits;
+	// goroutine notifies proxy that stream is through
+	go func() {
+		if leaderc != nil {
+			<-stopc
+		}
+		<-stopc
+		wps.close()
+		wp.wg.Done()
+	}()
 
-		ctx:    ctx,
-		cancel: cancel,
+	select {
+	case <-leaderc:
+		return rpctypes.ErrNoLeader
+	default:
+		return wps.ctx.Err()
 	}
+}
 
-	go sws.recvLoop()
-	sws.sendLoop()
-	return wp.ctx.Err()
+func (wp *watchProxy) lostLeaderNotify() <-chan struct{} {
+	wp.mu.RLock()
+	defer wp.mu.RUnlock()
+	return wp.leaderc
 }
 
-type serverWatchStream struct {
-	id int64
-	cw clientv3.Watcher
+// watchProxyStream forwards etcd watch events to a proxied client stream.
+type watchProxyStream struct {
+	ranges *watchRanges
 
-	mu       sync.Mutex // make sure any access of groups and singles is atomic
-	groups   *watchergroups
-	singles  map[int64]*watcherSingle
-	inGroups map[int64]struct{}
+	// mu protects watchers and nextWatcherID
+	mu sync.Mutex
+	// watchers receive events from watch broadcast.
+	watchers map[int64]*watcher
+	// nextWatcherID is the id to assign the next watcher on this stream.
+	nextWatcherID int64
 
-	gRPCStream pb.Watch_WatchServer
+	stream pb.Watch_WatchServer
 
+	// watchCh receives watch responses from the watchers.
 	watchCh chan *pb.WatchResponse
 
-	nextWatcherID int64
-
 	ctx    context.Context
 	cancel context.CancelFunc
 }
 
-func (sws *serverWatchStream) close() {
+func (wps *watchProxyStream) close() {
 	var wg sync.WaitGroup
-	sws.cancel()
-	sws.mu.Lock()
-	wg.Add(len(sws.singles) + len(sws.inGroups))
-	for _, ws := range sws.singles {
-		// copy the range variable to avoid race
-		copyws := ws
-		go func() {
-			copyws.stop()
-			wg.Done()
-		}()
-	}
-	for id := range sws.inGroups {
-		// copy the range variable to avoid race
-		wid := id
-		go func() {
-			sws.groups.removeWatcher(receiverID{streamID: sws.id, watcherID: wid})
+	wps.cancel()
+	wps.mu.Lock()
+	wg.Add(len(wps.watchers))
+	for _, wpsw := range wps.watchers {
+		go func(w *watcher) {
+			wps.ranges.delete(w)
 			wg.Done()
-		}()
+		}(wpsw)
 	}
-	sws.inGroups = nil
-	sws.mu.Unlock()
+	wps.watchers = nil
+	wps.mu.Unlock()
 
 	wg.Wait()
 
-	close(sws.watchCh)
+	close(wps.watchCh)
 }
 
-func (sws *serverWatchStream) recvLoop() error {
-	defer sws.close()
-
+func (wps *watchProxyStream) recvLoop() error {
 	for {
-		req, err := sws.gRPCStream.Recv()
-		if err == io.EOF {
-			return nil
-		}
+		req, err := wps.stream.Recv()
 		if err != nil {
 			return err
 		}
-
 		switch uv := req.RequestUnion.(type) {
 		case *pb.WatchRequest_CreateRequest:
 			cr := uv.CreateRequest
+			w := &watcher{
+				wr:  watchRange{string(cr.Key), string(cr.RangeEnd)},
+				id:  wps.nextWatcherID,
+				wps: wps,
 
-			watcher := watcher{
-				wr: watchRange{
-					key: string(cr.Key),
-					end: string(cr.RangeEnd),
-				},
-				id:  sws.nextWatcherID,
-				sws: sws,
-
+				nextrev:  cr.StartRevision,
 				progress: cr.ProgressNotify,
 				filters:  v3rpc.FiltersFromRequest(cr),
 			}
-			if cr.StartRevision != 0 {
-				sws.addDedicatedWatcher(watcher, cr.StartRevision)
-			} else {
-				sws.addCoalescedWatcher(watcher)
-			}
-			sws.nextWatcherID++
-
+			wps.nextWatcherID++
+			w.nextrev = cr.StartRevision
+			wps.watchers[w.id] = w
+			wps.ranges.add(w)
 		case *pb.WatchRequest_CancelRequest:
-			sws.removeWatcher(uv.CancelRequest.WatchId)
+			wps.delete(uv.CancelRequest.WatchId)
 		default:
 			panic("not implemented")
 		}
 	}
 }
 
-func (sws *serverWatchStream) sendLoop() {
+func (wps *watchProxyStream) sendLoop() {
 	for {
 		select {
-		case wresp, ok := <-sws.watchCh:
+		case wresp, ok := <-wps.watchCh:
 			if !ok {
 				return
 			}
-			if err := sws.gRPCStream.Send(wresp); err != nil {
+			if err := wps.stream.Send(wresp); err != nil {
 				return
 			}
-		case <-sws.ctx.Done():
+		case <-wps.ctx.Done():
 			return
 		}
 	}
 }
 
-func (sws *serverWatchStream) addCoalescedWatcher(w watcher) {
-	sws.mu.Lock()
-	defer sws.mu.Unlock()
+func (wps *watchProxyStream) delete(id int64) {
+	wps.mu.Lock()
+	defer wps.mu.Unlock()
 
-	rid := receiverID{streamID: sws.id, watcherID: w.id}
-	sws.groups.addWatcher(rid, w)
-	sws.inGroups[w.id] = struct{}{}
-}
-
-func (sws *serverWatchStream) addDedicatedWatcher(w watcher, rev int64) {
-	ctx, cancel := context.WithCancel(sws.ctx)
-	wch := sws.cw.Watch(ctx,
-		w.wr.key, clientv3.WithRange(w.wr.end),
-		clientv3.WithRev(rev),
-		clientv3.WithProgressNotify(),
-		clientv3.WithCreatedNotify(),
-	)
-	sws.mu.Lock()
-	defer sws.mu.Unlock()
-	ws := newWatcherSingle(wch, cancel, w, sws)
-	sws.singles[w.id] = ws
-	go ws.run()
-}
-
-func (sws *serverWatchStream) maybeCoalesceWatcher(ws watcherSingle) bool {
-	sws.mu.Lock()
-	defer sws.mu.Unlock()
-
-	// do not add new watchers when stream is closing
-	if sws.inGroups == nil {
-		return false
-	}
-	if sws.groups.maybeJoinWatcherSingle(ws) {
-		delete(sws.singles, ws.w.id)
-		sws.inGroups[ws.w.id] = struct{}{}
-		return true
-	}
-	return false
-}
-
-func (sws *serverWatchStream) removeWatcher(id int64) {
-	sws.mu.Lock()
-	defer sws.mu.Unlock()
-
-	var (
-		rev int64
-		ok  bool
-	)
-
-	defer func() {
-		if !ok {
-			return
-		}
-		resp := &pb.WatchResponse{
-			Header: &pb.ResponseHeader{
-				// todo: fill in ClusterId
-				// todo: fill in MemberId:
-				Revision: rev,
-				// todo: fill in RaftTerm:
-			},
-			WatchId:  id,
-			Canceled: true,
-		}
-		sws.watchCh <- resp
-	}()
-
-	rev, ok = sws.groups.removeWatcher(receiverID{streamID: sws.id, watcherID: id})
-	if ok {
-		delete(sws.inGroups, id)
+	w, ok := wps.watchers[id]
+	if !ok {
 		return
 	}
-
-	var ws *watcherSingle
-	if ws, ok = sws.singles[id]; ok {
-		delete(sws.singles, id)
-		ws.stop()
-		rev = ws.lastStoreRev
+	wps.ranges.delete(w)
+	delete(wps.watchers, id)
+	resp := &pb.WatchResponse{
+		Header:   &w.lastHeader,
+		WatchId:  id,
+		Canceled: true,
 	}
+	wps.watchCh <- resp
 }
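
Usage note (not part of the patch): a minimal client-side sketch of how a caller would opt into the leader-loss behavior added above. The proxy only wires up the lost-leader notification when the stream carries the require-leader metadata, which clientv3.WithRequireLeader attaches; the endpoint address and key below are hypothetical placeholders.

package main

import (
	"fmt"
	"time"

	"golang.org/x/net/context"

	"github.com/coreos/etcd/clientv3"
)

func main() {
	// Dial the watch proxy (the address here is a placeholder for this sketch).
	cli, err := clientv3.New(clientv3.Config{
		Endpoints:   []string{"127.0.0.1:23790"},
		DialTimeout: 5 * time.Second,
	})
	if err != nil {
		panic(err)
	}
	defer cli.Close()

	// WithRequireLeader adds the "hasleader" metadata to the gRPC stream, so the
	// proxy's Watch() above returns rpctypes.ErrNoLeader once the leader is lost
	// and the watch channel is closed with an error.
	ctx := clientv3.WithRequireLeader(context.Background())
	for wresp := range cli.Watch(ctx, "foo") {
		if err := wresp.Err(); err != nil {
			fmt.Println("watch closed:", err)
			return
		}
		for _, ev := range wresp.Events {
			fmt.Printf("%s %q -> %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value)
		}
	}
}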