watch.go

// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package grpcproxy

import (
	"sync"

	"golang.org/x/net/context"
	"golang.org/x/time/rate"
	"google.golang.org/grpc/metadata"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/etcdserver/api/v3rpc"
	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
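
// watchProxy implements pb.WatchServer for the gRPC proxy, serving client
// watch streams through the backing clientv3 watcher and notifying those
// streams when the etcd leader is lost.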
type watchProxy struct {
	cw  clientv3.Watcher
	ctx context.Context

	ranges *watchRanges

	// retryLimiter controls the create watch retry rate on lost leaders.
	retryLimiter *rate.Limiter

	// mu protects leaderc updates.
	mu      sync.RWMutex
	leaderc chan struct{}

	// wg waits until all outstanding watch servers quit.
	wg sync.WaitGroup
}

const (
	lostLeaderKey  = "__lostleader" // watched to detect leader loss
	retryPerSecond = 10
)
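
// NewWatchProxy creates a pb.WatchServer backed by c's watcher. The returned
// channel is closed once the proxy's context is done and all outstanding
// watch streams and ranges have been shut down.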
func NewWatchProxy(c *clientv3.Client) (pb.WatchServer, <-chan struct{}) {
	wp := &watchProxy{
		cw:           c.Watcher,
		ctx:          clientv3.WithRequireLeader(c.Ctx()),
		retryLimiter: rate.NewLimiter(rate.Limit(retryPerSecond), retryPerSecond),
		leaderc:      make(chan struct{}),
	}
	wp.ranges = newWatchRanges(wp)
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		// a new stream without any open watchers won't catch a lost leader
		// event, so keep a special watch open to monitor it
		rev := int64((uint64(1) << 63) - 2)
		for wp.ctx.Err() == nil {
			wch := wp.cw.Watch(wp.ctx, lostLeaderKey, clientv3.WithRev(rev))
			for range wch {
			}
			wp.mu.Lock()
			close(wp.leaderc)
			wp.leaderc = make(chan struct{})
			wp.mu.Unlock()
			wp.retryLimiter.Wait(wp.ctx)
		}
		wp.mu.Lock()
		<-wp.ctx.Done()
		wp.mu.Unlock()
		wp.wg.Wait()
		wp.ranges.stop()
	}()
	return wp, ch
}
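
// Watch proxies a single client watch stream. It runs receive and send loops
// for the stream and tears the stream down when the client's context ends,
// the proxy shuts down, or, for streams requiring a leader, the leader is lost.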
func (wp *watchProxy) Watch(stream pb.Watch_WatchServer) (err error) {
	wp.mu.Lock()
	select {
	case <-wp.ctx.Done():
		wp.mu.Unlock()
		return
	default:
		wp.wg.Add(1)
	}
	wp.mu.Unlock()

	ctx, cancel := context.WithCancel(stream.Context())
	wps := &watchProxyStream{
		ranges:   wp.ranges,
		watchers: make(map[int64]*watcher),
		stream:   stream,
		watchCh:  make(chan *pb.WatchResponse, 1024),
		ctx:      ctx,
		cancel:   cancel,
	}

	var leaderc <-chan struct{}
	if md, ok := metadata.FromContext(stream.Context()); ok {
		v := md[rpctypes.MetadataRequireLeaderKey]
		if len(v) > 0 && v[0] == rpctypes.MetadataHasLeader {
			leaderc = wp.lostLeaderNotify()
		}
	}

	// post to stopc => terminate server stream; can't use a waitgroup
	// since all goroutines will only terminate after Watch() exits.
	stopc := make(chan struct{}, 3)
	go func() {
		defer func() { stopc <- struct{}{} }()
		wps.recvLoop()
	}()
	go func() {
		defer func() { stopc <- struct{}{} }()
		wps.sendLoop()
	}()
	// tear down watch if leader goes down or entire watch proxy is terminated
	go func() {
		defer func() { stopc <- struct{}{} }()
		select {
		case <-leaderc:
		case <-ctx.Done():
		case <-wp.ctx.Done():
		}
	}()

	<-stopc
	cancel()

	// recv/send may only shutdown after function exits;
	// goroutine notifies proxy that stream is through
	go func() {
		<-stopc
		<-stopc
		wps.close()
		wp.wg.Done()
	}()

	select {
	case <-leaderc:
		return rpctypes.ErrNoLeader
	default:
		return wps.ctx.Err()
	}
}
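
// lostLeaderNotify returns a channel that is closed the next time the proxy
// detects a loss of the etcd leader.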
func (wp *watchProxy) lostLeaderNotify() <-chan struct{} {
	wp.mu.RLock()
	defer wp.mu.RUnlock()
	return wp.leaderc
}

// watchProxyStream forwards etcd watch events to a proxied client stream.
type watchProxyStream struct {
	ranges *watchRanges

	// mu protects watchers and nextWatcherID
	mu sync.Mutex
	// watchers receive events from watch broadcast.
	watchers map[int64]*watcher
	// nextWatcherID is the id to assign the next watcher on this stream.
	nextWatcherID int64

	stream pb.Watch_WatchServer

	// watchCh receives watch responses from the watchers.
	watchCh chan *pb.WatchResponse

	ctx    context.Context
	cancel context.CancelFunc
}
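
// close cancels the stream, detaches all of its watchers from their broadcast
// ranges, and closes the outgoing watch response channel.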
func (wps *watchProxyStream) close() {
	var wg sync.WaitGroup
	wps.cancel()
	wps.mu.Lock()
	wg.Add(len(wps.watchers))
	for _, wpsw := range wps.watchers {
		go func(w *watcher) {
			wps.ranges.delete(w)
			wg.Done()
		}(wpsw)
	}
	wps.watchers = nil
	wps.mu.Unlock()

	wg.Wait()

	close(wps.watchCh)
}
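
// recvLoop reads watch requests from the client stream, registering a watcher
// for each valid create request and deleting watchers on cancel requests.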
func (wps *watchProxyStream) recvLoop() error {
	for {
		req, err := wps.stream.Recv()
		if err != nil {
			return err
		}
		switch uv := req.RequestUnion.(type) {
		case *pb.WatchRequest_CreateRequest:
			cr := uv.CreateRequest
			w := &watcher{
				wr:       watchRange{string(cr.Key), string(cr.RangeEnd)},
				id:       wps.nextWatcherID,
				wps:      wps,
				nextrev:  cr.StartRevision,
				progress: cr.ProgressNotify,
				prevKV:   cr.PrevKv,
				filters:  v3rpc.FiltersFromRequest(cr),
			}
			if !w.wr.valid() {
				w.post(&pb.WatchResponse{WatchId: -1, Created: true, Canceled: true})
				continue
			}
			wps.nextWatcherID++
			w.nextrev = cr.StartRevision
			wps.watchers[w.id] = w
			wps.ranges.add(w)
		case *pb.WatchRequest_CancelRequest:
			wps.delete(uv.CancelRequest.WatchId)
		default:
			panic("not implemented")
		}
	}
}
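
// sendLoop forwards watch responses from watchCh to the client stream until
// the channel is closed or the stream's context is done.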
func (wps *watchProxyStream) sendLoop() {
	for {
		select {
		case wresp, ok := <-wps.watchCh:
			if !ok {
				return
			}
			if err := wps.stream.Send(wresp); err != nil {
				return
			}
		case <-wps.ctx.Done():
			return
		}
	}
}
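
// delete removes the watcher with the given id from the stream and queues a
// cancellation response for the client.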
func (wps *watchProxyStream) delete(id int64) {
	wps.mu.Lock()
	defer wps.mu.Unlock()

	w, ok := wps.watchers[id]
	if !ok {
		return
	}
	wps.ranges.delete(w)
	delete(wps.watchers, id)

	resp := &pb.WatchResponse{
		Header:   &w.lastHeader,
		WatchId:  id,
		Canceled: true,
	}
	wps.watchCh <- resp
}
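
// One possible way to wire the proxy into a gRPC server (a hedged sketch;
// assumes a *clientv3.Client c and a *grpc.Server srv created elsewhere):
//
//	proxy, donec := NewWatchProxy(c)
//	pb.RegisterWatchServer(srv, proxy)
//	// ... start srv and serve client traffic ...
//	<-donec // closed once the proxy has shut down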