watch.go

// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package clientv3

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	v3rpc "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
	pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
	mvccpb "go.etcd.io/etcd/mvcc/mvccpb"

	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/metadata"
	"google.golang.org/grpc/status"
)
const (
	EventTypeDelete = mvccpb.DELETE
	EventTypePut    = mvccpb.PUT

	closeSendErrTimeout = 250 * time.Millisecond
)

type Event mvccpb.Event

type WatchChan <-chan WatchResponse

type Watcher interface {
	// Watch watches on a key or prefix. The watched events will be returned
	// through the returned channel. If revisions waiting to be sent over the
	// watch are compacted, then the watch will be canceled by the server, the
	// client will post a compacted error watch response, and the channel will close.
	// If the context "ctx" is canceled or timed out, returned "WatchChan" is closed,
	// and "WatchResponse" from this closed channel has zero events and nil "Err()".
	// The context "ctx" MUST be canceled, as soon as watcher is no longer being used,
	// to release the associated resources.
	//
	// If the context is "context.Background/TODO", returned "WatchChan" will
	// not be closed and block until event is triggered, except when server
	// returns a non-recoverable error (e.g. ErrCompacted).
	// For example, when context passed with "WithRequireLeader" and the
	// connected server has no leader (e.g. due to network partition),
	// error "etcdserver: no leader" (ErrNoLeader) will be returned,
	// and then "WatchChan" is closed with non-nil "Err()".
	// In order to prevent a watch stream being stuck in a partitioned node,
	// make sure to wrap context with "WithRequireLeader".
	//
	// Otherwise, as long as the context has not been canceled or timed out,
	// watch will retry on other recoverable errors forever until reconnected.
	//
	// TODO: explicitly set context error in the last "WatchResponse" message and close channel?
	// Currently, client contexts are overwritten with "valCtx" that never closes.
	// TODO(v3.4): configure watch retry policy, limit maximum retry number
	// (see https://github.com/etcd-io/etcd/issues/8980)
	Watch(ctx context.Context, key string, opts ...OpOption) WatchChan

	// RequestProgress requests a progress notify response be sent in all watch channels.
	RequestProgress(ctx context.Context) error

	// Close closes the watcher and cancels all watch requests.
	Close() error
}
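
// A minimal usage sketch (illustrative only; "cli" stands for an assumed
// *Client created by the caller):
//
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel() // release watcher resources once done
//	for wresp := range cli.Watch(ctx, "foo", WithPrefix()) {
//		if err := wresp.Err(); err != nil {
//			// e.g. compaction or lost leader; the channel will close
//			break
//		}
//		for _, ev := range wresp.Events {
//			fmt.Printf("%s %q : %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value)
//		}
//	}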
type WatchResponse struct {
	Header pb.ResponseHeader
	Events []*Event

	// CompactRevision is the minimum revision the watcher may receive.
	CompactRevision int64

	// Canceled is used to indicate watch failure.
	// If the watch failed and the stream was about to close, before the channel is closed,
	// the channel sends a final response that has Canceled set to true with a non-nil Err().
	Canceled bool

	// Created is used to indicate the creation of the watcher.
	Created bool

	closeErr error

	// cancelReason is the reason the watch was canceled by the server.
	cancelReason string
}

// IsCreate returns true if the event tells that the key is newly created.
func (e *Event) IsCreate() bool {
	return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
}

// IsModify returns true if the event tells that a new value is put on an existing key.
func (e *Event) IsModify() bool {
	return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
}

// Err is the error value if this WatchResponse holds an error.
func (wr *WatchResponse) Err() error {
	switch {
	case wr.closeErr != nil:
		return v3rpc.Error(wr.closeErr)
	case wr.CompactRevision != 0:
		return v3rpc.ErrCompacted
	case wr.Canceled:
		if len(wr.cancelReason) != 0 {
			return v3rpc.Error(status.Error(codes.FailedPrecondition, wr.cancelReason))
		}
		return v3rpc.ErrFutureRev
	}
	return nil
}

// IsProgressNotify returns true if the WatchResponse is progress notification.
func (wr *WatchResponse) IsProgressNotify() bool {
	return len(wr.Events) == 0 && !wr.Canceled && !wr.Created && wr.CompactRevision == 0 && wr.Header.Revision != 0
}
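
// A response-handling sketch (illustrative only) showing how a consumer can
// tell normal event batches apart from progress notifications and terminal
// errors using the helpers above:
//
//	switch {
//	case wresp.Err() != nil:
//		// compacted, canceled, or stream closed; the channel is about to close
//	case wresp.IsProgressNotify():
//		// no new events; wresp.Header.Revision is the current store revision
//	default:
//		// wresp.Events holds one or more PUT/DELETE events
//	}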
// watcher implements the Watcher interface
type watcher struct {
	remote   pb.WatchClient
	callOpts []grpc.CallOption

	// mu protects the grpc streams map
	mu sync.RWMutex

	// streams holds all the active grpc streams keyed by ctx value.
	streams map[string]*watchGrpcStream
}

// watchGrpcStream tracks all watch resources attached to a single grpc stream.
type watchGrpcStream struct {
	owner    *watcher
	remote   pb.WatchClient
	callOpts []grpc.CallOption

	// ctx controls internal remote.Watch requests
	ctx context.Context
	// ctxKey is the key used when looking up this stream's context
	ctxKey string
	cancel context.CancelFunc

	// substreams holds all active watchers on this grpc stream
	substreams map[int64]*watcherStream
	// resuming holds all resuming watchers on this grpc stream
	resuming []*watcherStream

	// reqc sends a watch request from Watch() to the main goroutine
	reqc chan watchStreamRequest
	// respc receives data from the watch client
	respc chan *pb.WatchResponse
	// donec closes to broadcast shutdown
	donec chan struct{}
	// errc transmits errors from grpc Recv to the watch stream reconnect logic
	errc chan error
	// closingc gets the watcherStream of closing watchers
	closingc chan *watcherStream
	// wg is Done when all substream goroutines have exited
	wg sync.WaitGroup

	// resumec closes to signal that all substreams should begin resuming
	resumec chan struct{}
	// closeErr is the error that closed the watch stream
	closeErr error
}
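
// The channels above form a pipeline (summarized here for orientation; the
// field comments are the authoritative reference): serveWatchClient reads
// from the gRPC stream and feeds respc; run() demultiplexes each response
// onto the owning substream's recvc; serveSubstream buffers responses in
// ws.buf and delivers them to the subscriber through ws.outc.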
// watchStreamRequest is a union of the supported watch request operation types
type watchStreamRequest interface {
	toPB() *pb.WatchRequest
}

// watchRequest is issued by the subscriber to start a new watcher
type watchRequest struct {
	ctx context.Context
	key string
	end string
	rev int64

	// send created notification event if this field is true
	createdNotify bool
	// progressNotify is for progress updates
	progressNotify bool
	// fragmentation should be disabled by default
	// if true, split watch events when total exceeds
	// "--max-request-bytes" flag value + 512-byte
	fragment bool

	// filters is the list of events to filter out
	filters []pb.WatchCreateRequest_FilterType
	// get the previous key-value pair before the event happens
	prevKV bool
	// retc receives a chan WatchResponse once the watcher is established
	retc chan chan WatchResponse
}

// progressRequest is issued by the subscriber to request watch progress
type progressRequest struct {
}

// watcherStream represents a registered watcher
type watcherStream struct {
	// initReq is the request that initiated this stream
	initReq watchRequest

	// outc publishes watch responses to subscriber
	outc chan WatchResponse
	// recvc buffers watch responses before publishing
	recvc chan *WatchResponse
	// donec closes when the watcherStream goroutine stops.
	donec chan struct{}
	// closing is set to true when stream should be scheduled to shutdown.
	closing bool
	// id is the registered watch id on the grpc stream
	id int64

	// buf holds all events received from etcd but not yet consumed by the client
	buf []*WatchResponse
}
func NewWatcher(c *Client) Watcher {
	return NewWatchFromWatchClient(pb.NewWatchClient(c.conn), c)
}

func NewWatchFromWatchClient(wc pb.WatchClient, c *Client) Watcher {
	w := &watcher{
		remote:  wc,
		streams: make(map[string]*watchGrpcStream),
	}
	if c != nil {
		w.callOpts = c.callOpts
	}
	return w
}

// never closes
var valCtxCh = make(chan struct{})
var zeroTime = time.Unix(0, 0)

// ctx with only the values; never Done
type valCtx struct{ context.Context }

func (vc *valCtx) Deadline() (time.Time, bool) { return zeroTime, false }
func (vc *valCtx) Done() <-chan struct{}       { return valCtxCh }
func (vc *valCtx) Err() error                  { return nil }
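
// Why valCtx exists (an explanatory note, not upstream documentation): the
// caller's context still carries values the stream needs (for example,
// outgoing gRPC metadata used by streamKeyFromCtx), but its cancelation and
// deadline must not tear down the shared gRPC stream, which may serve other
// watchers and reconnect on recoverable errors. Wrapping the inbound context
// in valCtx keeps the values while making Done()/Err()/Deadline() report
// "never canceled"; the stream is instead shut down via its own cancel func.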
func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
	ctx, cancel := context.WithCancel(&valCtx{inctx})
	wgs := &watchGrpcStream{
		owner:      w,
		remote:     w.remote,
		callOpts:   w.callOpts,
		ctx:        ctx,
		ctxKey:     streamKeyFromCtx(inctx),
		cancel:     cancel,
		substreams: make(map[int64]*watcherStream),
		respc:      make(chan *pb.WatchResponse),
		reqc:       make(chan watchStreamRequest),
		donec:      make(chan struct{}),
		errc:       make(chan error, 1),
		closingc:   make(chan *watcherStream),
		resumec:    make(chan struct{}),
	}
	go wgs.run()
	return wgs
}
// Watch posts a watch request to run() and waits for a new watcher channel
func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
	ow := opWatch(key, opts...)

	var filters []pb.WatchCreateRequest_FilterType
	if ow.filterPut {
		filters = append(filters, pb.WatchCreateRequest_NOPUT)
	}
	if ow.filterDelete {
		filters = append(filters, pb.WatchCreateRequest_NODELETE)
	}

	wr := &watchRequest{
		ctx:            ctx,
		createdNotify:  ow.createdNotify,
		key:            string(ow.key),
		end:            string(ow.end),
		rev:            ow.rev,
		progressNotify: ow.progressNotify,
		fragment:       ow.fragment,
		filters:        filters,
		prevKV:         ow.prevKV,
		retc:           make(chan chan WatchResponse, 1),
	}

	ok := false
	ctxKey := streamKeyFromCtx(ctx)

	// find or allocate appropriate grpc watch stream
	w.mu.Lock()
	if w.streams == nil {
		// closed
		w.mu.Unlock()
		ch := make(chan WatchResponse)
		close(ch)
		return ch
	}
	wgs := w.streams[ctxKey]
	if wgs == nil {
		wgs = w.newWatcherGrpcStream(ctx)
		w.streams[ctxKey] = wgs
	}
	donec := wgs.donec
	reqc := wgs.reqc
	w.mu.Unlock()

	// couldn't create channel; return closed channel
	closeCh := make(chan WatchResponse, 1)

	// submit request
	select {
	case reqc <- wr:
		ok = true
	case <-wr.ctx.Done():
	case <-donec:
		if wgs.closeErr != nil {
			closeCh <- WatchResponse{Canceled: true, closeErr: wgs.closeErr}
			break
		}
		// retry; may have dropped stream from no ctxs
		return w.Watch(ctx, key, opts...)
	}

	// receive channel
	if ok {
		select {
		case ret := <-wr.retc:
			return ret
		case <-ctx.Done():
		case <-donec:
			if wgs.closeErr != nil {
				closeCh <- WatchResponse{Canceled: true, closeErr: wgs.closeErr}
				break
			}
			// retry; may have dropped stream from no ctxs
			return w.Watch(ctx, key, opts...)
		}
	}

	close(closeCh)
	return closeCh
}
func (w *watcher) Close() (err error) {
	w.mu.Lock()
	streams := w.streams
	w.streams = nil
	w.mu.Unlock()
	for _, wgs := range streams {
		if werr := wgs.close(); werr != nil {
			err = werr
		}
	}
	// Consider context.Canceled as a successful close
	if err == context.Canceled {
		err = nil
	}
	return err
}

// RequestProgress requests a progress notify response be sent in all watch channels.
func (w *watcher) RequestProgress(ctx context.Context) (err error) {
	ctxKey := streamKeyFromCtx(ctx)

	w.mu.Lock()
	if w.streams == nil {
		w.mu.Unlock()
		return fmt.Errorf("no stream found for context")
	}
	wgs := w.streams[ctxKey]
	if wgs == nil {
		wgs = w.newWatcherGrpcStream(ctx)
		w.streams[ctxKey] = wgs
	}
	donec := wgs.donec
	reqc := wgs.reqc
	w.mu.Unlock()

	pr := &progressRequest{}

	select {
	case reqc <- pr:
		return nil
	case <-ctx.Done():
		if err == nil {
			return ctx.Err()
		}
		return err
	case <-donec:
		if wgs.closeErr != nil {
			return wgs.closeErr
		}
		// retry; may have dropped stream from no ctxs
		return w.RequestProgress(ctx)
	}
}
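
// A progress-request sketch (illustrative only; "cli" is again an assumed
// *Client): per the doc comment above, RequestProgress asks the server to
// emit a progress notify on all watch channels sharing the stream, which a
// consumer observes as a WatchResponse with no events and
// IsProgressNotify() == true.
//
//	if err := cli.RequestProgress(ctx); err != nil {
//		// watcher closed or context done
//	}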
func (w *watchGrpcStream) close() (err error) {
	w.cancel()
	<-w.donec
	select {
	case err = <-w.errc:
	default:
	}
	return toErr(w.ctx, err)
}

func (w *watcher) closeStream(wgs *watchGrpcStream) {
	w.mu.Lock()
	close(wgs.donec)
	wgs.cancel()
	if w.streams != nil {
		delete(w.streams, wgs.ctxKey)
	}
	w.mu.Unlock()
}

func (w *watchGrpcStream) addSubstream(resp *pb.WatchResponse, ws *watcherStream) {
	// check watch ID for backward compatibility (<= v3.3)
	if resp.WatchId == -1 || (resp.Canceled && resp.CancelReason != "") {
		w.closeErr = v3rpc.Error(errors.New(resp.CancelReason))
		// failed; no channel
		close(ws.recvc)
		return
	}
	ws.id = resp.WatchId
	w.substreams[ws.id] = ws
}

func (w *watchGrpcStream) sendCloseSubstream(ws *watcherStream, resp *WatchResponse) {
	select {
	case ws.outc <- *resp:
	case <-ws.initReq.ctx.Done():
	case <-time.After(closeSendErrTimeout):
	}
	close(ws.outc)
}

func (w *watchGrpcStream) closeSubstream(ws *watcherStream) {
	// send channel response in case stream was never established
	select {
	case ws.initReq.retc <- ws.outc:
	default:
	}
	// close subscriber's channel
	if closeErr := w.closeErr; closeErr != nil && ws.initReq.ctx.Err() == nil {
		go w.sendCloseSubstream(ws, &WatchResponse{Canceled: true, closeErr: w.closeErr})
	} else if ws.outc != nil {
		close(ws.outc)
	}
	if ws.id != -1 {
		delete(w.substreams, ws.id)
		return
	}
	for i := range w.resuming {
		if w.resuming[i] == ws {
			w.resuming[i] = nil
			return
		}
	}
}
// run is the root of the goroutines for managing a watcher client
func (w *watchGrpcStream) run() {
	var wc pb.Watch_WatchClient
	var closeErr error

	// substreams marked to close but goroutine still running; needed for
	// avoiding double-closing recvc on grpc stream teardown
	closing := make(map[*watcherStream]struct{})

	defer func() {
		w.closeErr = closeErr
		// shutdown substreams and resuming substreams
		for _, ws := range w.substreams {
			if _, ok := closing[ws]; !ok {
				close(ws.recvc)
				closing[ws] = struct{}{}
			}
		}
		for _, ws := range w.resuming {
			if _, ok := closing[ws]; ws != nil && !ok {
				close(ws.recvc)
				closing[ws] = struct{}{}
			}
		}
		w.joinSubstreams()
		for range closing {
			w.closeSubstream(<-w.closingc)
		}
		w.wg.Wait()
		w.owner.closeStream(w)
	}()

	// start a stream with the etcd grpc server
	if wc, closeErr = w.newWatchClient(); closeErr != nil {
		return
	}

	cancelSet := make(map[int64]struct{})

	var cur *pb.WatchResponse
	for {
		select {
		// Watch() requested
		case req := <-w.reqc:
			switch wreq := req.(type) {
			case *watchRequest:
				outc := make(chan WatchResponse, 1)
				// TODO: pass custom watch ID?
				ws := &watcherStream{
					initReq: *wreq,
					id:      -1,
					outc:    outc,
					// unbuffered so resumes won't cause repeat events
					recvc: make(chan *WatchResponse),
				}
				ws.donec = make(chan struct{})
				w.wg.Add(1)
				go w.serveSubstream(ws, w.resumec)

				// queue up for watcher creation/resume
				w.resuming = append(w.resuming, ws)
				if len(w.resuming) == 1 {
					// head of resume queue, can register a new watcher
					wc.Send(ws.initReq.toPB())
				}
			case *progressRequest:
				wc.Send(wreq.toPB())
			}

		// new events from the watch client
		case pbresp := <-w.respc:
			if cur == nil || pbresp.Created || pbresp.Canceled {
				cur = pbresp
			} else if cur != nil && cur.WatchId == pbresp.WatchId {
				// merge new events
				cur.Events = append(cur.Events, pbresp.Events...)
				// update "Fragment" field; last response with "Fragment" == false
				cur.Fragment = pbresp.Fragment
			}

			switch {
			case pbresp.Created:
				// response to head of queue creation
				if ws := w.resuming[0]; ws != nil {
					w.addSubstream(pbresp, ws)
					w.dispatchEvent(pbresp)
					w.resuming[0] = nil
				}

				if ws := w.nextResume(); ws != nil {
					wc.Send(ws.initReq.toPB())
				}

				// reset for next iteration
				cur = nil

			case pbresp.Canceled && pbresp.CompactRevision == 0:
				delete(cancelSet, pbresp.WatchId)
				if ws, ok := w.substreams[pbresp.WatchId]; ok {
					// signal to stream goroutine to update closingc
					close(ws.recvc)
					closing[ws] = struct{}{}
				}

				// reset for next iteration
				cur = nil

			case cur.Fragment:
				// watch response events are still fragmented
				// continue to fetch next fragmented event arrival
				continue

			default:
				// dispatch to appropriate watch stream
				ok := w.dispatchEvent(cur)

				// reset for next iteration
				cur = nil

				if ok {
					break
				}

				// watch response on unexpected watch id; cancel id
				if _, ok := cancelSet[pbresp.WatchId]; ok {
					break
				}

				cancelSet[pbresp.WatchId] = struct{}{}
				cr := &pb.WatchRequest_CancelRequest{
					CancelRequest: &pb.WatchCancelRequest{
						WatchId: pbresp.WatchId,
					},
				}
				req := &pb.WatchRequest{RequestUnion: cr}
				wc.Send(req)
			}

		// watch client failed on Recv; spawn another if possible
		case err := <-w.errc:
			if isHaltErr(w.ctx, err) || toErr(w.ctx, err) == v3rpc.ErrNoLeader {
				closeErr = err
				return
			}
			if wc, closeErr = w.newWatchClient(); closeErr != nil {
				return
			}
			if ws := w.nextResume(); ws != nil {
				wc.Send(ws.initReq.toPB())
			}
			cancelSet = make(map[int64]struct{})

		case <-w.ctx.Done():
			return

		case ws := <-w.closingc:
			w.closeSubstream(ws)
			delete(closing, ws)
			// no more watchers on this stream, shutdown
			if len(w.substreams)+len(w.resuming) == 0 {
				return
			}
		}
	}
}
// nextResume chooses the next resuming to register with the grpc stream. Abandoned
// streams are marked as nil in the queue since the head must wait for its inflight registration.
func (w *watchGrpcStream) nextResume() *watcherStream {
	for len(w.resuming) != 0 {
		if w.resuming[0] != nil {
			return w.resuming[0]
		}
		w.resuming = w.resuming[1:len(w.resuming)]
	}
	return nil
}

// dispatchEvent sends a WatchResponse to the appropriate watcher stream
func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
	events := make([]*Event, len(pbresp.Events))
	for i, ev := range pbresp.Events {
		events[i] = (*Event)(ev)
	}
	// TODO: return watch ID?
	wr := &WatchResponse{
		Header:          *pbresp.Header,
		Events:          events,
		CompactRevision: pbresp.CompactRevision,
		Created:         pbresp.Created,
		Canceled:        pbresp.Canceled,
		cancelReason:    pbresp.CancelReason,
	}

	// watch IDs are zero indexed, so request notify watch responses are assigned a watch ID of -1 to
	// indicate they should be broadcast.
	if wr.IsProgressNotify() && pbresp.WatchId == -1 {
		return w.broadcastResponse(wr)
	}

	return w.unicastResponse(wr, pbresp.WatchId)
}

// broadcastResponse sends a watch response to all watch substreams.
func (w *watchGrpcStream) broadcastResponse(wr *WatchResponse) bool {
	for _, ws := range w.substreams {
		select {
		case ws.recvc <- wr:
		case <-ws.donec:
		}
	}
	return true
}

// unicastResponse sends a watch response to a specific watch substream.
func (w *watchGrpcStream) unicastResponse(wr *WatchResponse, watchId int64) bool {
	ws, ok := w.substreams[watchId]
	if !ok {
		return false
	}
	select {
	case ws.recvc <- wr:
	case <-ws.donec:
		return false
	}
	return true
}
// serveWatchClient forwards messages from the grpc stream to run()
func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
	for {
		resp, err := wc.Recv()
		if err != nil {
			select {
			case w.errc <- err:
			case <-w.donec:
			}
			return
		}
		select {
		case w.respc <- resp:
		case <-w.donec:
			return
		}
	}
}
// serveSubstream forwards watch responses from run() to the subscriber
func (w *watchGrpcStream) serveSubstream(ws *watcherStream, resumec chan struct{}) {
	if ws.closing {
		panic("created substream goroutine but substream is closing")
	}

	// nextRev is the minimum expected next revision
	nextRev := ws.initReq.rev
	resuming := false
	defer func() {
		if !resuming {
			ws.closing = true
		}
		close(ws.donec)
		if !resuming {
			w.closingc <- ws
		}
		w.wg.Done()
	}()

	emptyWr := &WatchResponse{}
	for {
		curWr := emptyWr
		outc := ws.outc

		if len(ws.buf) > 0 {
			curWr = ws.buf[0]
		} else {
			outc = nil
		}
		select {
		case outc <- *curWr:
			if ws.buf[0].Err() != nil {
				return
			}
			ws.buf[0] = nil
			ws.buf = ws.buf[1:]
		case wr, ok := <-ws.recvc:
			if !ok {
				// shutdown from closeSubstream
				return
			}

			if wr.Created {
				if ws.initReq.retc != nil {
					ws.initReq.retc <- ws.outc
					// to prevent next write from taking the slot in buffered channel
					// and posting duplicate create events
					ws.initReq.retc = nil

					// send first creation event only if requested
					if ws.initReq.createdNotify {
						ws.outc <- *wr
					}
					// once the watch channel is returned, a current revision
					// watch must resume at the store revision. This is necessary
					// for the following case to work as expected:
					//	wch := m1.Watch("a")
					//	m2.Put("a", "b")
					//	<-wch
					// If the revision is only bound on the first observed event,
					// if wch is disconnected before the Put is issued, then reconnects
					// after it is committed, it'll miss the Put.
					if ws.initReq.rev == 0 {
						nextRev = wr.Header.Revision
					}
				}
			} else {
				// current progress of watch; <= store revision
				nextRev = wr.Header.Revision
			}

			if len(wr.Events) > 0 {
				nextRev = wr.Events[len(wr.Events)-1].Kv.ModRevision + 1
			}
			ws.initReq.rev = nextRev

			// created event is already sent above,
			// watcher should not post duplicate events
			if wr.Created {
				continue
			}

			// TODO pause channel if buffer gets too large
			ws.buf = append(ws.buf, wr)
		case <-w.ctx.Done():
			return
		case <-ws.initReq.ctx.Done():
			return
		case <-resumec:
			resuming = true
			return
		}
	}
	// lazily send cancel message if events on missing id
}
func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
	// mark all substreams as resuming
	close(w.resumec)
	w.resumec = make(chan struct{})
	w.joinSubstreams()
	for _, ws := range w.substreams {
		ws.id = -1
		w.resuming = append(w.resuming, ws)
	}
	// strip out nils, if any
	var resuming []*watcherStream
	for _, ws := range w.resuming {
		if ws != nil {
			resuming = append(resuming, ws)
		}
	}
	w.resuming = resuming
	w.substreams = make(map[int64]*watcherStream)

	// connect to grpc stream while accepting watcher cancelation
	stopc := make(chan struct{})
	donec := w.waitCancelSubstreams(stopc)
	wc, err := w.openWatchClient()
	close(stopc)
	<-donec

	// serve all non-closing streams, even if there's a client error
	// so that the teardown path can shutdown the streams as expected.
	for _, ws := range w.resuming {
		if ws.closing {
			continue
		}
		ws.donec = make(chan struct{})
		w.wg.Add(1)
		go w.serveSubstream(ws, w.resumec)
	}

	if err != nil {
		return nil, v3rpc.Error(err)
	}

	// receive data from new grpc stream
	go w.serveWatchClient(wc)
	return wc, nil
}
func (w *watchGrpcStream) waitCancelSubstreams(stopc <-chan struct{}) <-chan struct{} {
	var wg sync.WaitGroup
	wg.Add(len(w.resuming))
	donec := make(chan struct{})
	for i := range w.resuming {
		go func(ws *watcherStream) {
			defer wg.Done()
			if ws.closing {
				if ws.initReq.ctx.Err() != nil && ws.outc != nil {
					close(ws.outc)
					ws.outc = nil
				}
				return
			}
			select {
			case <-ws.initReq.ctx.Done():
				// closed ws will be removed from resuming
				ws.closing = true
				close(ws.outc)
				ws.outc = nil
				w.wg.Add(1)
				go func() {
					defer w.wg.Done()
					w.closingc <- ws
				}()
			case <-stopc:
			}
		}(w.resuming[i])
	}
	go func() {
		defer close(donec)
		wg.Wait()
	}()
	return donec
}

// joinSubstreams waits for all substream goroutines to complete.
func (w *watchGrpcStream) joinSubstreams() {
	for _, ws := range w.substreams {
		<-ws.donec
	}
	for _, ws := range w.resuming {
		if ws != nil {
			<-ws.donec
		}
	}
}
var maxBackoff = 100 * time.Millisecond

// openWatchClient retries opening a watch client until success or halt.
// manually retry in case "ws==nil && err==nil"
// TODO: remove FailFast=false
func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
	backoff := time.Millisecond
	for {
		select {
		case <-w.ctx.Done():
			if err == nil {
				return nil, w.ctx.Err()
			}
			return nil, err
		default:
		}
		if ws, err = w.remote.Watch(w.ctx, w.callOpts...); ws != nil && err == nil {
			break
		}
		if isHaltErr(w.ctx, err) {
			return nil, v3rpc.Error(err)
		}
		if isUnavailableErr(w.ctx, err) {
			// retry, but backoff
			if backoff < maxBackoff {
				// 25% backoff factor
				backoff = backoff + backoff/4
				if backoff > maxBackoff {
					backoff = maxBackoff
				}
			}
			time.Sleep(backoff)
		}
	}
	return ws, nil
}
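
// Backoff arithmetic, spelled out (an explanatory note on the loop above):
// each unavailable retry grows the delay by 25% via integer math
// (backoff += backoff/4), so the sleep runs roughly
// 1ms, 1.25ms, ~1.56ms, ..., hitting the 100ms cap (maxBackoff) after
// about 21 retries, after which every further retry waits 100ms.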
// toPB converts an internal watch request structure to its protobuf WatchRequest structure.
func (wr *watchRequest) toPB() *pb.WatchRequest {
	req := &pb.WatchCreateRequest{
		StartRevision:  wr.rev,
		Key:            []byte(wr.key),
		RangeEnd:       []byte(wr.end),
		ProgressNotify: wr.progressNotify,
		Filters:        wr.filters,
		PrevKv:         wr.prevKV,
		Fragment:       wr.fragment,
	}
	cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
	return &pb.WatchRequest{RequestUnion: cr}
}

// toPB converts an internal progress request structure to its protobuf WatchRequest structure.
func (pr *progressRequest) toPB() *pb.WatchRequest {
	req := &pb.WatchProgressRequest{}
	cr := &pb.WatchRequest_ProgressRequest{ProgressRequest: req}
	return &pb.WatchRequest{RequestUnion: cr}
}

func streamKeyFromCtx(ctx context.Context) string {
	if md, ok := metadata.FromOutgoingContext(ctx); ok {
		return fmt.Sprintf("%+v", md)
	}
	return ""
}
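
// A stream-sharing sketch (illustrative only; "cli" is an assumed *Client):
// watches whose contexts carry the same outgoing gRPC metadata map to the
// same stream key and therefore share one watchGrpcStream; distinct metadata
// forces a separate stream.
//
//	md := metadata.Pairs("k", "v") // hypothetical metadata
//	ctx1 := metadata.NewOutgoingContext(context.Background(), md)
//	ctx2 := metadata.NewOutgoingContext(context.Background(), md)
//	// streamKeyFromCtx(ctx1) == streamKeyFromCtx(ctx2),
//	// so both watches ride the same gRPC stream:
//	wch1 := cli.Watch(ctx1, "a")
//	wch2 := cli.Watch(ctx2, "b")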