// watch.go
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "context"
  17. "fmt"
  18. "sync"
  19. "time"
  20. v3rpc "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
  21. pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
  22. mvccpb "go.etcd.io/etcd/mvcc/mvccpb"
  23. "google.golang.org/grpc"
  24. "google.golang.org/grpc/codes"
  25. "google.golang.org/grpc/metadata"
  26. "google.golang.org/grpc/status"
  27. )
const (
	// EventTypeDelete and EventTypePut re-export the mvcc event types.
	EventTypeDelete = mvccpb.DELETE
	EventTypePut    = mvccpb.PUT

	// closeSendErrTimeout bounds how long a final close-error response is
	// offered to a subscriber channel before giving up and closing it.
	closeSendErrTimeout = 250 * time.Millisecond
)
// Event is a defined type over mvccpb.Event so helper methods
// (IsCreate, IsModify) can be attached.
type Event mvccpb.Event

// WatchChan is the receive-only channel of watch responses returned by Watch.
type WatchChan <-chan WatchResponse
// Watcher is the interface for watching keys and key ranges on etcd.
type Watcher interface {
	// Watch watches on a key or prefix. The watched events will be returned
	// through the returned channel. If revisions waiting to be sent over the
	// watch are compacted, then the watch will be canceled by the server, the
	// client will post a compacted error watch response, and the channel will close.
	// If the context "ctx" is canceled or timed out, returned "WatchChan" is closed,
	// and "WatchResponse" from this closed channel has zero events and nil "Err()".
	// The context "ctx" MUST be canceled, as soon as watcher is no longer being used,
	// to release the associated resources.
	//
	// If the context is "context.Background/TODO", returned "WatchChan" will
	// not be closed and block until event is triggered, except when server
	// returns a non-recoverable error (e.g. ErrCompacted).
	// For example, when context passed with "WithRequireLeader" and the
	// connected server has no leader (e.g. due to network partition),
	// error "etcdserver: no leader" (ErrNoLeader) will be returned,
	// and then "WatchChan" is closed with non-nil "Err()".
	// In order to prevent a watch stream being stuck in a partitioned node,
	// make sure to wrap context with "WithRequireLeader".
	//
	// Otherwise, as long as the context has not been canceled or timed out,
	// watch will retry on other recoverable errors forever until reconnected.
	//
	// TODO: explicitly set context error in the last "WatchResponse" message and close channel?
	// Currently, client contexts are overwritten with "valCtx" that never closes.
	// TODO(v3.4): configure watch retry policy, limit maximum retry number
	// (see https://go.etcd.io/etcd/issues/8980)
	Watch(ctx context.Context, key string, opts ...OpOption) WatchChan

	// RequestProgress requests a progress notify response be sent in all watch channels.
	RequestProgress(ctx context.Context) error

	// Close closes the watcher and cancels all watch requests.
	Close() error
}
// WatchResponse carries a batch of watch events plus stream status for one
// delivery on a WatchChan.
type WatchResponse struct {
	Header pb.ResponseHeader
	Events []*Event

	// CompactRevision is the minimum revision the watcher may receive.
	CompactRevision int64

	// Canceled is used to indicate watch failure.
	// If the watch failed and the stream was about to close, before the channel is closed,
	// the channel sends a final response that has Canceled set to true with a non-nil Err().
	Canceled bool

	// Created is used to indicate the creation of the watcher.
	Created bool

	// closeErr is the error that tore down the whole grpc stream, if any.
	closeErr error

	// cancelReason is a reason of canceling watch
	cancelReason string
}
  83. // IsCreate returns true if the event tells that the key is newly created.
  84. func (e *Event) IsCreate() bool {
  85. return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
  86. }
  87. // IsModify returns true if the event tells that a new value is put on existing key.
  88. func (e *Event) IsModify() bool {
  89. return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
  90. }
  91. // Err is the error value if this WatchResponse holds an error.
  92. func (wr *WatchResponse) Err() error {
  93. switch {
  94. case wr.closeErr != nil:
  95. return v3rpc.Error(wr.closeErr)
  96. case wr.CompactRevision != 0:
  97. return v3rpc.ErrCompacted
  98. case wr.Canceled:
  99. if len(wr.cancelReason) != 0 {
  100. return v3rpc.Error(status.Error(codes.FailedPrecondition, wr.cancelReason))
  101. }
  102. return v3rpc.ErrFutureRev
  103. }
  104. return nil
  105. }
  106. // IsProgressNotify returns true if the WatchResponse is progress notification.
  107. func (wr *WatchResponse) IsProgressNotify() bool {
  108. return len(wr.Events) == 0 && !wr.Canceled && !wr.Created && wr.CompactRevision == 0 && wr.Header.Revision != 0
  109. }
// watcher implements the Watcher interface
type watcher struct {
	remote   pb.WatchClient
	callOpts []grpc.CallOption

	// mu protects the grpc streams map
	mu sync.RWMutex

	// streams holds all the active grpc streams keyed by ctx value.
	// A nil map means the watcher has been closed.
	streams map[string]*watchGrpcStream
}
// watchGrpcStream tracks all watch resources attached to a single grpc stream.
type watchGrpcStream struct {
	owner    *watcher
	remote   pb.WatchClient
	callOpts []grpc.CallOption

	// ctx controls internal remote.Watch requests
	ctx context.Context
	// ctxKey is the key used when looking up this stream's context
	ctxKey string
	cancel context.CancelFunc

	// substreams holds all active watchers on this grpc stream
	substreams map[int64]*watcherStream
	// resuming holds all resuming watchers on this grpc stream
	resuming []*watcherStream

	// reqc sends a watch request from Watch() to the main goroutine
	reqc chan watchStreamRequest
	// respc receives data from the watch client
	respc chan *pb.WatchResponse
	// donec closes to broadcast shutdown
	donec chan struct{}
	// errc transmits errors from grpc Recv to the watch stream reconnect logic
	errc chan error
	// closingc gets the watcherStream of closing watchers
	closingc chan *watcherStream
	// wg is Done when all substream goroutines have exited
	wg sync.WaitGroup

	// resumec closes to signal that all substreams should begin resuming
	resumec chan struct{}
	// closeErr is the error that closed the watch stream
	closeErr error
}
// watchStreamRequest is a union of the supported watch request operation types
// (watchRequest and progressRequest).
type watchStreamRequest interface {
	toPB() *pb.WatchRequest
}
// watchRequest is issued by the subscriber to start a new watcher
type watchRequest struct {
	ctx context.Context
	key string
	end string
	rev int64

	// send created notification event if this field is true
	createdNotify bool
	// progressNotify is for progress updates
	progressNotify bool
	// fragmentation should be disabled by default
	// if true, split watch events when total exceeds
	// "--max-request-bytes" flag value + 512-byte
	fragment bool

	// filters is the list of events to filter out
	filters []pb.WatchCreateRequest_FilterType
	// get the previous key-value pair before the event happens
	prevKV bool
	// retc receives a chan WatchResponse once the watcher is established
	retc chan chan WatchResponse
}
// progressRequest is issued by the subscriber to request watch progress
type progressRequest struct {
}
// watcherStream represents a registered watcher
type watcherStream struct {
	// initReq is the request that initiated this request
	initReq watchRequest

	// outc publishes watch responses to subscriber
	outc chan WatchResponse
	// recvc buffers watch responses before publishing
	recvc chan *WatchResponse
	// donec closes when the watcherStream goroutine stops.
	donec chan struct{}
	// closing is set to true when stream should be scheduled to shutdown.
	closing bool
	// id is the registered watch id on the grpc stream;
	// -1 while creation/resume is still in flight.
	id int64

	// buf holds all events received from etcd but not yet consumed by the client
	buf []*WatchResponse
}
// NewWatcher returns a Watcher backed by the client's grpc connection.
func NewWatcher(c *Client) Watcher {
	return NewWatchFromWatchClient(pb.NewWatchClient(c.conn), c)
}
  198. func NewWatchFromWatchClient(wc pb.WatchClient, c *Client) Watcher {
  199. w := &watcher{
  200. remote: wc,
  201. streams: make(map[string]*watchGrpcStream),
  202. }
  203. if c != nil {
  204. w.callOpts = c.callOpts
  205. }
  206. return w
  207. }
  208. // never closes
  209. var valCtxCh = make(chan struct{})
  210. var zeroTime = time.Unix(0, 0)
  211. // ctx with only the values; never Done
  212. type valCtx struct{ context.Context }
  213. func (vc *valCtx) Deadline() (time.Time, bool) { return zeroTime, false }
  214. func (vc *valCtx) Done() <-chan struct{} { return valCtxCh }
  215. func (vc *valCtx) Err() error { return nil }
// newWatcherGrpcStream allocates a watchGrpcStream for inctx and starts its
// run() goroutine. The stream's internal context wraps inctx in valCtx so
// it carries the values (e.g. metadata) but never inherits cancellation;
// shutdown is driven solely by the returned cancel func.
func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
	ctx, cancel := context.WithCancel(&valCtx{inctx})
	wgs := &watchGrpcStream{
		owner:      w,
		remote:     w.remote,
		callOpts:   w.callOpts,
		ctx:        ctx,
		ctxKey:     streamKeyFromCtx(inctx),
		cancel:     cancel,
		substreams: make(map[int64]*watcherStream),
		respc:      make(chan *pb.WatchResponse),
		reqc:       make(chan watchStreamRequest),
		donec:      make(chan struct{}),
		errc:       make(chan error, 1),
		closingc:   make(chan *watcherStream),
		resumec:    make(chan struct{}),
	}
	go wgs.run()
	return wgs
}
// Watch posts a watch request to run() and waits for a new watcher channel
func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
	ow := opWatch(key, opts...)

	var filters []pb.WatchCreateRequest_FilterType
	if ow.filterPut {
		filters = append(filters, pb.WatchCreateRequest_NOPUT)
	}
	if ow.filterDelete {
		filters = append(filters, pb.WatchCreateRequest_NODELETE)
	}

	wr := &watchRequest{
		ctx:            ctx,
		createdNotify:  ow.createdNotify,
		key:            string(ow.key),
		end:            string(ow.end),
		rev:            ow.rev,
		progressNotify: ow.progressNotify,
		fragment:       ow.fragment,
		filters:        filters,
		prevKV:         ow.prevKV,
		retc:           make(chan chan WatchResponse, 1),
	}

	ok := false
	ctxKey := streamKeyFromCtx(ctx)

	// find or allocate appropriate grpc watch stream
	w.mu.Lock()
	if w.streams == nil {
		// closed; callers get a pre-closed channel with no events
		w.mu.Unlock()
		ch := make(chan WatchResponse)
		close(ch)
		return ch
	}
	wgs := w.streams[ctxKey]
	if wgs == nil {
		wgs = w.newWatcherGrpcStream(ctx)
		w.streams[ctxKey] = wgs
	}
	// snapshot channels under the lock; wgs may be torn down concurrently
	donec := wgs.donec
	reqc := wgs.reqc
	w.mu.Unlock()

	// couldn't create channel; return closed channel
	closeCh := make(chan WatchResponse, 1)

	// submit request
	select {
	case reqc <- wr:
		ok = true
	case <-wr.ctx.Done():
	case <-donec:
		if wgs.closeErr != nil {
			closeCh <- WatchResponse{closeErr: wgs.closeErr}
			break
		}
		// retry; may have dropped stream from no ctxs
		return w.Watch(ctx, key, opts...)
	}

	// receive channel
	if ok {
		select {
		case ret := <-wr.retc:
			return ret
		case <-ctx.Done():
		case <-donec:
			if wgs.closeErr != nil {
				closeCh <- WatchResponse{closeErr: wgs.closeErr}
				break
			}
			// retry; may have dropped stream from no ctxs
			return w.Watch(ctx, key, opts...)
		}
	}

	close(closeCh)
	return closeCh
}
  310. func (w *watcher) Close() (err error) {
  311. w.mu.Lock()
  312. streams := w.streams
  313. w.streams = nil
  314. w.mu.Unlock()
  315. for _, wgs := range streams {
  316. if werr := wgs.close(); werr != nil {
  317. err = werr
  318. }
  319. }
  320. return err
  321. }
  322. // RequestProgress requests a progress notify response be sent in all watch channels.
  323. func (w *watcher) RequestProgress(ctx context.Context) (err error) {
  324. ctxKey := streamKeyFromCtx(ctx)
  325. w.mu.Lock()
  326. if w.streams == nil {
  327. return fmt.Errorf("no stream found for context")
  328. }
  329. wgs := w.streams[ctxKey]
  330. if wgs == nil {
  331. wgs = w.newWatcherGrpcStream(ctx)
  332. w.streams[ctxKey] = wgs
  333. }
  334. donec := wgs.donec
  335. reqc := wgs.reqc
  336. w.mu.Unlock()
  337. pr := &progressRequest{}
  338. select {
  339. case reqc <- pr:
  340. return nil
  341. case <-ctx.Done():
  342. if err == nil {
  343. return ctx.Err()
  344. }
  345. return err
  346. case <-donec:
  347. if wgs.closeErr != nil {
  348. return wgs.closeErr
  349. }
  350. // retry; may have dropped stream from no ctxs
  351. return w.RequestProgress(ctx)
  352. }
  353. }
// close cancels the stream's context, waits for run() to finish teardown,
// and returns any receive error left in errc (converted via toErr).
func (w *watchGrpcStream) close() (err error) {
	w.cancel()
	<-w.donec
	// drain a pending recv error without blocking; errc has capacity 1
	select {
	case err = <-w.errc:
	default:
	}
	return toErr(w.ctx, err)
}
// closeStream broadcasts the stream shutdown (closing donec), cancels its
// context, and removes it from the owner's stream map if the watcher is
// still open. Called from run()'s deferred teardown.
func (w *watcher) closeStream(wgs *watchGrpcStream) {
	w.mu.Lock()
	close(wgs.donec)
	wgs.cancel()
	if w.streams != nil {
		delete(w.streams, wgs.ctxKey)
	}
	w.mu.Unlock()
}
  372. func (w *watchGrpcStream) addSubstream(resp *pb.WatchResponse, ws *watcherStream) {
  373. // check watch ID for backward compatibility (<= v3.3)
  374. if resp.WatchId == -1 || (resp.Canceled && resp.CancelReason != "") {
  375. // failed; no channel
  376. close(ws.recvc)
  377. return
  378. }
  379. ws.id = resp.WatchId
  380. w.substreams[ws.id] = ws
  381. }
// sendCloseSubstream offers a final close-error response to the subscriber,
// giving up after closeSendErrTimeout or when the subscriber's context is
// done, and then closes the subscriber channel.
func (w *watchGrpcStream) sendCloseSubstream(ws *watcherStream, resp *WatchResponse) {
	select {
	case ws.outc <- *resp:
	case <-ws.initReq.ctx.Done():
	case <-time.After(closeSendErrTimeout):
	}
	close(ws.outc)
}
// closeSubstream tears down one watcher substream: it hands the channel to a
// still-waiting Watch() caller if the stream was never established, closes
// the subscriber channel (possibly after sending a final close error), and
// unregisters the substream from either substreams or the resume queue.
func (w *watchGrpcStream) closeSubstream(ws *watcherStream) {
	// send channel response in case stream was never established
	select {
	case ws.initReq.retc <- ws.outc:
	default:
	}
	// close subscriber's channel
	if closeErr := w.closeErr; closeErr != nil && ws.initReq.ctx.Err() == nil {
		go w.sendCloseSubstream(ws, &WatchResponse{closeErr: w.closeErr})
	} else if ws.outc != nil {
		close(ws.outc)
	}
	if ws.id != -1 {
		delete(w.substreams, ws.id)
		return
	}
	// not yet registered with the server; mark its resume-queue slot
	// abandoned (nil) instead of removing it, since the head of the queue
	// must wait for its inflight registration
	for i := range w.resuming {
		if w.resuming[i] == ws {
			w.resuming[i] = nil
			return
		}
	}
}
// run is the root of the goroutines for managing a watcher client.
// It owns all stream state: it services new watch/progress requests,
// dispatches server responses (merging fragments), reconnects after
// recoverable Recv errors, and on exit tears down every substream.
func (w *watchGrpcStream) run() {
	var wc pb.Watch_WatchClient
	var closeErr error

	// substreams marked to close but goroutine still running; needed for
	// avoiding double-closing recvc on grpc stream teardown
	closing := make(map[*watcherStream]struct{})

	defer func() {
		w.closeErr = closeErr
		// shutdown substreams and resuming substreams
		for _, ws := range w.substreams {
			if _, ok := closing[ws]; !ok {
				close(ws.recvc)
				closing[ws] = struct{}{}
			}
		}
		for _, ws := range w.resuming {
			if _, ok := closing[ws]; ws != nil && !ok {
				close(ws.recvc)
				closing[ws] = struct{}{}
			}
		}
		w.joinSubstreams()
		// reap one closingc notification per substream scheduled to close
		for range closing {
			w.closeSubstream(<-w.closingc)
		}
		w.wg.Wait()
		w.owner.closeStream(w)
	}()

	// start a stream with the etcd grpc server
	if wc, closeErr = w.newWatchClient(); closeErr != nil {
		return
	}

	// watch IDs that produced unexpected responses and were sent a cancel
	cancelSet := make(map[int64]struct{})

	// cur accumulates fragmented responses until the last fragment arrives
	var cur *pb.WatchResponse
	for {
		select {
		// Watch() requested
		case req := <-w.reqc:
			switch wreq := req.(type) {
			case *watchRequest:
				outc := make(chan WatchResponse, 1)
				// TODO: pass custom watch ID?
				ws := &watcherStream{
					initReq: *wreq,
					id:      -1,
					outc:    outc,
					// unbuffered so resumes won't cause repeat events
					recvc: make(chan *WatchResponse),
				}
				ws.donec = make(chan struct{})
				w.wg.Add(1)
				go w.serveSubstream(ws, w.resumec)

				// queue up for watcher creation/resume
				w.resuming = append(w.resuming, ws)
				if len(w.resuming) == 1 {
					// head of resume queue, can register a new watcher
					wc.Send(ws.initReq.toPB())
				}
			case *progressRequest:
				wc.Send(wreq.toPB())
			}

		// new events from the watch client
		case pbresp := <-w.respc:
			if cur == nil || pbresp.Created || pbresp.Canceled {
				cur = pbresp
			} else if cur != nil && cur.WatchId == pbresp.WatchId {
				// merge new events
				cur.Events = append(cur.Events, pbresp.Events...)
				// update "Fragment" field; last response with "Fragment" == false
				cur.Fragment = pbresp.Fragment
			}

			switch {
			case pbresp.Created:
				// response to head of queue creation
				if ws := w.resuming[0]; ws != nil {
					w.addSubstream(pbresp, ws)
					w.dispatchEvent(pbresp)
					w.resuming[0] = nil
				}

				if ws := w.nextResume(); ws != nil {
					wc.Send(ws.initReq.toPB())
				}

				// reset for next iteration
				cur = nil

			case pbresp.Canceled && pbresp.CompactRevision == 0:
				delete(cancelSet, pbresp.WatchId)
				if ws, ok := w.substreams[pbresp.WatchId]; ok {
					// signal to stream goroutine to update closingc
					close(ws.recvc)
					closing[ws] = struct{}{}
				}

				// reset for next iteration
				cur = nil

			case cur.Fragment:
				// watch response events are still fragmented
				// continue to fetch next fragmented event arrival
				continue

			default:
				// dispatch to appropriate watch stream
				ok := w.dispatchEvent(cur)

				// reset for next iteration
				cur = nil

				if ok {
					break
				}

				// watch response on unexpected watch id; cancel id
				if _, ok := cancelSet[pbresp.WatchId]; ok {
					break
				}

				cancelSet[pbresp.WatchId] = struct{}{}
				cr := &pb.WatchRequest_CancelRequest{
					CancelRequest: &pb.WatchCancelRequest{
						WatchId: pbresp.WatchId,
					},
				}
				req := &pb.WatchRequest{RequestUnion: cr}
				wc.Send(req)
			}

		// watch client failed on Recv; spawn another if possible
		case err := <-w.errc:
			if isHaltErr(w.ctx, err) || toErr(w.ctx, err) == v3rpc.ErrNoLeader {
				closeErr = err
				return
			}
			if wc, closeErr = w.newWatchClient(); closeErr != nil {
				return
			}
			if ws := w.nextResume(); ws != nil {
				wc.Send(ws.initReq.toPB())
			}
			cancelSet = make(map[int64]struct{})

		case <-w.ctx.Done():
			return

		case ws := <-w.closingc:
			w.closeSubstream(ws)
			delete(closing, ws)
			// no more watchers on this stream, shutdown
			if len(w.substreams)+len(w.resuming) == 0 {
				return
			}
		}
	}
}
  557. // nextResume chooses the next resuming to register with the grpc stream. Abandoned
  558. // streams are marked as nil in the queue since the head must wait for its inflight registration.
  559. func (w *watchGrpcStream) nextResume() *watcherStream {
  560. for len(w.resuming) != 0 {
  561. if w.resuming[0] != nil {
  562. return w.resuming[0]
  563. }
  564. w.resuming = w.resuming[1:len(w.resuming)]
  565. }
  566. return nil
  567. }
  568. // dispatchEvent sends a WatchResponse to the appropriate watcher stream
  569. func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
  570. events := make([]*Event, len(pbresp.Events))
  571. for i, ev := range pbresp.Events {
  572. events[i] = (*Event)(ev)
  573. }
  574. // TODO: return watch ID?
  575. wr := &WatchResponse{
  576. Header: *pbresp.Header,
  577. Events: events,
  578. CompactRevision: pbresp.CompactRevision,
  579. Created: pbresp.Created,
  580. Canceled: pbresp.Canceled,
  581. cancelReason: pbresp.CancelReason,
  582. }
  583. // watch IDs are zero indexed, so request notify watch responses are assigned a watch ID of -1 to
  584. // indicate they should be broadcast.
  585. if wr.IsProgressNotify() && pbresp.WatchId == -1 {
  586. return w.broadcastResponse(wr)
  587. }
  588. return w.unicastResponse(wr, pbresp.WatchId)
  589. }
// broadcastResponse send a watch response to all watch substreams.
// Sends that would block are abandoned once the target substream's
// goroutine has exited (donec closed).
func (w *watchGrpcStream) broadcastResponse(wr *WatchResponse) bool {
	for _, ws := range w.substreams {
		select {
		case ws.recvc <- wr:
		case <-ws.donec:
		}
	}
	return true
}
  600. // unicastResponse sends a watch response to a specific watch substream.
  601. func (w *watchGrpcStream) unicastResponse(wr *WatchResponse, watchId int64) bool {
  602. ws, ok := w.substreams[watchId]
  603. if !ok {
  604. return false
  605. }
  606. select {
  607. case ws.recvc <- wr:
  608. case <-ws.donec:
  609. return false
  610. }
  611. return true
  612. }
// serveWatchClient forwards messages from the grpc stream to run()
// via respc, and reports the terminal Recv error via errc. It exits when
// Recv fails or when the stream shuts down (donec closed).
func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
	for {
		resp, err := wc.Recv()
		if err != nil {
			select {
			case w.errc <- err:
			case <-w.donec:
			}
			return
		}
		select {
		case w.respc <- resp:
		case <-w.donec:
			return
		}
	}
}
// serveSubstream forwards watch responses from run() to the subscriber.
// It buffers responses in ws.buf, tracks the next expected revision so the
// watch can resume without missing or repeating events, and exits on
// shutdown, subscriber cancellation, or a resume signal.
func (w *watchGrpcStream) serveSubstream(ws *watcherStream, resumec chan struct{}) {
	if ws.closing {
		panic("created substream goroutine but substream is closing")
	}

	// nextRev is the minimum expected next revision
	nextRev := ws.initReq.rev
	resuming := false
	defer func() {
		if !resuming {
			ws.closing = true
		}
		close(ws.donec)
		if !resuming {
			// hand the substream to run() for final teardown
			w.closingc <- ws
		}
		w.wg.Done()
	}()

	emptyWr := &WatchResponse{}
	for {
		curWr := emptyWr
		outc := ws.outc

		// only enable the send case when there is something buffered
		if len(ws.buf) > 0 {
			curWr = ws.buf[0]
		} else {
			outc = nil
		}
		select {
		case outc <- *curWr:
			if ws.buf[0].Err() != nil {
				return
			}
			// nil out the delivered slot so the GC can reclaim it
			ws.buf[0] = nil
			ws.buf = ws.buf[1:]
		case wr, ok := <-ws.recvc:
			if !ok {
				// shutdown from closeSubstream
				return
			}

			if wr.Created {
				if ws.initReq.retc != nil {
					ws.initReq.retc <- ws.outc
					// to prevent next write from taking the slot in buffered channel
					// and posting duplicate create events
					ws.initReq.retc = nil

					// send first creation event only if requested
					if ws.initReq.createdNotify {
						ws.outc <- *wr
					}
					// once the watch channel is returned, a current revision
					// watch must resume at the store revision. This is necessary
					// for the following case to work as expected:
					//	wch := m1.Watch("a")
					//	m2.Put("a", "b")
					//	<-wch
					// If the revision is only bound on the first observed event,
					// if wch is disconnected before the Put is issued, then reconnects
					// after it is committed, it'll miss the Put.
					if ws.initReq.rev == 0 {
						nextRev = wr.Header.Revision
					}
				}
			} else {
				// current progress of watch; <= store revision
				nextRev = wr.Header.Revision
			}

			if len(wr.Events) > 0 {
				nextRev = wr.Events[len(wr.Events)-1].Kv.ModRevision + 1
			}
			ws.initReq.rev = nextRev

			// created event is already sent above,
			// watcher should not post duplicate events
			if wr.Created {
				continue
			}

			// TODO pause channel if buffer gets too large
			ws.buf = append(ws.buf, wr)
		case <-w.ctx.Done():
			return
		case <-ws.initReq.ctx.Done():
			return
		case <-resumec:
			resuming = true
			return
		}
	}
	// lazily send cancel message if events on missing id
}
// newWatchClient opens a fresh grpc watch stream and moves every active
// substream onto the resume queue so each re-registers with the server.
// Substreams whose subscribers canceled while (re)connecting are closed
// out via waitCancelSubstreams.
func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
	// mark all substreams as resuming
	close(w.resumec)
	w.resumec = make(chan struct{})
	w.joinSubstreams()
	for _, ws := range w.substreams {
		// id reset to -1: the server will assign a new watch ID on resume
		ws.id = -1
		w.resuming = append(w.resuming, ws)
	}
	// strip out nils, if any
	var resuming []*watcherStream
	for _, ws := range w.resuming {
		if ws != nil {
			resuming = append(resuming, ws)
		}
	}
	w.resuming = resuming
	w.substreams = make(map[int64]*watcherStream)

	// connect to grpc stream while accepting watcher cancelation
	stopc := make(chan struct{})
	donec := w.waitCancelSubstreams(stopc)
	wc, err := w.openWatchClient()
	close(stopc)
	<-donec

	// serve all non-closing streams, even if there's a client error
	// so that the teardown path can shutdown the streams as expected.
	for _, ws := range w.resuming {
		if ws.closing {
			continue
		}
		ws.donec = make(chan struct{})
		w.wg.Add(1)
		go w.serveSubstream(ws, w.resumec)
	}

	if err != nil {
		return nil, v3rpc.Error(err)
	}

	// receive data from new grpc stream
	go w.serveWatchClient(wc)
	return wc, nil
}
// waitCancelSubstreams watches every resuming substream for subscriber
// cancellation until stopc closes. Canceled substreams are marked closing,
// their subscriber channels closed, and a teardown notification is queued
// on closingc. The returned channel closes once all watchers are settled.
func (w *watchGrpcStream) waitCancelSubstreams(stopc <-chan struct{}) <-chan struct{} {
	var wg sync.WaitGroup
	wg.Add(len(w.resuming))
	donec := make(chan struct{})
	for i := range w.resuming {
		// ws passed as an argument to avoid loop-variable capture
		go func(ws *watcherStream) {
			defer wg.Done()
			if ws.closing {
				// already scheduled to close; just finish closing outc if the
				// subscriber's context is gone
				if ws.initReq.ctx.Err() != nil && ws.outc != nil {
					close(ws.outc)
					ws.outc = nil
				}
				return
			}
			select {
			case <-ws.initReq.ctx.Done():
				// closed ws will be removed from resuming
				ws.closing = true
				close(ws.outc)
				ws.outc = nil
				w.wg.Add(1)
				go func() {
					defer w.wg.Done()
					w.closingc <- ws
				}()
			case <-stopc:
			}
		}(w.resuming[i])
	}
	go func() {
		defer close(donec)
		wg.Wait()
	}()
	return donec
}
  795. // joinSubstreams waits for all substream goroutines to complete.
  796. func (w *watchGrpcStream) joinSubstreams() {
  797. for _, ws := range w.substreams {
  798. <-ws.donec
  799. }
  800. for _, ws := range w.resuming {
  801. if ws != nil {
  802. <-ws.donec
  803. }
  804. }
  805. }
// maxBackoff caps the retry delay between failed watch-open attempts.
var maxBackoff = 100 * time.Millisecond

// openWatchClient retries opening a watch client until success or halt.
// manually retry in case "ws==nil && err==nil"
// TODO: remove FailFast=false
func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
	backoff := time.Millisecond
	for {
		// bail out promptly if the stream context was canceled
		select {
		case <-w.ctx.Done():
			if err == nil {
				return nil, w.ctx.Err()
			}
			return nil, err
		default:
		}
		if ws, err = w.remote.Watch(w.ctx, w.callOpts...); ws != nil && err == nil {
			break
		}
		if isHaltErr(w.ctx, err) {
			return nil, v3rpc.Error(err)
		}
		if isUnavailableErr(w.ctx, err) {
			// retry, but backoff
			if backoff < maxBackoff {
				// 25% backoff factor
				backoff = backoff + backoff/4
				if backoff > maxBackoff {
					backoff = maxBackoff
				}
			}
			time.Sleep(backoff)
		}
	}
	return ws, nil
}
  841. // toPB converts an internal watch request structure to its protobuf WatchRequest structure.
  842. func (wr *watchRequest) toPB() *pb.WatchRequest {
  843. req := &pb.WatchCreateRequest{
  844. StartRevision: wr.rev,
  845. Key: []byte(wr.key),
  846. RangeEnd: []byte(wr.end),
  847. ProgressNotify: wr.progressNotify,
  848. Filters: wr.filters,
  849. PrevKv: wr.prevKV,
  850. Fragment: wr.fragment,
  851. }
  852. cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
  853. return &pb.WatchRequest{RequestUnion: cr}
  854. }
  855. // toPB converts an internal progress request structure to its protobuf WatchRequest structure.
  856. func (pr *progressRequest) toPB() *pb.WatchRequest {
  857. req := &pb.WatchProgressRequest{}
  858. cr := &pb.WatchRequest_ProgressRequest{ProgressRequest: req}
  859. return &pb.WatchRequest{RequestUnion: cr}
  860. }
  861. func streamKeyFromCtx(ctx context.Context) string {
  862. if md, ok := metadata.FromOutgoingContext(ctx); ok {
  863. return fmt.Sprintf("%+v", md)
  864. }
  865. return ""
  866. }