watch.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "context"
  17. "fmt"
  18. "sync"
  19. "time"
  20. v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  21. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  22. mvccpb "github.com/coreos/etcd/mvcc/mvccpb"
  23. "google.golang.org/grpc"
  24. "google.golang.org/grpc/codes"
  25. "google.golang.org/grpc/metadata"
  26. "google.golang.org/grpc/status"
  27. )
  28. const (
  29. EventTypeDelete = mvccpb.DELETE
  30. EventTypePut = mvccpb.PUT
  31. closeSendErrTimeout = 250 * time.Millisecond
  32. )
  33. type Event mvccpb.Event
  34. type WatchChan <-chan WatchResponse
  35. type Watcher interface {
  36. // Watch watches on a key or prefix. The watched events will be returned
  37. // through the returned channel. If revisions waiting to be sent over the
  38. // watch are compacted, then the watch will be canceled by the server, the
  39. // client will post a compacted error watch response, and the channel will close.
  40. // If the context "ctx" is canceled or timed out, returned "WatchChan" is closed,
  41. // and "WatchResponse" from this closed channel has zero events and nil "Err()".
  42. // The context "ctx" MUST be canceled, as soon as watcher is no longer being used,
  43. // to release the associated resources.
  44. // If the context is "context.Background/TODO", returned "WatchChan" will not be closed
  45. // and wait until events happen, except when server returns a non-recoverable error.
  46. // For example, when context passed with "WithRequireLeader" and the connected server
  47. // has no leader, error "etcdserver: no leader" is returned, and then "WatchChan" is
  48. // closed with non-nil "Err()".
  49. // Otherwise, as long as the context has not been canceled or timed out, watch will
  50. // retry on other recoverable errors forever until reconnected.
  51. //
  52. // TODO: explicitly set context error in the last "WatchResponse" message and close channel?
  53. // Currently, client contexts are overwritten with "valCtx" that never closes.
  54. // TODO(v3.4): configure watch retry policy, limit maximum retry number
  55. // (see https://github.com/coreos/etcd/issues/8980)
  56. Watch(ctx context.Context, key string, opts ...OpOption) WatchChan
  57. // Close closes the watcher and cancels all watch requests.
  58. Close() error
  59. }
  60. type WatchResponse struct {
  61. Header pb.ResponseHeader
  62. Events []*Event
  63. // CompactRevision is the minimum revision the watcher may receive.
  64. CompactRevision int64
  65. // Canceled is used to indicate watch failure.
  66. // If the watch failed and the stream was about to close, before the channel is closed,
  67. // the channel sends a final response that has Canceled set to true with a non-nil Err().
  68. Canceled bool
  69. // Created is used to indicate the creation of the watcher.
  70. Created bool
  71. closeErr error
  72. // cancelReason is a reason of canceling watch
  73. cancelReason string
  74. }
  75. // IsCreate returns true if the event tells that the key is newly created.
  76. func (e *Event) IsCreate() bool {
  77. return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
  78. }
  79. // IsModify returns true if the event tells that a new value is put on existing key.
  80. func (e *Event) IsModify() bool {
  81. return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
  82. }
  83. // Err is the error value if this WatchResponse holds an error.
  84. func (wr *WatchResponse) Err() error {
  85. switch {
  86. case wr.closeErr != nil:
  87. return v3rpc.Error(wr.closeErr)
  88. case wr.CompactRevision != 0:
  89. return v3rpc.ErrCompacted
  90. case wr.Canceled:
  91. if len(wr.cancelReason) != 0 {
  92. return v3rpc.Error(status.Error(codes.FailedPrecondition, wr.cancelReason))
  93. }
  94. return v3rpc.ErrFutureRev
  95. }
  96. return nil
  97. }
  98. // IsProgressNotify returns true if the WatchResponse is progress notification.
  99. func (wr *WatchResponse) IsProgressNotify() bool {
  100. return len(wr.Events) == 0 && !wr.Canceled && !wr.Created && wr.CompactRevision == 0 && wr.Header.Revision != 0
  101. }
  102. // watcher implements the Watcher interface
  103. type watcher struct {
  104. remote pb.WatchClient
  105. callOpts []grpc.CallOption
  106. // mu protects the grpc streams map
  107. mu sync.RWMutex
  108. // streams holds all the active grpc streams keyed by ctx value.
  109. streams map[string]*watchGrpcStream
  110. }
  111. // watchGrpcStream tracks all watch resources attached to a single grpc stream.
  112. type watchGrpcStream struct {
  113. owner *watcher
  114. remote pb.WatchClient
  115. callOpts []grpc.CallOption
  116. // ctx controls internal remote.Watch requests
  117. ctx context.Context
  118. // ctxKey is the key used when looking up this stream's context
  119. ctxKey string
  120. cancel context.CancelFunc
  121. // substreams holds all active watchers on this grpc stream
  122. substreams map[int64]*watcherStream
  123. // resuming holds all resuming watchers on this grpc stream
  124. resuming []*watcherStream
  125. // reqc sends a watch request from Watch() to the main goroutine
  126. reqc chan *watchRequest
  127. // respc receives data from the watch client
  128. respc chan *pb.WatchResponse
  129. // donec closes to broadcast shutdown
  130. donec chan struct{}
  131. // errc transmits errors from grpc Recv to the watch stream reconnect logic
  132. errc chan error
  133. // closingc gets the watcherStream of closing watchers
  134. closingc chan *watcherStream
  135. // wg is Done when all substream goroutines have exited
  136. wg sync.WaitGroup
  137. // resumec closes to signal that all substreams should begin resuming
  138. resumec chan struct{}
  139. // closeErr is the error that closed the watch stream
  140. closeErr error
  141. }
  142. // watchRequest is issued by the subscriber to start a new watcher
  143. type watchRequest struct {
  144. ctx context.Context
  145. key string
  146. end string
  147. rev int64
  148. // send created notification event if this field is true
  149. createdNotify bool
  150. // progressNotify is for progress updates
  151. progressNotify bool
  152. // fragmentation should be disabled by default
  153. // if true, split watch events when total exceeds
  154. // "--max-request-bytes" flag value + 512-byte
  155. fragment bool
  156. // filters is the list of events to filter out
  157. filters []pb.WatchCreateRequest_FilterType
  158. // get the previous key-value pair before the event happens
  159. prevKV bool
  160. // retc receives a chan WatchResponse once the watcher is established
  161. retc chan chan WatchResponse
  162. }
  163. // watcherStream represents a registered watcher
  164. type watcherStream struct {
  165. // initReq is the request that initiated this request
  166. initReq watchRequest
  167. // outc publishes watch responses to subscriber
  168. outc chan WatchResponse
  169. // recvc buffers watch responses before publishing
  170. recvc chan *WatchResponse
  171. // donec closes when the watcherStream goroutine stops.
  172. donec chan struct{}
  173. // closing is set to true when stream should be scheduled to shutdown.
  174. closing bool
  175. // id is the registered watch id on the grpc stream
  176. id int64
  177. // buf holds all events received from etcd but not yet consumed by the client
  178. buf []*WatchResponse
  179. }
  180. func NewWatcher(c *Client) Watcher {
  181. return NewWatchFromWatchClient(pb.NewWatchClient(c.conn), c)
  182. }
  183. func NewWatchFromWatchClient(wc pb.WatchClient, c *Client) Watcher {
  184. w := &watcher{
  185. remote: wc,
  186. streams: make(map[string]*watchGrpcStream),
  187. }
  188. if c != nil {
  189. w.callOpts = c.callOpts
  190. }
  191. return w
  192. }
  var (
  	// valCtxCh is the Done channel shared by every valCtx; it is never closed.
  	valCtxCh = make(chan struct{})
  	// zeroTime is the placeholder deadline returned by valCtx.Deadline.
  	zeroTime = time.Unix(0, 0)
  )

  // valCtx wraps a context so that only its values remain visible: the wrapper
  // never reports a deadline, never becomes Done, and never has an error.
  type valCtx struct{ context.Context }

  // Deadline always reports that no deadline is set.
  func (vc *valCtx) Deadline() (time.Time, bool) {
  	return zeroTime, false
  }

  // Done returns a channel that is never closed.
  func (vc *valCtx) Done() <-chan struct{} {
  	return valCtxCh
  }

  // Err always returns nil because a valCtx is never canceled.
  func (vc *valCtx) Err() error {
  	return nil
  }
  201. func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
  202. ctx, cancel := context.WithCancel(&valCtx{inctx})
  203. wgs := &watchGrpcStream{
  204. owner: w,
  205. remote: w.remote,
  206. callOpts: w.callOpts,
  207. ctx: ctx,
  208. ctxKey: streamKeyFromCtx(inctx),
  209. cancel: cancel,
  210. substreams: make(map[int64]*watcherStream),
  211. respc: make(chan *pb.WatchResponse),
  212. reqc: make(chan *watchRequest),
  213. donec: make(chan struct{}),
  214. errc: make(chan error, 1),
  215. closingc: make(chan *watcherStream),
  216. resumec: make(chan struct{}),
  217. }
  218. go wgs.run()
  219. return wgs
  220. }
  221. // Watch posts a watch request to run() and waits for a new watcher channel
  222. func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
  223. ow := opWatch(key, opts...)
  224. var filters []pb.WatchCreateRequest_FilterType
  225. if ow.filterPut {
  226. filters = append(filters, pb.WatchCreateRequest_NOPUT)
  227. }
  228. if ow.filterDelete {
  229. filters = append(filters, pb.WatchCreateRequest_NODELETE)
  230. }
  231. wr := &watchRequest{
  232. ctx: ctx,
  233. createdNotify: ow.createdNotify,
  234. key: string(ow.key),
  235. end: string(ow.end),
  236. rev: ow.rev,
  237. progressNotify: ow.progressNotify,
  238. fragment: ow.fragment,
  239. filters: filters,
  240. prevKV: ow.prevKV,
  241. retc: make(chan chan WatchResponse, 1),
  242. }
  243. ok := false
  244. ctxKey := streamKeyFromCtx(ctx)
  245. // find or allocate appropriate grpc watch stream
  246. w.mu.Lock()
  247. if w.streams == nil {
  248. // closed
  249. w.mu.Unlock()
  250. ch := make(chan WatchResponse)
  251. close(ch)
  252. return ch
  253. }
  254. wgs := w.streams[ctxKey]
  255. if wgs == nil {
  256. wgs = w.newWatcherGrpcStream(ctx)
  257. w.streams[ctxKey] = wgs
  258. }
  259. donec := wgs.donec
  260. reqc := wgs.reqc
  261. w.mu.Unlock()
  262. // couldn't create channel; return closed channel
  263. closeCh := make(chan WatchResponse, 1)
  264. // submit request
  265. select {
  266. case reqc <- wr:
  267. ok = true
  268. case <-wr.ctx.Done():
  269. case <-donec:
  270. if wgs.closeErr != nil {
  271. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  272. break
  273. }
  274. // retry; may have dropped stream from no ctxs
  275. return w.Watch(ctx, key, opts...)
  276. }
  277. // receive channel
  278. if ok {
  279. select {
  280. case ret := <-wr.retc:
  281. return ret
  282. case <-ctx.Done():
  283. case <-donec:
  284. if wgs.closeErr != nil {
  285. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  286. break
  287. }
  288. // retry; may have dropped stream from no ctxs
  289. return w.Watch(ctx, key, opts...)
  290. }
  291. }
  292. close(closeCh)
  293. return closeCh
  294. }
  295. func (w *watcher) Close() (err error) {
  296. w.mu.Lock()
  297. streams := w.streams
  298. w.streams = nil
  299. w.mu.Unlock()
  300. for _, wgs := range streams {
  301. if werr := wgs.close(); werr != nil {
  302. err = werr
  303. }
  304. }
  305. return err
  306. }
  307. func (w *watchGrpcStream) close() (err error) {
  308. w.cancel()
  309. <-w.donec
  310. select {
  311. case err = <-w.errc:
  312. default:
  313. }
  314. return toErr(w.ctx, err)
  315. }
  316. func (w *watcher) closeStream(wgs *watchGrpcStream) {
  317. w.mu.Lock()
  318. close(wgs.donec)
  319. wgs.cancel()
  320. if w.streams != nil {
  321. delete(w.streams, wgs.ctxKey)
  322. }
  323. w.mu.Unlock()
  324. }
  325. func (w *watchGrpcStream) addSubstream(resp *pb.WatchResponse, ws *watcherStream) {
  326. // check watch ID for backward compatibility (<= v3.3)
  327. if resp.WatchId == -1 || (resp.Canceled && resp.CancelReason != "") {
  328. // failed; no channel
  329. close(ws.recvc)
  330. return
  331. }
  332. ws.id = resp.WatchId
  333. w.substreams[ws.id] = ws
  334. }
  335. func (w *watchGrpcStream) sendCloseSubstream(ws *watcherStream, resp *WatchResponse) {
  336. select {
  337. case ws.outc <- *resp:
  338. case <-ws.initReq.ctx.Done():
  339. case <-time.After(closeSendErrTimeout):
  340. }
  341. close(ws.outc)
  342. }
  343. func (w *watchGrpcStream) closeSubstream(ws *watcherStream) {
  344. // send channel response in case stream was never established
  345. select {
  346. case ws.initReq.retc <- ws.outc:
  347. default:
  348. }
  349. // close subscriber's channel
  350. if closeErr := w.closeErr; closeErr != nil && ws.initReq.ctx.Err() == nil {
  351. go w.sendCloseSubstream(ws, &WatchResponse{closeErr: w.closeErr})
  352. } else if ws.outc != nil {
  353. close(ws.outc)
  354. }
  355. if ws.id != -1 {
  356. delete(w.substreams, ws.id)
  357. return
  358. }
  359. for i := range w.resuming {
  360. if w.resuming[i] == ws {
  361. w.resuming[i] = nil
  362. return
  363. }
  364. }
  365. }
  366. // run is the root of the goroutines for managing a watcher client
  367. func (w *watchGrpcStream) run() {
  368. var wc pb.Watch_WatchClient
  369. var closeErr error
  370. // substreams marked to close but goroutine still running; needed for
  371. // avoiding double-closing recvc on grpc stream teardown
  372. closing := make(map[*watcherStream]struct{})
  373. defer func() {
  374. w.closeErr = closeErr
  375. // shutdown substreams and resuming substreams
  376. for _, ws := range w.substreams {
  377. if _, ok := closing[ws]; !ok {
  378. close(ws.recvc)
  379. closing[ws] = struct{}{}
  380. }
  381. }
  382. for _, ws := range w.resuming {
  383. if _, ok := closing[ws]; ws != nil && !ok {
  384. close(ws.recvc)
  385. closing[ws] = struct{}{}
  386. }
  387. }
  388. w.joinSubstreams()
  389. for range closing {
  390. w.closeSubstream(<-w.closingc)
  391. }
  392. w.wg.Wait()
  393. w.owner.closeStream(w)
  394. }()
  395. // start a stream with the etcd grpc server
  396. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  397. return
  398. }
  399. cancelSet := make(map[int64]struct{})
  400. var cur *pb.WatchResponse
  401. for {
  402. select {
  403. // Watch() requested
  404. case wreq := <-w.reqc:
  405. outc := make(chan WatchResponse, 1)
  406. // TODO: pass custom watch ID?
  407. ws := &watcherStream{
  408. initReq: *wreq,
  409. id: -1,
  410. outc: outc,
  411. // unbuffered so resumes won't cause repeat events
  412. recvc: make(chan *WatchResponse),
  413. }
  414. ws.donec = make(chan struct{})
  415. w.wg.Add(1)
  416. go w.serveSubstream(ws, w.resumec)
  417. // queue up for watcher creation/resume
  418. w.resuming = append(w.resuming, ws)
  419. if len(w.resuming) == 1 {
  420. // head of resume queue, can register a new watcher
  421. wc.Send(ws.initReq.toPB())
  422. }
  423. // new events from the watch client
  424. case pbresp := <-w.respc:
  425. if cur == nil || pbresp.Created || pbresp.Canceled {
  426. cur = pbresp
  427. } else if cur != nil && cur.WatchId == pbresp.WatchId {
  428. // merge new events
  429. cur.Events = append(cur.Events, pbresp.Events...)
  430. // update "Fragment" field; last response with "Fragment" == false
  431. cur.Fragment = pbresp.Fragment
  432. }
  433. switch {
  434. case pbresp.Created:
  435. // response to head of queue creation
  436. if ws := w.resuming[0]; ws != nil {
  437. w.addSubstream(pbresp, ws)
  438. w.dispatchEvent(pbresp)
  439. w.resuming[0] = nil
  440. }
  441. if ws := w.nextResume(); ws != nil {
  442. wc.Send(ws.initReq.toPB())
  443. }
  444. // reset for next iteration
  445. cur = nil
  446. case pbresp.Canceled && pbresp.CompactRevision == 0:
  447. delete(cancelSet, pbresp.WatchId)
  448. if ws, ok := w.substreams[pbresp.WatchId]; ok {
  449. // signal to stream goroutine to update closingc
  450. close(ws.recvc)
  451. closing[ws] = struct{}{}
  452. }
  453. // reset for next iteration
  454. cur = nil
  455. case cur.Fragment:
  456. // watch response events are still fragmented
  457. // continue to fetch next fragmented event arrival
  458. continue
  459. default:
  460. // dispatch to appropriate watch stream
  461. ok := w.dispatchEvent(cur)
  462. // reset for next iteration
  463. cur = nil
  464. if ok {
  465. break
  466. }
  467. // watch response on unexpected watch id; cancel id
  468. if _, ok := cancelSet[pbresp.WatchId]; ok {
  469. break
  470. }
  471. cancelSet[pbresp.WatchId] = struct{}{}
  472. cr := &pb.WatchRequest_CancelRequest{
  473. CancelRequest: &pb.WatchCancelRequest{
  474. WatchId: pbresp.WatchId,
  475. },
  476. }
  477. req := &pb.WatchRequest{RequestUnion: cr}
  478. wc.Send(req)
  479. }
  480. // watch client failed on Recv; spawn another if possible
  481. case err := <-w.errc:
  482. if isHaltErr(w.ctx, err) || toErr(w.ctx, err) == v3rpc.ErrNoLeader {
  483. closeErr = err
  484. return
  485. }
  486. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  487. return
  488. }
  489. if ws := w.nextResume(); ws != nil {
  490. wc.Send(ws.initReq.toPB())
  491. }
  492. cancelSet = make(map[int64]struct{})
  493. case <-w.ctx.Done():
  494. return
  495. case ws := <-w.closingc:
  496. w.closeSubstream(ws)
  497. delete(closing, ws)
  498. // no more watchers on this stream, shutdown
  499. if len(w.substreams)+len(w.resuming) == 0 {
  500. return
  501. }
  502. }
  503. }
  504. }
  505. // nextResume chooses the next resuming to register with the grpc stream. Abandoned
  506. // streams are marked as nil in the queue since the head must wait for its inflight registration.
  507. func (w *watchGrpcStream) nextResume() *watcherStream {
  508. for len(w.resuming) != 0 {
  509. if w.resuming[0] != nil {
  510. return w.resuming[0]
  511. }
  512. w.resuming = w.resuming[1:len(w.resuming)]
  513. }
  514. return nil
  515. }
  516. // dispatchEvent sends a WatchResponse to the appropriate watcher stream
  517. func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
  518. events := make([]*Event, len(pbresp.Events))
  519. for i, ev := range pbresp.Events {
  520. events[i] = (*Event)(ev)
  521. }
  522. // TODO: return watch ID?
  523. wr := &WatchResponse{
  524. Header: *pbresp.Header,
  525. Events: events,
  526. CompactRevision: pbresp.CompactRevision,
  527. Created: pbresp.Created,
  528. Canceled: pbresp.Canceled,
  529. cancelReason: pbresp.CancelReason,
  530. }
  531. ws, ok := w.substreams[pbresp.WatchId]
  532. if !ok {
  533. return false
  534. }
  535. select {
  536. case ws.recvc <- wr:
  537. case <-ws.donec:
  538. return false
  539. }
  540. return true
  541. }
  542. // serveWatchClient forwards messages from the grpc stream to run()
  543. func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
  544. for {
  545. resp, err := wc.Recv()
  546. if err != nil {
  547. select {
  548. case w.errc <- err:
  549. case <-w.donec:
  550. }
  551. return
  552. }
  553. select {
  554. case w.respc <- resp:
  555. case <-w.donec:
  556. return
  557. }
  558. }
  559. }
  560. // serveSubstream forwards watch responses from run() to the subscriber
  561. func (w *watchGrpcStream) serveSubstream(ws *watcherStream, resumec chan struct{}) {
  562. if ws.closing {
  563. panic("created substream goroutine but substream is closing")
  564. }
  565. // nextRev is the minimum expected next revision
  566. nextRev := ws.initReq.rev
  567. resuming := false
  568. defer func() {
  569. if !resuming {
  570. ws.closing = true
  571. }
  572. close(ws.donec)
  573. if !resuming {
  574. w.closingc <- ws
  575. }
  576. w.wg.Done()
  577. }()
  578. emptyWr := &WatchResponse{}
  579. for {
  580. curWr := emptyWr
  581. outc := ws.outc
  582. if len(ws.buf) > 0 {
  583. curWr = ws.buf[0]
  584. } else {
  585. outc = nil
  586. }
  587. select {
  588. case outc <- *curWr:
  589. if ws.buf[0].Err() != nil {
  590. return
  591. }
  592. ws.buf[0] = nil
  593. ws.buf = ws.buf[1:]
  594. case wr, ok := <-ws.recvc:
  595. if !ok {
  596. // shutdown from closeSubstream
  597. return
  598. }
  599. if wr.Created {
  600. if ws.initReq.retc != nil {
  601. ws.initReq.retc <- ws.outc
  602. // to prevent next write from taking the slot in buffered channel
  603. // and posting duplicate create events
  604. ws.initReq.retc = nil
  605. // send first creation event only if requested
  606. if ws.initReq.createdNotify {
  607. ws.outc <- *wr
  608. }
  609. // once the watch channel is returned, a current revision
  610. // watch must resume at the store revision. This is necessary
  611. // for the following case to work as expected:
  612. // wch := m1.Watch("a")
  613. // m2.Put("a", "b")
  614. // <-wch
  615. // If the revision is only bound on the first observed event,
  616. // if wch is disconnected before the Put is issued, then reconnects
  617. // after it is committed, it'll miss the Put.
  618. if ws.initReq.rev == 0 {
  619. nextRev = wr.Header.Revision
  620. }
  621. }
  622. } else {
  623. // current progress of watch; <= store revision
  624. nextRev = wr.Header.Revision
  625. }
  626. if len(wr.Events) > 0 {
  627. nextRev = wr.Events[len(wr.Events)-1].Kv.ModRevision + 1
  628. }
  629. ws.initReq.rev = nextRev
  630. // created event is already sent above,
  631. // watcher should not post duplicate events
  632. if wr.Created {
  633. continue
  634. }
  635. // TODO pause channel if buffer gets too large
  636. ws.buf = append(ws.buf, wr)
  637. case <-w.ctx.Done():
  638. return
  639. case <-ws.initReq.ctx.Done():
  640. return
  641. case <-resumec:
  642. resuming = true
  643. return
  644. }
  645. }
  646. // lazily send cancel message if events on missing id
  647. }
  648. func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
  649. // mark all substreams as resuming
  650. close(w.resumec)
  651. w.resumec = make(chan struct{})
  652. w.joinSubstreams()
  653. for _, ws := range w.substreams {
  654. ws.id = -1
  655. w.resuming = append(w.resuming, ws)
  656. }
  657. // strip out nils, if any
  658. var resuming []*watcherStream
  659. for _, ws := range w.resuming {
  660. if ws != nil {
  661. resuming = append(resuming, ws)
  662. }
  663. }
  664. w.resuming = resuming
  665. w.substreams = make(map[int64]*watcherStream)
  666. // connect to grpc stream while accepting watcher cancelation
  667. stopc := make(chan struct{})
  668. donec := w.waitCancelSubstreams(stopc)
  669. wc, err := w.openWatchClient()
  670. close(stopc)
  671. <-donec
  672. // serve all non-closing streams, even if there's a client error
  673. // so that the teardown path can shutdown the streams as expected.
  674. for _, ws := range w.resuming {
  675. if ws.closing {
  676. continue
  677. }
  678. ws.donec = make(chan struct{})
  679. w.wg.Add(1)
  680. go w.serveSubstream(ws, w.resumec)
  681. }
  682. if err != nil {
  683. return nil, v3rpc.Error(err)
  684. }
  685. // receive data from new grpc stream
  686. go w.serveWatchClient(wc)
  687. return wc, nil
  688. }
  689. func (w *watchGrpcStream) waitCancelSubstreams(stopc <-chan struct{}) <-chan struct{} {
  690. var wg sync.WaitGroup
  691. wg.Add(len(w.resuming))
  692. donec := make(chan struct{})
  693. for i := range w.resuming {
  694. go func(ws *watcherStream) {
  695. defer wg.Done()
  696. if ws.closing {
  697. if ws.initReq.ctx.Err() != nil && ws.outc != nil {
  698. close(ws.outc)
  699. ws.outc = nil
  700. }
  701. return
  702. }
  703. select {
  704. case <-ws.initReq.ctx.Done():
  705. // closed ws will be removed from resuming
  706. ws.closing = true
  707. close(ws.outc)
  708. ws.outc = nil
  709. w.wg.Add(1)
  710. go func() {
  711. defer w.wg.Done()
  712. w.closingc <- ws
  713. }()
  714. case <-stopc:
  715. }
  716. }(w.resuming[i])
  717. }
  718. go func() {
  719. defer close(donec)
  720. wg.Wait()
  721. }()
  722. return donec
  723. }
  724. // joinSubstreams waits for all substream goroutines to complete.
  725. func (w *watchGrpcStream) joinSubstreams() {
  726. for _, ws := range w.substreams {
  727. <-ws.donec
  728. }
  729. for _, ws := range w.resuming {
  730. if ws != nil {
  731. <-ws.donec
  732. }
  733. }
  734. }
  735. // openWatchClient retries opening a watch client until success or halt.
  736. // manually retry in case "ws==nil && err==nil"
  737. // TODO: remove FailFast=false
  738. func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
  739. for {
  740. select {
  741. case <-w.ctx.Done():
  742. if err == nil {
  743. return nil, w.ctx.Err()
  744. }
  745. return nil, err
  746. default:
  747. }
  748. if ws, err = w.remote.Watch(w.ctx, w.callOpts...); ws != nil && err == nil {
  749. break
  750. }
  751. if isHaltErr(w.ctx, err) {
  752. return nil, v3rpc.Error(err)
  753. }
  754. }
  755. return ws, nil
  756. }
  757. // toPB converts an internal watch request structure to its protobuf WatchRequest structure.
  758. func (wr *watchRequest) toPB() *pb.WatchRequest {
  759. req := &pb.WatchCreateRequest{
  760. StartRevision: wr.rev,
  761. Key: []byte(wr.key),
  762. RangeEnd: []byte(wr.end),
  763. ProgressNotify: wr.progressNotify,
  764. Filters: wr.filters,
  765. PrevKv: wr.prevKV,
  766. Fragment: wr.fragment,
  767. }
  768. cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
  769. return &pb.WatchRequest{RequestUnion: cr}
  770. }
  771. func streamKeyFromCtx(ctx context.Context) string {
  772. if md, ok := metadata.FromOutgoingContext(ctx); ok {
  773. return fmt.Sprintf("%+v", md)
  774. }
  775. return ""
  776. }