watch.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "context"
  17. "fmt"
  18. "sync"
  19. "time"
  20. v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  21. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  22. mvccpb "github.com/coreos/etcd/mvcc/mvccpb"
  23. "google.golang.org/grpc"
  24. "google.golang.org/grpc/codes"
  25. "google.golang.org/grpc/metadata"
  26. "google.golang.org/grpc/status"
  27. )
const (
	// EventTypeDelete re-exports mvccpb.DELETE so callers can compare
	// Event.Type without importing mvccpb.
	EventTypeDelete = mvccpb.DELETE
	// EventTypePut re-exports mvccpb.PUT so callers can compare
	// Event.Type without importing mvccpb.
	EventTypePut = mvccpb.PUT

	// closeSendErrTimeout bounds how long sendCloseSubstream waits for a
	// subscriber to accept the final error response before the subscriber's
	// channel is closed anyway.
	closeSendErrTimeout = 250 * time.Millisecond
)
// Event is the client-side name for mvccpb.Event. It shares the same
// underlying structure and adds the IsCreate and IsModify helpers.
type Event mvccpb.Event

// WatchChan is the receive-only channel on which a watch delivers its
// WatchResponse values.
type WatchChan <-chan WatchResponse
// Watcher is the interface for creating watches and for shutting all of
// them down together.
type Watcher interface {
	// Watch watches on a key or prefix. The watched events will be returned
	// through the returned channel. If revisions waiting to be sent over the
	// watch are compacted, then the watch will be canceled by the server, the
	// client will post a compacted error watch response, and the channel will close.
	// If the context "ctx" is canceled or timed out, returned "WatchChan" is closed,
	// and "WatchResponse" from this closed channel has zero events and nil "Err()".
	// The context "ctx" MUST be canceled, as soon as watcher is no longer being used,
	// to release the associated resources.
	//
	// If the context is "context.Background/TODO", returned "WatchChan" will
	// not be closed and block until event is triggered, except when server
	// returns a non-recoverable error (e.g. ErrCompacted).
	// For example, when context passed with "WithRequireLeader" and the
	// connected server has no leader (e.g. due to network partition),
	// error "etcdserver: no leader" (ErrNoLeader) will be returned,
	// and then "WatchChan" is closed with non-nil "Err()".
	// In order to prevent a watch stream being stuck in a partitioned node,
	// make sure to wrap context with "WithRequireLeader".
	//
	// Otherwise, as long as the context has not been canceled or timed out,
	// watch will retry on other recoverable errors forever until reconnected.
	//
	// TODO: explicitly set context error in the last "WatchResponse" message and close channel?
	// Currently, client contexts are overwritten with "valCtx" that never closes.
	// TODO(v3.4): configure watch retry policy, limit maximum retry number
	// (see https://github.com/coreos/etcd/issues/8980)
	Watch(ctx context.Context, key string, opts ...OpOption) WatchChan

	// Close closes the watcher and cancels all watch requests.
	Close() error
}
// WatchResponse is one message delivered on a WatchChan. It carries either a
// batch of events or a status/error notification; use Err to check for the
// latter.
type WatchResponse struct {
	// Header is copied from the server's response (see dispatchEvent).
	Header pb.ResponseHeader
	// Events holds the events carried by this response; it may be empty.
	Events []*Event

	// CompactRevision is the minimum revision the watcher may receive.
	// Err reports ErrCompacted whenever this field is non-zero.
	CompactRevision int64

	// Canceled is used to indicate watch failure.
	// If the watch failed and the stream was about to close, before the channel is closed,
	// the channel sends a final response that has Canceled set to true with a non-nil Err().
	Canceled bool

	// Created is used to indicate the creation of the watcher.
	Created bool

	// closeErr is the error that shut down the underlying stream, if any.
	// Err checks it before any other field.
	closeErr error

	// cancelReason is a reason of canceling watch
	cancelReason string
}
  81. // IsCreate returns true if the event tells that the key is newly created.
  82. func (e *Event) IsCreate() bool {
  83. return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
  84. }
  85. // IsModify returns true if the event tells that a new value is put on existing key.
  86. func (e *Event) IsModify() bool {
  87. return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
  88. }
  89. // Err is the error value if this WatchResponse holds an error.
  90. func (wr *WatchResponse) Err() error {
  91. switch {
  92. case wr.closeErr != nil:
  93. return v3rpc.Error(wr.closeErr)
  94. case wr.CompactRevision != 0:
  95. return v3rpc.ErrCompacted
  96. case wr.Canceled:
  97. if len(wr.cancelReason) != 0 {
  98. return v3rpc.Error(status.Error(codes.FailedPrecondition, wr.cancelReason))
  99. }
  100. return v3rpc.ErrFutureRev
  101. }
  102. return nil
  103. }
  104. // IsProgressNotify returns true if the WatchResponse is progress notification.
  105. func (wr *WatchResponse) IsProgressNotify() bool {
  106. return len(wr.Events) == 0 && !wr.Canceled && !wr.Created && wr.CompactRevision == 0 && wr.Header.Revision != 0
  107. }
// watcher implements the Watcher interface
type watcher struct {
	// remote is the gRPC watch client used to open streams.
	remote pb.WatchClient
	// callOpts are the gRPC call options passed when opening a stream.
	callOpts []grpc.CallOption

	// mu protects the grpc streams map
	mu sync.RWMutex

	// streams holds all the active grpc streams keyed by ctx value.
	// It is set to nil by Close; Watch treats a nil map as "closed".
	streams map[string]*watchGrpcStream
}
// watchGrpcStream tracks all watch resources attached to a single grpc stream.
type watchGrpcStream struct {
	// owner is the watcher that created this stream; run() calls
	// owner.closeStream on exit to unregister it.
	owner *watcher
	// remote is the gRPC watch client, copied from owner.
	remote pb.WatchClient
	// callOpts are the gRPC call options, copied from owner.
	callOpts []grpc.CallOption

	// ctx controls internal remote.Watch requests
	ctx context.Context
	// ctxKey is the key used when looking up this stream's context
	ctxKey string
	// cancel cancels ctx.
	cancel context.CancelFunc

	// substreams holds all active watchers on this grpc stream
	substreams map[int64]*watcherStream
	// resuming holds all resuming watchers on this grpc stream
	resuming []*watcherStream

	// reqc sends a watch request from Watch() to the main goroutine
	reqc chan *watchRequest
	// respc receives data from the watch client
	respc chan *pb.WatchResponse
	// donec closes to broadcast shutdown
	donec chan struct{}
	// errc transmits errors from grpc Recv to the watch stream reconnect logic
	errc chan error
	// closingc gets the watcherStream of closing watchers
	closingc chan *watcherStream
	// wg is Done when all substream goroutines have exited
	wg sync.WaitGroup

	// resumec closes to signal that all substreams should begin resuming
	resumec chan struct{}
	// closeErr is the error that closed the watch stream
	closeErr error
}
// watchRequest is issued by the subscriber to start a new watcher
type watchRequest struct {
	// ctx is the caller's context; its cancellation ends the watch.
	ctx context.Context
	// key is the key (or range start) to watch.
	key string
	// end is the range end; it is sent as RangeEnd in the create request.
	end string
	// rev is the start revision; serveSubstream advances it as events arrive
	// so that a resumed watch restarts from the right place.
	rev int64

	// send created notification event if this field is true
	createdNotify bool
	// progressNotify is for progress updates
	progressNotify bool
	// fragmentation should be disabled by default
	// if true, split watch events when total exceeds
	// "--max-request-bytes" flag value + 512-byte
	fragment bool

	// filters is the list of events to filter out
	filters []pb.WatchCreateRequest_FilterType
	// get the previous key-value pair before the event happens
	prevKV bool
	// retc receives a chan WatchResponse once the watcher is established
	retc chan chan WatchResponse
}
// watcherStream represents a registered watcher
type watcherStream struct {
	// initReq is the request that initiated this request
	initReq watchRequest

	// outc publishes watch responses to subscriber
	outc chan WatchResponse
	// recvc buffers watch responses before publishing
	recvc chan *WatchResponse
	// donec closes when the watcherStream goroutine stops.
	donec chan struct{}
	// closing is set to true when stream should be scheduled to shutdown.
	closing bool
	// id is the registered watch id on the grpc stream;
	// -1 means the watcher is not currently registered.
	id int64

	// buf holds all events received from etcd but not yet consumed by the client
	buf []*WatchResponse
}
  186. func NewWatcher(c *Client) Watcher {
  187. return NewWatchFromWatchClient(pb.NewWatchClient(c.conn), c)
  188. }
  189. func NewWatchFromWatchClient(wc pb.WatchClient, c *Client) Watcher {
  190. w := &watcher{
  191. remote: wc,
  192. streams: make(map[string]*watchGrpcStream),
  193. }
  194. if c != nil {
  195. w.callOpts = c.callOpts
  196. }
  197. return w
  198. }
  199. // never closes
  200. var valCtxCh = make(chan struct{})
  201. var zeroTime = time.Unix(0, 0)
  202. // ctx with only the values; never Done
  203. type valCtx struct{ context.Context }
  204. func (vc *valCtx) Deadline() (time.Time, bool) { return zeroTime, false }
  205. func (vc *valCtx) Done() <-chan struct{} { return valCtxCh }
  206. func (vc *valCtx) Err() error { return nil }
  207. func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
  208. ctx, cancel := context.WithCancel(&valCtx{inctx})
  209. wgs := &watchGrpcStream{
  210. owner: w,
  211. remote: w.remote,
  212. callOpts: w.callOpts,
  213. ctx: ctx,
  214. ctxKey: streamKeyFromCtx(inctx),
  215. cancel: cancel,
  216. substreams: make(map[int64]*watcherStream),
  217. respc: make(chan *pb.WatchResponse),
  218. reqc: make(chan *watchRequest),
  219. donec: make(chan struct{}),
  220. errc: make(chan error, 1),
  221. closingc: make(chan *watcherStream),
  222. resumec: make(chan struct{}),
  223. }
  224. go wgs.run()
  225. return wgs
  226. }
  227. // Watch posts a watch request to run() and waits for a new watcher channel
  228. func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
  229. ow := opWatch(key, opts...)
  230. var filters []pb.WatchCreateRequest_FilterType
  231. if ow.filterPut {
  232. filters = append(filters, pb.WatchCreateRequest_NOPUT)
  233. }
  234. if ow.filterDelete {
  235. filters = append(filters, pb.WatchCreateRequest_NODELETE)
  236. }
  237. wr := &watchRequest{
  238. ctx: ctx,
  239. createdNotify: ow.createdNotify,
  240. key: string(ow.key),
  241. end: string(ow.end),
  242. rev: ow.rev,
  243. progressNotify: ow.progressNotify,
  244. fragment: ow.fragment,
  245. filters: filters,
  246. prevKV: ow.prevKV,
  247. retc: make(chan chan WatchResponse, 1),
  248. }
  249. ok := false
  250. ctxKey := streamKeyFromCtx(ctx)
  251. // find or allocate appropriate grpc watch stream
  252. w.mu.Lock()
  253. if w.streams == nil {
  254. // closed
  255. w.mu.Unlock()
  256. ch := make(chan WatchResponse)
  257. close(ch)
  258. return ch
  259. }
  260. wgs := w.streams[ctxKey]
  261. if wgs == nil {
  262. wgs = w.newWatcherGrpcStream(ctx)
  263. w.streams[ctxKey] = wgs
  264. }
  265. donec := wgs.donec
  266. reqc := wgs.reqc
  267. w.mu.Unlock()
  268. // couldn't create channel; return closed channel
  269. closeCh := make(chan WatchResponse, 1)
  270. // submit request
  271. select {
  272. case reqc <- wr:
  273. ok = true
  274. case <-wr.ctx.Done():
  275. case <-donec:
  276. if wgs.closeErr != nil {
  277. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  278. break
  279. }
  280. // retry; may have dropped stream from no ctxs
  281. return w.Watch(ctx, key, opts...)
  282. }
  283. // receive channel
  284. if ok {
  285. select {
  286. case ret := <-wr.retc:
  287. return ret
  288. case <-ctx.Done():
  289. case <-donec:
  290. if wgs.closeErr != nil {
  291. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  292. break
  293. }
  294. // retry; may have dropped stream from no ctxs
  295. return w.Watch(ctx, key, opts...)
  296. }
  297. }
  298. close(closeCh)
  299. return closeCh
  300. }
  301. func (w *watcher) Close() (err error) {
  302. w.mu.Lock()
  303. streams := w.streams
  304. w.streams = nil
  305. w.mu.Unlock()
  306. for _, wgs := range streams {
  307. if werr := wgs.close(); werr != nil {
  308. err = werr
  309. }
  310. }
  311. return err
  312. }
  313. func (w *watchGrpcStream) close() (err error) {
  314. w.cancel()
  315. <-w.donec
  316. select {
  317. case err = <-w.errc:
  318. default:
  319. }
  320. return toErr(w.ctx, err)
  321. }
  322. func (w *watcher) closeStream(wgs *watchGrpcStream) {
  323. w.mu.Lock()
  324. close(wgs.donec)
  325. wgs.cancel()
  326. if w.streams != nil {
  327. delete(w.streams, wgs.ctxKey)
  328. }
  329. w.mu.Unlock()
  330. }
  331. func (w *watchGrpcStream) addSubstream(resp *pb.WatchResponse, ws *watcherStream) {
  332. // check watch ID for backward compatibility (<= v3.3)
  333. if resp.WatchId == -1 || (resp.Canceled && resp.CancelReason != "") {
  334. // failed; no channel
  335. close(ws.recvc)
  336. return
  337. }
  338. ws.id = resp.WatchId
  339. w.substreams[ws.id] = ws
  340. }
  341. func (w *watchGrpcStream) sendCloseSubstream(ws *watcherStream, resp *WatchResponse) {
  342. select {
  343. case ws.outc <- *resp:
  344. case <-ws.initReq.ctx.Done():
  345. case <-time.After(closeSendErrTimeout):
  346. }
  347. close(ws.outc)
  348. }
  349. func (w *watchGrpcStream) closeSubstream(ws *watcherStream) {
  350. // send channel response in case stream was never established
  351. select {
  352. case ws.initReq.retc <- ws.outc:
  353. default:
  354. }
  355. // close subscriber's channel
  356. if closeErr := w.closeErr; closeErr != nil && ws.initReq.ctx.Err() == nil {
  357. go w.sendCloseSubstream(ws, &WatchResponse{closeErr: w.closeErr})
  358. } else if ws.outc != nil {
  359. close(ws.outc)
  360. }
  361. if ws.id != -1 {
  362. delete(w.substreams, ws.id)
  363. return
  364. }
  365. for i := range w.resuming {
  366. if w.resuming[i] == ws {
  367. w.resuming[i] = nil
  368. return
  369. }
  370. }
  371. }
  372. // run is the root of the goroutines for managing a watcher client
  373. func (w *watchGrpcStream) run() {
  374. var wc pb.Watch_WatchClient
  375. var closeErr error
  376. // substreams marked to close but goroutine still running; needed for
  377. // avoiding double-closing recvc on grpc stream teardown
  378. closing := make(map[*watcherStream]struct{})
  379. defer func() {
  380. w.closeErr = closeErr
  381. // shutdown substreams and resuming substreams
  382. for _, ws := range w.substreams {
  383. if _, ok := closing[ws]; !ok {
  384. close(ws.recvc)
  385. closing[ws] = struct{}{}
  386. }
  387. }
  388. for _, ws := range w.resuming {
  389. if _, ok := closing[ws]; ws != nil && !ok {
  390. close(ws.recvc)
  391. closing[ws] = struct{}{}
  392. }
  393. }
  394. w.joinSubstreams()
  395. for range closing {
  396. w.closeSubstream(<-w.closingc)
  397. }
  398. w.wg.Wait()
  399. w.owner.closeStream(w)
  400. }()
  401. // start a stream with the etcd grpc server
  402. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  403. return
  404. }
  405. cancelSet := make(map[int64]struct{})
  406. var cur *pb.WatchResponse
  407. for {
  408. select {
  409. // Watch() requested
  410. case wreq := <-w.reqc:
  411. outc := make(chan WatchResponse, 1)
  412. // TODO: pass custom watch ID?
  413. ws := &watcherStream{
  414. initReq: *wreq,
  415. id: -1,
  416. outc: outc,
  417. // unbuffered so resumes won't cause repeat events
  418. recvc: make(chan *WatchResponse),
  419. }
  420. ws.donec = make(chan struct{})
  421. w.wg.Add(1)
  422. go w.serveSubstream(ws, w.resumec)
  423. // queue up for watcher creation/resume
  424. w.resuming = append(w.resuming, ws)
  425. if len(w.resuming) == 1 {
  426. // head of resume queue, can register a new watcher
  427. wc.Send(ws.initReq.toPB())
  428. }
  429. // new events from the watch client
  430. case pbresp := <-w.respc:
  431. if cur == nil || pbresp.Created || pbresp.Canceled {
  432. cur = pbresp
  433. } else if cur != nil && cur.WatchId == pbresp.WatchId {
  434. // merge new events
  435. cur.Events = append(cur.Events, pbresp.Events...)
  436. // update "Fragment" field; last response with "Fragment" == false
  437. cur.Fragment = pbresp.Fragment
  438. }
  439. switch {
  440. case pbresp.Created:
  441. // response to head of queue creation
  442. if ws := w.resuming[0]; ws != nil {
  443. w.addSubstream(pbresp, ws)
  444. w.dispatchEvent(pbresp)
  445. w.resuming[0] = nil
  446. }
  447. if ws := w.nextResume(); ws != nil {
  448. wc.Send(ws.initReq.toPB())
  449. }
  450. // reset for next iteration
  451. cur = nil
  452. case pbresp.Canceled && pbresp.CompactRevision == 0:
  453. delete(cancelSet, pbresp.WatchId)
  454. if ws, ok := w.substreams[pbresp.WatchId]; ok {
  455. // signal to stream goroutine to update closingc
  456. close(ws.recvc)
  457. closing[ws] = struct{}{}
  458. }
  459. // reset for next iteration
  460. cur = nil
  461. case cur.Fragment:
  462. // watch response events are still fragmented
  463. // continue to fetch next fragmented event arrival
  464. continue
  465. default:
  466. // dispatch to appropriate watch stream
  467. ok := w.dispatchEvent(cur)
  468. // reset for next iteration
  469. cur = nil
  470. if ok {
  471. break
  472. }
  473. // watch response on unexpected watch id; cancel id
  474. if _, ok := cancelSet[pbresp.WatchId]; ok {
  475. break
  476. }
  477. cancelSet[pbresp.WatchId] = struct{}{}
  478. cr := &pb.WatchRequest_CancelRequest{
  479. CancelRequest: &pb.WatchCancelRequest{
  480. WatchId: pbresp.WatchId,
  481. },
  482. }
  483. req := &pb.WatchRequest{RequestUnion: cr}
  484. wc.Send(req)
  485. }
  486. // watch client failed on Recv; spawn another if possible
  487. case err := <-w.errc:
  488. if isHaltErr(w.ctx, err) || toErr(w.ctx, err) == v3rpc.ErrNoLeader {
  489. closeErr = err
  490. return
  491. }
  492. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  493. return
  494. }
  495. if ws := w.nextResume(); ws != nil {
  496. wc.Send(ws.initReq.toPB())
  497. }
  498. cancelSet = make(map[int64]struct{})
  499. case <-w.ctx.Done():
  500. return
  501. case ws := <-w.closingc:
  502. w.closeSubstream(ws)
  503. delete(closing, ws)
  504. // no more watchers on this stream, shutdown
  505. if len(w.substreams)+len(w.resuming) == 0 {
  506. return
  507. }
  508. }
  509. }
  510. }
  511. // nextResume chooses the next resuming to register with the grpc stream. Abandoned
  512. // streams are marked as nil in the queue since the head must wait for its inflight registration.
  513. func (w *watchGrpcStream) nextResume() *watcherStream {
  514. for len(w.resuming) != 0 {
  515. if w.resuming[0] != nil {
  516. return w.resuming[0]
  517. }
  518. w.resuming = w.resuming[1:len(w.resuming)]
  519. }
  520. return nil
  521. }
  522. // dispatchEvent sends a WatchResponse to the appropriate watcher stream
  523. func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
  524. events := make([]*Event, len(pbresp.Events))
  525. for i, ev := range pbresp.Events {
  526. events[i] = (*Event)(ev)
  527. }
  528. // TODO: return watch ID?
  529. wr := &WatchResponse{
  530. Header: *pbresp.Header,
  531. Events: events,
  532. CompactRevision: pbresp.CompactRevision,
  533. Created: pbresp.Created,
  534. Canceled: pbresp.Canceled,
  535. cancelReason: pbresp.CancelReason,
  536. }
  537. ws, ok := w.substreams[pbresp.WatchId]
  538. if !ok {
  539. return false
  540. }
  541. select {
  542. case ws.recvc <- wr:
  543. case <-ws.donec:
  544. return false
  545. }
  546. return true
  547. }
  548. // serveWatchClient forwards messages from the grpc stream to run()
  549. func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
  550. for {
  551. resp, err := wc.Recv()
  552. if err != nil {
  553. select {
  554. case w.errc <- err:
  555. case <-w.donec:
  556. }
  557. return
  558. }
  559. select {
  560. case w.respc <- resp:
  561. case <-w.donec:
  562. return
  563. }
  564. }
  565. }
// serveSubstream forwards watch responses from run() to the subscriber.
//
// It runs as one goroutine per watcher. Responses received on ws.recvc are
// queued in ws.buf and published on ws.outc in order. The goroutine also
// tracks the next revision to resume from and stores it in ws.initReq.rev.
//
// The goroutine exits when ws.recvc is closed, when a buffered response
// carrying an error has been published, when the stream context or the
// subscriber's context ends, or when resumec is closed. Only in the resumec
// case is the watcher kept alive: in every other case it is marked closing
// and reported on w.closingc.
func (w *watchGrpcStream) serveSubstream(ws *watcherStream, resumec chan struct{}) {
	if ws.closing {
		panic("created substream goroutine but substream is closing")
	}

	// nextRev is the minimum expected next revision
	nextRev := ws.initReq.rev
	resuming := false
	defer func() {
		if !resuming {
			ws.closing = true
		}
		// donec is closed before reporting on closingc so that anyone waiting
		// on this goroutine is released first
		close(ws.donec)
		if !resuming {
			w.closingc <- ws
		}
		w.wg.Done()
	}()

	emptyWr := &WatchResponse{}
	for {
		curWr := emptyWr
		outc := ws.outc

		// with nothing buffered, outc is set to nil so the send case below
		// can never be selected
		if len(ws.buf) > 0 {
			curWr = ws.buf[0]
		} else {
			outc = nil
		}
		select {
		case outc <- *curWr:
			// an error response is the last thing the subscriber receives
			if ws.buf[0].Err() != nil {
				return
			}
			ws.buf[0] = nil
			ws.buf = ws.buf[1:]
		case wr, ok := <-ws.recvc:
			if !ok {
				// recvc was closed by run() or addSubstream; shut down
				return
			}

			if wr.Created {
				if ws.initReq.retc != nil {
					ws.initReq.retc <- ws.outc
					// to prevent next write from taking the slot in buffered channel
					// and posting duplicate create events
					ws.initReq.retc = nil

					// send first creation event only if requested
					if ws.initReq.createdNotify {
						ws.outc <- *wr
					}
					// once the watch channel is returned, a current revision
					// watch must resume at the store revision. This is necessary
					// for the following case to work as expected:
					//	wch := m1.Watch("a")
					//	m2.Put("a", "b")
					//	<-wch
					// If the revision is only bound on the first observed event,
					// if wch is disconnected before the Put is issued, then reconnects
					// after it is committed, it'll miss the Put.
					if ws.initReq.rev == 0 {
						nextRev = wr.Header.Revision
					}
				}
			} else {
				// current progress of watch; <= store revision
				nextRev = wr.Header.Revision
			}

			if len(wr.Events) > 0 {
				nextRev = wr.Events[len(wr.Events)-1].Kv.ModRevision + 1
			}
			ws.initReq.rev = nextRev

			// created event is already sent above,
			// watcher should not post duplicate events
			if wr.Created {
				continue
			}

			// TODO pause channel if buffer gets too large
			ws.buf = append(ws.buf, wr)
		case <-w.ctx.Done():
			return
		case <-ws.initReq.ctx.Done():
			return
		case <-resumec:
			// the grpc stream is being replaced; exit without closing the watcher
			resuming = true
			return
		}
	}
}
// newWatchClient opens a fresh grpc watch stream and prepares every watcher
// to resume on it.
//
// It stops all substream goroutines by closing resumec, moves every
// registered watcher back to the resume queue with id -1, and compacts the
// queue. While the new stream is being opened it keeps honoring cancellation
// of the watchers' contexts. Afterwards it restarts a serveSubstream
// goroutine for every watcher that is not closing, even when opening failed,
// so that the teardown path can shut them down.
//
// It returns the new stream client, or the open error converted by
// v3rpc.Error.
func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
	// mark all substreams as resuming
	close(w.resumec)
	w.resumec = make(chan struct{})
	w.joinSubstreams()
	for _, ws := range w.substreams {
		ws.id = -1
		w.resuming = append(w.resuming, ws)
	}
	// strip out nils, if any
	var resuming []*watcherStream
	for _, ws := range w.resuming {
		if ws != nil {
			resuming = append(resuming, ws)
		}
	}
	w.resuming = resuming
	w.substreams = make(map[int64]*watcherStream)

	// connect to grpc stream while accepting watcher cancelation
	stopc := make(chan struct{})
	donec := w.waitCancelSubstreams(stopc)
	wc, err := w.openWatchClient()
	close(stopc)
	// wait until the cancellation helpers have all exited
	<-donec

	// serve all non-closing streams, even if there's a client error
	// so that the teardown path can shutdown the streams as expected.
	for _, ws := range w.resuming {
		if ws.closing {
			continue
		}
		ws.donec = make(chan struct{})
		w.wg.Add(1)
		go w.serveSubstream(ws, w.resumec)
	}

	if err != nil {
		return nil, v3rpc.Error(err)
	}

	// receive data from new grpc stream
	go w.serveWatchClient(wc)
	return wc, nil
}
// waitCancelSubstreams watches the context of every watcher in the resume
// queue until stopc is closed. It starts one goroutine per queued watcher.
//
// A watcher whose context ends in the meantime is marked closing, has its
// subscriber channel closed and set to nil, and is reported on w.closingc
// from a separate goroutine tracked by w.wg. A watcher that was already
// closing only has its subscriber channel closed, and only if its context has
// ended.
//
// The returned channel is closed once all per-watcher goroutines have exited.
func (w *watchGrpcStream) waitCancelSubstreams(stopc <-chan struct{}) <-chan struct{} {
	var wg sync.WaitGroup
	wg.Add(len(w.resuming))
	donec := make(chan struct{})
	for i := range w.resuming {
		go func(ws *watcherStream) {
			defer wg.Done()
			if ws.closing {
				if ws.initReq.ctx.Err() != nil && ws.outc != nil {
					close(ws.outc)
					ws.outc = nil
				}
				return
			}
			select {
			case <-ws.initReq.ctx.Done():
				// closed ws will be removed from resuming
				ws.closing = true
				close(ws.outc)
				ws.outc = nil
				w.wg.Add(1)
				// report asynchronously: the send blocks until closingc is read
				go func() {
					defer w.wg.Done()
					w.closingc <- ws
				}()
			case <-stopc:
			}
		}(w.resuming[i])
	}
	go func() {
		defer close(donec)
		wg.Wait()
	}()
	return donec
}
  730. // joinSubstreams waits for all substream goroutines to complete.
  731. func (w *watchGrpcStream) joinSubstreams() {
  732. for _, ws := range w.substreams {
  733. <-ws.donec
  734. }
  735. for _, ws := range w.resuming {
  736. if ws != nil {
  737. <-ws.donec
  738. }
  739. }
  740. }
  741. var maxBackoff = 100 * time.Millisecond
  742. // openWatchClient retries opening a watch client until success or halt.
  743. // manually retry in case "ws==nil && err==nil"
  744. // TODO: remove FailFast=false
  745. func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
  746. backoff := time.Millisecond
  747. for {
  748. select {
  749. case <-w.ctx.Done():
  750. if err == nil {
  751. return nil, w.ctx.Err()
  752. }
  753. return nil, err
  754. default:
  755. }
  756. if ws, err = w.remote.Watch(w.ctx, w.callOpts...); ws != nil && err == nil {
  757. break
  758. }
  759. if isHaltErr(w.ctx, err) {
  760. return nil, v3rpc.Error(err)
  761. }
  762. if isUnavailableErr(w.ctx, err) {
  763. // retry, but backoff
  764. if backoff < maxBackoff {
  765. // 25% backoff factor
  766. backoff = backoff + backoff/4
  767. if backoff > maxBackoff {
  768. backoff = maxBackoff
  769. }
  770. }
  771. time.Sleep(backoff)
  772. }
  773. }
  774. return ws, nil
  775. }
  776. // toPB converts an internal watch request structure to its protobuf WatchRequest structure.
  777. func (wr *watchRequest) toPB() *pb.WatchRequest {
  778. req := &pb.WatchCreateRequest{
  779. StartRevision: wr.rev,
  780. Key: []byte(wr.key),
  781. RangeEnd: []byte(wr.end),
  782. ProgressNotify: wr.progressNotify,
  783. Filters: wr.filters,
  784. PrevKv: wr.prevKV,
  785. Fragment: wr.fragment,
  786. }
  787. cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
  788. return &pb.WatchRequest{RequestUnion: cr}
  789. }
  790. func streamKeyFromCtx(ctx context.Context) string {
  791. if md, ok := metadata.FromOutgoingContext(ctx); ok {
  792. return fmt.Sprintf("%+v", md)
  793. }
  794. return ""
  795. }