watch.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "fmt"
  17. "sync"
  18. "time"
  19. v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  20. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  21. mvccpb "github.com/coreos/etcd/mvcc/mvccpb"
  22. "golang.org/x/net/context"
  23. "google.golang.org/grpc"
  24. )
  25. const (
  26. EventTypeDelete = mvccpb.DELETE
  27. EventTypePut = mvccpb.PUT
  28. closeSendErrTimeout = 250 * time.Millisecond
  29. )
  30. type Event mvccpb.Event
  31. type WatchChan <-chan WatchResponse
  32. type Watcher interface {
  33. // Watch watches on a key or prefix. The watched events will be returned
  34. // through the returned channel.
  35. // If the watch is slow or the required rev is compacted, the watch request
  36. // might be canceled from the server-side and the chan will be closed.
  37. // 'opts' can be: 'WithRev' and/or 'WithPrefix'.
  38. Watch(ctx context.Context, key string, opts ...OpOption) WatchChan
  39. // Close closes the watcher and cancels all watch requests.
  40. Close() error
  41. }
  42. type WatchResponse struct {
  43. Header pb.ResponseHeader
  44. Events []*Event
  45. // CompactRevision is the minimum revision the watcher may receive.
  46. CompactRevision int64
  47. // Canceled is used to indicate watch failure.
  48. // If the watch failed and the stream was about to close, before the channel is closed,
  49. // the channel sends a final response that has Canceled set to true with a non-nil Err().
  50. Canceled bool
  51. closeErr error
  52. }
  53. // IsCreate returns true if the event tells that the key is newly created.
  54. func (e *Event) IsCreate() bool {
  55. return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
  56. }
  57. // IsModify returns true if the event tells that a new value is put on existing key.
  58. func (e *Event) IsModify() bool {
  59. return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
  60. }
  61. // Err is the error value if this WatchResponse holds an error.
  62. func (wr *WatchResponse) Err() error {
  63. switch {
  64. case wr.closeErr != nil:
  65. return v3rpc.Error(wr.closeErr)
  66. case wr.CompactRevision != 0:
  67. return v3rpc.ErrCompacted
  68. case wr.Canceled:
  69. return v3rpc.ErrFutureRev
  70. }
  71. return nil
  72. }
  73. // IsProgressNotify returns true if the WatchResponse is progress notification.
  74. func (wr *WatchResponse) IsProgressNotify() bool {
  75. return len(wr.Events) == 0 && !wr.Canceled
  76. }
  77. // watcher implements the Watcher interface
  78. type watcher struct {
  79. remote pb.WatchClient
  80. // mu protects the grpc streams map
  81. mu sync.RWMutex
  82. // streams holds all the active grpc streams keyed by ctx value.
  83. streams map[string]*watchGrpcStream
  84. }
  85. type watchGrpcStream struct {
  86. owner *watcher
  87. remote pb.WatchClient
  88. // ctx controls internal remote.Watch requests
  89. ctx context.Context
  90. // ctxKey is the key used when looking up this stream's context
  91. ctxKey string
  92. cancel context.CancelFunc
  93. // mu protects the streams map
  94. mu sync.RWMutex
  95. // streams holds all active watchers
  96. streams map[int64]*watcherStream
  97. // reqc sends a watch request from Watch() to the main goroutine
  98. reqc chan *watchRequest
  99. // respc receives data from the watch client
  100. respc chan *pb.WatchResponse
  101. // stopc is sent to the main goroutine to stop all processing
  102. stopc chan struct{}
  103. // donec closes to broadcast shutdown
  104. donec chan struct{}
  105. // errc transmits errors from grpc Recv to the watch stream reconn logic
  106. errc chan error
  107. // the error that closed the watch stream
  108. closeErr error
  109. }
  110. // watchRequest is issued by the subscriber to start a new watcher
  111. type watchRequest struct {
  112. ctx context.Context
  113. key string
  114. end string
  115. rev int64
  116. // progressNotify is for progress updates
  117. progressNotify bool
  118. // filters is the list of events to filter out
  119. filters []pb.WatchCreateRequest_FilterType
  120. // get the previous key-value pair before the event happens
  121. prevKV bool
  122. // retc receives a chan WatchResponse once the watcher is established
  123. retc chan chan WatchResponse
  124. }
  125. // watcherStream represents a registered watcher
  126. type watcherStream struct {
  127. // initReq is the request that initiated this request
  128. initReq watchRequest
  129. // outc publishes watch responses to subscriber
  130. outc chan<- WatchResponse
  131. // recvc buffers watch responses before publishing
  132. recvc chan *WatchResponse
  133. id int64
  134. // lastRev is revision last successfully sent over outc
  135. lastRev int64
  136. // resumec indicates the stream must recover at a given revision
  137. resumec chan int64
  138. }
  139. func NewWatcher(c *Client) Watcher {
  140. return NewWatchFromWatchClient(pb.NewWatchClient(c.conn))
  141. }
  142. func NewWatchFromWatchClient(wc pb.WatchClient) Watcher {
  143. return &watcher{
  144. remote: wc,
  145. streams: make(map[string]*watchGrpcStream),
  146. }
  147. }
  148. // never closes
  149. var valCtxCh = make(chan struct{})
  150. var zeroTime = time.Unix(0, 0)
  151. // ctx with only the values; never Done
  152. type valCtx struct{ context.Context }
  153. func (vc *valCtx) Deadline() (time.Time, bool) { return zeroTime, false }
  154. func (vc *valCtx) Done() <-chan struct{} { return valCtxCh }
  155. func (vc *valCtx) Err() error { return nil }
  156. func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
  157. ctx, cancel := context.WithCancel(&valCtx{inctx})
  158. wgs := &watchGrpcStream{
  159. owner: w,
  160. remote: w.remote,
  161. ctx: ctx,
  162. ctxKey: fmt.Sprintf("%v", inctx),
  163. cancel: cancel,
  164. streams: make(map[int64]*watcherStream),
  165. respc: make(chan *pb.WatchResponse),
  166. reqc: make(chan *watchRequest),
  167. stopc: make(chan struct{}),
  168. donec: make(chan struct{}),
  169. errc: make(chan error, 1),
  170. }
  171. go wgs.run()
  172. return wgs
  173. }
  174. // Watch posts a watch request to run() and waits for a new watcher channel
  175. func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
  176. ow := opWatch(key, opts...)
  177. retc := make(chan chan WatchResponse, 1)
  178. var filters []pb.WatchCreateRequest_FilterType
  179. if ow.filterPut {
  180. filters = append(filters, pb.WatchCreateRequest_NOPUT)
  181. }
  182. if ow.filterDelete {
  183. filters = append(filters, pb.WatchCreateRequest_NODELETE)
  184. }
  185. wr := &watchRequest{
  186. ctx: ctx,
  187. key: string(ow.key),
  188. end: string(ow.end),
  189. rev: ow.rev,
  190. progressNotify: ow.progressNotify,
  191. filters: filters,
  192. prevKV: ow.prevKV,
  193. retc: retc,
  194. }
  195. ok := false
  196. ctxKey := fmt.Sprintf("%v", ctx)
  197. // find or allocate appropriate grpc watch stream
  198. w.mu.Lock()
  199. if w.streams == nil {
  200. // closed
  201. w.mu.Unlock()
  202. ch := make(chan WatchResponse)
  203. close(ch)
  204. return ch
  205. }
  206. wgs := w.streams[ctxKey]
  207. if wgs == nil {
  208. wgs = w.newWatcherGrpcStream(ctx)
  209. w.streams[ctxKey] = wgs
  210. }
  211. donec := wgs.donec
  212. reqc := wgs.reqc
  213. w.mu.Unlock()
  214. // couldn't create channel; return closed channel
  215. closeCh := make(chan WatchResponse, 1)
  216. // submit request
  217. select {
  218. case reqc <- wr:
  219. ok = true
  220. case <-wr.ctx.Done():
  221. case <-donec:
  222. if wgs.closeErr != nil {
  223. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  224. break
  225. }
  226. // retry; may have dropped stream from no ctxs
  227. return w.Watch(ctx, key, opts...)
  228. }
  229. // receive channel
  230. if ok {
  231. select {
  232. case ret := <-retc:
  233. return ret
  234. case <-ctx.Done():
  235. case <-donec:
  236. if wgs.closeErr != nil {
  237. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  238. break
  239. }
  240. // retry; may have dropped stream from no ctxs
  241. return w.Watch(ctx, key, opts...)
  242. }
  243. }
  244. close(closeCh)
  245. return closeCh
  246. }
  247. func (w *watcher) Close() (err error) {
  248. w.mu.Lock()
  249. streams := w.streams
  250. w.streams = nil
  251. w.mu.Unlock()
  252. for _, wgs := range streams {
  253. if werr := wgs.Close(); werr != nil {
  254. err = werr
  255. }
  256. }
  257. return err
  258. }
  259. func (w *watchGrpcStream) Close() (err error) {
  260. close(w.stopc)
  261. <-w.donec
  262. select {
  263. case err = <-w.errc:
  264. default:
  265. }
  266. return toErr(w.ctx, err)
  267. }
  268. func (w *watchGrpcStream) addStream(resp *pb.WatchResponse, pendingReq *watchRequest) {
  269. if pendingReq == nil {
  270. // no pending request; ignore
  271. return
  272. }
  273. if resp.Canceled || resp.CompactRevision != 0 {
  274. // a cancel at id creation time means the start revision has
  275. // been compacted out of the store
  276. ret := make(chan WatchResponse, 1)
  277. ret <- WatchResponse{
  278. Header: *resp.Header,
  279. CompactRevision: resp.CompactRevision,
  280. Canceled: true}
  281. close(ret)
  282. pendingReq.retc <- ret
  283. return
  284. }
  285. ret := make(chan WatchResponse)
  286. if resp.WatchId == -1 {
  287. // failed; no channel
  288. close(ret)
  289. pendingReq.retc <- ret
  290. return
  291. }
  292. ws := &watcherStream{
  293. initReq: *pendingReq,
  294. id: resp.WatchId,
  295. outc: ret,
  296. // buffered so unlikely to block on sending while holding mu
  297. recvc: make(chan *WatchResponse, 4),
  298. resumec: make(chan int64),
  299. }
  300. if pendingReq.rev == 0 {
  301. // note the header revision so that a put following a current watcher
  302. // disconnect will arrive on the watcher channel after reconnect
  303. ws.initReq.rev = resp.Header.Revision
  304. }
  305. w.mu.Lock()
  306. w.streams[ws.id] = ws
  307. w.mu.Unlock()
  308. // pass back the subscriber channel for the watcher
  309. pendingReq.retc <- ret
  310. // send messages to subscriber
  311. go w.serveStream(ws)
  312. }
  313. // closeStream closes the watcher resources and removes it
  314. func (w *watchGrpcStream) closeStream(ws *watcherStream) {
  315. // cancels request stream; subscriber receives nil channel
  316. close(ws.initReq.retc)
  317. // close subscriber's channel
  318. close(ws.outc)
  319. delete(w.streams, ws.id)
  320. }
  321. // run is the root of the goroutines for managing a watcher client
  322. func (w *watchGrpcStream) run() {
  323. var wc pb.Watch_WatchClient
  324. var closeErr error
  325. defer func() {
  326. w.owner.mu.Lock()
  327. w.closeErr = closeErr
  328. if w.owner.streams != nil {
  329. delete(w.owner.streams, w.ctxKey)
  330. }
  331. close(w.donec)
  332. w.owner.mu.Unlock()
  333. w.cancel()
  334. }()
  335. // start a stream with the etcd grpc server
  336. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  337. return
  338. }
  339. var pendingReq, failedReq *watchRequest
  340. curReqC := w.reqc
  341. cancelSet := make(map[int64]struct{})
  342. for {
  343. select {
  344. // Watch() requested
  345. case pendingReq = <-curReqC:
  346. // no more watch requests until there's a response
  347. curReqC = nil
  348. if err := wc.Send(pendingReq.toPB()); err == nil {
  349. // pendingReq now waits on w.respc
  350. break
  351. }
  352. failedReq = pendingReq
  353. // New events from the watch client
  354. case pbresp := <-w.respc:
  355. switch {
  356. case pbresp.Created:
  357. // response to pending req, try to add
  358. w.addStream(pbresp, pendingReq)
  359. pendingReq = nil
  360. curReqC = w.reqc
  361. case pbresp.Canceled:
  362. delete(cancelSet, pbresp.WatchId)
  363. // shutdown serveStream, if any
  364. w.mu.Lock()
  365. if ws, ok := w.streams[pbresp.WatchId]; ok {
  366. close(ws.recvc)
  367. delete(w.streams, ws.id)
  368. }
  369. numStreams := len(w.streams)
  370. w.mu.Unlock()
  371. if numStreams == 0 {
  372. // don't leak watcher streams
  373. return
  374. }
  375. default:
  376. // dispatch to appropriate watch stream
  377. if ok := w.dispatchEvent(pbresp); ok {
  378. break
  379. }
  380. // watch response on unexpected watch id; cancel id
  381. if _, ok := cancelSet[pbresp.WatchId]; ok {
  382. break
  383. }
  384. cancelSet[pbresp.WatchId] = struct{}{}
  385. cr := &pb.WatchRequest_CancelRequest{
  386. CancelRequest: &pb.WatchCancelRequest{
  387. WatchId: pbresp.WatchId,
  388. },
  389. }
  390. req := &pb.WatchRequest{RequestUnion: cr}
  391. wc.Send(req)
  392. }
  393. // watch client failed to recv; spawn another if possible
  394. // TODO report watch client errors from errc?
  395. case err := <-w.errc:
  396. if toErr(w.ctx, err) == v3rpc.ErrNoLeader {
  397. closeErr = err
  398. return
  399. }
  400. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  401. return
  402. }
  403. curReqC = w.reqc
  404. if pendingReq != nil {
  405. failedReq = pendingReq
  406. }
  407. cancelSet = make(map[int64]struct{})
  408. case <-w.stopc:
  409. return
  410. }
  411. // send failed; queue for retry
  412. if failedReq != nil {
  413. go func(wr *watchRequest) {
  414. select {
  415. case w.reqc <- wr:
  416. case <-wr.ctx.Done():
  417. case <-w.donec:
  418. }
  419. }(pendingReq)
  420. failedReq = nil
  421. pendingReq = nil
  422. }
  423. }
  424. }
  425. // dispatchEvent sends a WatchResponse to the appropriate watcher stream
  426. func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
  427. w.mu.RLock()
  428. defer w.mu.RUnlock()
  429. ws, ok := w.streams[pbresp.WatchId]
  430. events := make([]*Event, len(pbresp.Events))
  431. for i, ev := range pbresp.Events {
  432. events[i] = (*Event)(ev)
  433. }
  434. if ok {
  435. wr := &WatchResponse{
  436. Header: *pbresp.Header,
  437. Events: events,
  438. CompactRevision: pbresp.CompactRevision,
  439. Canceled: pbresp.Canceled}
  440. ws.recvc <- wr
  441. }
  442. return ok
  443. }
  444. // serveWatchClient forwards messages from the grpc stream to run()
  445. func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
  446. for {
  447. resp, err := wc.Recv()
  448. if err != nil {
  449. select {
  450. case w.errc <- err:
  451. case <-w.donec:
  452. }
  453. return
  454. }
  455. select {
  456. case w.respc <- resp:
  457. case <-w.donec:
  458. return
  459. }
  460. }
  461. }
  462. // serveStream forwards watch responses from run() to the subscriber
  463. func (w *watchGrpcStream) serveStream(ws *watcherStream) {
  464. var closeErr error
  465. emptyWr := &WatchResponse{}
  466. wrs := []*WatchResponse{}
  467. resuming := false
  468. closing := false
  469. for !closing {
  470. curWr := emptyWr
  471. outc := ws.outc
  472. if len(wrs) > 0 {
  473. curWr = wrs[0]
  474. } else {
  475. outc = nil
  476. }
  477. select {
  478. case outc <- *curWr:
  479. if wrs[0].Err() != nil {
  480. closing = true
  481. break
  482. }
  483. var newRev int64
  484. if len(wrs[0].Events) > 0 {
  485. newRev = wrs[0].Events[len(wrs[0].Events)-1].Kv.ModRevision
  486. } else {
  487. newRev = wrs[0].Header.Revision
  488. }
  489. if newRev != ws.lastRev {
  490. ws.lastRev = newRev
  491. }
  492. wrs[0] = nil
  493. wrs = wrs[1:]
  494. case wr, ok := <-ws.recvc:
  495. if !ok {
  496. // shutdown from closeStream
  497. return
  498. }
  499. // resume up to last seen event if disconnected
  500. if resuming && wr.Err() == nil {
  501. resuming = false
  502. // trim events already seen
  503. for i := 0; i < len(wr.Events); i++ {
  504. if wr.Events[i].Kv.ModRevision > ws.lastRev {
  505. wr.Events = wr.Events[i:]
  506. break
  507. }
  508. }
  509. // only forward new events
  510. if wr.Events[0].Kv.ModRevision == ws.lastRev {
  511. break
  512. }
  513. }
  514. resuming = false
  515. // TODO don't keep buffering if subscriber stops reading
  516. wrs = append(wrs, wr)
  517. case resumeRev := <-ws.resumec:
  518. wrs = nil
  519. resuming = true
  520. if resumeRev == -1 {
  521. // pause serving stream while resume gets set up
  522. break
  523. }
  524. if resumeRev != ws.lastRev {
  525. panic("unexpected resume revision")
  526. }
  527. case <-w.donec:
  528. closing = true
  529. closeErr = w.closeErr
  530. case <-ws.initReq.ctx.Done():
  531. closing = true
  532. }
  533. }
  534. // try to send off close error
  535. if closeErr != nil {
  536. select {
  537. case ws.outc <- WatchResponse{closeErr: w.closeErr}:
  538. case <-w.donec:
  539. case <-time.After(closeSendErrTimeout):
  540. }
  541. }
  542. w.mu.Lock()
  543. w.closeStream(ws)
  544. w.mu.Unlock()
  545. // lazily send cancel message if events on missing id
  546. }
  547. func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
  548. ws, rerr := w.resume()
  549. if rerr != nil {
  550. return nil, rerr
  551. }
  552. go w.serveWatchClient(ws)
  553. return ws, nil
  554. }
  555. // resume creates a new WatchClient with all current watchers reestablished
  556. func (w *watchGrpcStream) resume() (ws pb.Watch_WatchClient, err error) {
  557. for {
  558. if ws, err = w.openWatchClient(); err != nil {
  559. break
  560. } else if err = w.resumeWatchers(ws); err == nil {
  561. break
  562. }
  563. }
  564. return ws, v3rpc.Error(err)
  565. }
  566. // openWatchClient retries opening a watchclient until retryConnection fails
  567. func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
  568. for {
  569. select {
  570. case <-w.stopc:
  571. if err == nil {
  572. err = context.Canceled
  573. }
  574. return nil, err
  575. default:
  576. }
  577. if ws, err = w.remote.Watch(w.ctx, grpc.FailFast(false)); ws != nil && err == nil {
  578. break
  579. }
  580. if isHaltErr(w.ctx, err) {
  581. return nil, v3rpc.Error(err)
  582. }
  583. }
  584. return ws, nil
  585. }
  586. // resumeWatchers rebuilds every registered watcher on a new client
  587. func (w *watchGrpcStream) resumeWatchers(wc pb.Watch_WatchClient) error {
  588. w.mu.RLock()
  589. streams := make([]*watcherStream, 0, len(w.streams))
  590. for _, ws := range w.streams {
  591. streams = append(streams, ws)
  592. }
  593. w.mu.RUnlock()
  594. for _, ws := range streams {
  595. // pause serveStream
  596. ws.resumec <- -1
  597. // reconstruct watcher from initial request
  598. if ws.lastRev != 0 {
  599. ws.initReq.rev = ws.lastRev
  600. }
  601. if err := wc.Send(ws.initReq.toPB()); err != nil {
  602. return err
  603. }
  604. // wait for request ack
  605. resp, err := wc.Recv()
  606. if err != nil {
  607. return err
  608. } else if len(resp.Events) != 0 || !resp.Created {
  609. return fmt.Errorf("watcher: unexpected response (%+v)", resp)
  610. }
  611. // id may be different since new remote watcher; update map
  612. w.mu.Lock()
  613. delete(w.streams, ws.id)
  614. ws.id = resp.WatchId
  615. w.streams[ws.id] = ws
  616. w.mu.Unlock()
  617. // unpause serveStream
  618. ws.resumec <- ws.lastRev
  619. }
  620. return nil
  621. }
  622. // toPB converts an internal watch request structure to its protobuf messagefunc (wr *watchRequest)
  623. func (wr *watchRequest) toPB() *pb.WatchRequest {
  624. req := &pb.WatchCreateRequest{
  625. StartRevision: wr.rev,
  626. Key: []byte(wr.key),
  627. RangeEnd: []byte(wr.end),
  628. ProgressNotify: wr.progressNotify,
  629. Filters: wr.filters,
  630. PrevKv: wr.prevKV,
  631. }
  632. cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
  633. return &pb.WatchRequest{RequestUnion: cr}
  634. }