watch.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "fmt"
  17. "sync"
  18. "time"
  19. v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  20. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  21. mvccpb "github.com/coreos/etcd/mvcc/mvccpb"
  22. "golang.org/x/net/context"
  23. "google.golang.org/grpc"
  24. )
  25. const (
  26. EventTypeDelete = mvccpb.DELETE
  27. EventTypePut = mvccpb.PUT
  28. closeSendErrTimeout = 250 * time.Millisecond
  29. )
  30. type Event mvccpb.Event
  31. type WatchChan <-chan WatchResponse
  32. type Watcher interface {
  33. // Watch watches on a key or prefix. The watched events will be returned
  34. // through the returned channel.
  35. // If the watch is slow or the required rev is compacted, the watch request
  36. // might be canceled from the server-side and the chan will be closed.
  37. // 'opts' can be: 'WithRev' and/or 'WithPrefix'.
  38. Watch(ctx context.Context, key string, opts ...OpOption) WatchChan
  39. // Close closes the watcher and cancels all watch requests.
  40. Close() error
  41. }
  42. type WatchResponse struct {
  43. Header pb.ResponseHeader
  44. Events []*Event
  45. // CompactRevision is the minimum revision the watcher may receive.
  46. CompactRevision int64
  47. // Canceled is used to indicate watch failure.
  48. // If the watch failed and the stream was about to close, before the channel is closed,
  49. // the channel sends a final response that has Canceled set to true with a non-nil Err().
  50. Canceled bool
  51. closeErr error
  52. }
  53. // IsCreate returns true if the event tells that the key is newly created.
  54. func (e *Event) IsCreate() bool {
  55. return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
  56. }
  57. // IsModify returns true if the event tells that a new value is put on existing key.
  58. func (e *Event) IsModify() bool {
  59. return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
  60. }
  61. // Err is the error value if this WatchResponse holds an error.
  62. func (wr *WatchResponse) Err() error {
  63. switch {
  64. case wr.closeErr != nil:
  65. return v3rpc.Error(wr.closeErr)
  66. case wr.CompactRevision != 0:
  67. return v3rpc.ErrCompacted
  68. case wr.Canceled:
  69. return v3rpc.ErrFutureRev
  70. }
  71. return nil
  72. }
  73. // IsProgressNotify returns true if the WatchResponse is progress notification.
  74. func (wr *WatchResponse) IsProgressNotify() bool {
  75. return len(wr.Events) == 0 && !wr.Canceled
  76. }
  77. // watcher implements the Watcher interface
  78. type watcher struct {
  79. remote pb.WatchClient
  80. // mu protects the grpc streams map
  81. mu sync.RWMutex
  82. // streams holds all the active grpc streams keyed by ctx value.
  83. streams map[string]*watchGrpcStream
  84. }
  85. type watchGrpcStream struct {
  86. owner *watcher
  87. remote pb.WatchClient
  88. // ctx controls internal remote.Watch requests
  89. ctx context.Context
  90. // ctxKey is the key used when looking up this stream's context
  91. ctxKey string
  92. cancel context.CancelFunc
  93. // mu protects the streams map
  94. mu sync.RWMutex
  95. // streams holds all active watchers
  96. streams map[int64]*watcherStream
  97. // reqc sends a watch request from Watch() to the main goroutine
  98. reqc chan *watchRequest
  99. // respc receives data from the watch client
  100. respc chan *pb.WatchResponse
  101. // stopc is sent to the main goroutine to stop all processing
  102. stopc chan struct{}
  103. // donec closes to broadcast shutdown
  104. donec chan struct{}
  105. // errc transmits errors from grpc Recv to the watch stream reconn logic
  106. errc chan error
  107. // the error that closed the watch stream
  108. closeErr error
  109. }
  110. // watchRequest is issued by the subscriber to start a new watcher
  111. type watchRequest struct {
  112. ctx context.Context
  113. key string
  114. end string
  115. rev int64
  116. // progressNotify is for progress updates
  117. progressNotify bool
  118. // get the previous key-value pair before the event happens
  119. prevKV bool
  120. // retc receives a chan WatchResponse once the watcher is established
  121. retc chan chan WatchResponse
  122. }
  123. // watcherStream represents a registered watcher
  124. type watcherStream struct {
  125. // initReq is the request that initiated this request
  126. initReq watchRequest
  127. // outc publishes watch responses to subscriber
  128. outc chan<- WatchResponse
  129. // recvc buffers watch responses before publishing
  130. recvc chan *WatchResponse
  131. id int64
  132. // lastRev is revision last successfully sent over outc
  133. lastRev int64
  134. // resumec indicates the stream must recover at a given revision
  135. resumec chan int64
  136. }
  137. func NewWatcher(c *Client) Watcher {
  138. return &watcher{
  139. remote: pb.NewWatchClient(c.conn),
  140. streams: make(map[string]*watchGrpcStream),
  141. }
  142. }
  143. // never closes
  144. var valCtxCh = make(chan struct{})
  145. var zeroTime = time.Unix(0, 0)
  146. // ctx with only the values; never Done
  147. type valCtx struct{ context.Context }
  148. func (vc *valCtx) Deadline() (time.Time, bool) { return zeroTime, false }
  149. func (vc *valCtx) Done() <-chan struct{} { return valCtxCh }
  150. func (vc *valCtx) Err() error { return nil }
  151. func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
  152. ctx, cancel := context.WithCancel(&valCtx{inctx})
  153. wgs := &watchGrpcStream{
  154. owner: w,
  155. remote: w.remote,
  156. ctx: ctx,
  157. ctxKey: fmt.Sprintf("%v", inctx),
  158. cancel: cancel,
  159. streams: make(map[int64]*watcherStream),
  160. respc: make(chan *pb.WatchResponse),
  161. reqc: make(chan *watchRequest),
  162. stopc: make(chan struct{}),
  163. donec: make(chan struct{}),
  164. errc: make(chan error, 1),
  165. }
  166. go wgs.run()
  167. return wgs
  168. }
  169. // Watch posts a watch request to run() and waits for a new watcher channel
  170. func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
  171. ow := opWatch(key, opts...)
  172. retc := make(chan chan WatchResponse, 1)
  173. wr := &watchRequest{
  174. ctx: ctx,
  175. key: string(ow.key),
  176. end: string(ow.end),
  177. rev: ow.rev,
  178. progressNotify: ow.progressNotify,
  179. prevKV: ow.prevKV,
  180. retc: retc,
  181. }
  182. ok := false
  183. ctxKey := fmt.Sprintf("%v", ctx)
  184. // find or allocate appropriate grpc watch stream
  185. w.mu.Lock()
  186. if w.streams == nil {
  187. // closed
  188. w.mu.Unlock()
  189. ch := make(chan WatchResponse)
  190. close(ch)
  191. return ch
  192. }
  193. wgs := w.streams[ctxKey]
  194. if wgs == nil {
  195. wgs = w.newWatcherGrpcStream(ctx)
  196. w.streams[ctxKey] = wgs
  197. }
  198. donec := wgs.donec
  199. reqc := wgs.reqc
  200. w.mu.Unlock()
  201. // couldn't create channel; return closed channel
  202. closeCh := make(chan WatchResponse, 1)
  203. // submit request
  204. select {
  205. case reqc <- wr:
  206. ok = true
  207. case <-wr.ctx.Done():
  208. case <-donec:
  209. if wgs.closeErr != nil {
  210. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  211. break
  212. }
  213. // retry; may have dropped stream from no ctxs
  214. return w.Watch(ctx, key, opts...)
  215. }
  216. // receive channel
  217. if ok {
  218. select {
  219. case ret := <-retc:
  220. return ret
  221. case <-ctx.Done():
  222. case <-donec:
  223. if wgs.closeErr != nil {
  224. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  225. break
  226. }
  227. // retry; may have dropped stream from no ctxs
  228. return w.Watch(ctx, key, opts...)
  229. }
  230. }
  231. close(closeCh)
  232. return closeCh
  233. }
  234. func (w *watcher) Close() (err error) {
  235. w.mu.Lock()
  236. streams := w.streams
  237. w.streams = nil
  238. w.mu.Unlock()
  239. for _, wgs := range streams {
  240. if werr := wgs.Close(); werr != nil {
  241. err = werr
  242. }
  243. }
  244. return err
  245. }
  246. func (w *watchGrpcStream) Close() (err error) {
  247. close(w.stopc)
  248. <-w.donec
  249. select {
  250. case err = <-w.errc:
  251. default:
  252. }
  253. return toErr(w.ctx, err)
  254. }
  255. func (w *watchGrpcStream) addStream(resp *pb.WatchResponse, pendingReq *watchRequest) {
  256. if pendingReq == nil {
  257. // no pending request; ignore
  258. return
  259. }
  260. if resp.Canceled || resp.CompactRevision != 0 {
  261. // a cancel at id creation time means the start revision has
  262. // been compacted out of the store
  263. ret := make(chan WatchResponse, 1)
  264. ret <- WatchResponse{
  265. Header: *resp.Header,
  266. CompactRevision: resp.CompactRevision,
  267. Canceled: true}
  268. close(ret)
  269. pendingReq.retc <- ret
  270. return
  271. }
  272. ret := make(chan WatchResponse)
  273. if resp.WatchId == -1 {
  274. // failed; no channel
  275. close(ret)
  276. pendingReq.retc <- ret
  277. return
  278. }
  279. ws := &watcherStream{
  280. initReq: *pendingReq,
  281. id: resp.WatchId,
  282. outc: ret,
  283. // buffered so unlikely to block on sending while holding mu
  284. recvc: make(chan *WatchResponse, 4),
  285. resumec: make(chan int64),
  286. }
  287. if pendingReq.rev == 0 {
  288. // note the header revision so that a put following a current watcher
  289. // disconnect will arrive on the watcher channel after reconnect
  290. ws.initReq.rev = resp.Header.Revision
  291. }
  292. w.mu.Lock()
  293. w.streams[ws.id] = ws
  294. w.mu.Unlock()
  295. // pass back the subscriber channel for the watcher
  296. pendingReq.retc <- ret
  297. // send messages to subscriber
  298. go w.serveStream(ws)
  299. }
  300. // closeStream closes the watcher resources and removes it
  301. func (w *watchGrpcStream) closeStream(ws *watcherStream) {
  302. // cancels request stream; subscriber receives nil channel
  303. close(ws.initReq.retc)
  304. // close subscriber's channel
  305. close(ws.outc)
  306. delete(w.streams, ws.id)
  307. }
  308. // run is the root of the goroutines for managing a watcher client
  309. func (w *watchGrpcStream) run() {
  310. var wc pb.Watch_WatchClient
  311. var closeErr error
  312. defer func() {
  313. w.owner.mu.Lock()
  314. w.closeErr = closeErr
  315. if w.owner.streams != nil {
  316. delete(w.owner.streams, w.ctxKey)
  317. }
  318. close(w.donec)
  319. w.owner.mu.Unlock()
  320. w.cancel()
  321. }()
  322. // start a stream with the etcd grpc server
  323. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  324. return
  325. }
  326. var pendingReq, failedReq *watchRequest
  327. curReqC := w.reqc
  328. cancelSet := make(map[int64]struct{})
  329. for {
  330. select {
  331. // Watch() requested
  332. case pendingReq = <-curReqC:
  333. // no more watch requests until there's a response
  334. curReqC = nil
  335. if err := wc.Send(pendingReq.toPB()); err == nil {
  336. // pendingReq now waits on w.respc
  337. break
  338. }
  339. failedReq = pendingReq
  340. // New events from the watch client
  341. case pbresp := <-w.respc:
  342. switch {
  343. case pbresp.Created:
  344. // response to pending req, try to add
  345. w.addStream(pbresp, pendingReq)
  346. pendingReq = nil
  347. curReqC = w.reqc
  348. case pbresp.Canceled:
  349. delete(cancelSet, pbresp.WatchId)
  350. // shutdown serveStream, if any
  351. w.mu.Lock()
  352. if ws, ok := w.streams[pbresp.WatchId]; ok {
  353. close(ws.recvc)
  354. delete(w.streams, ws.id)
  355. }
  356. numStreams := len(w.streams)
  357. w.mu.Unlock()
  358. if numStreams == 0 {
  359. // don't leak watcher streams
  360. return
  361. }
  362. default:
  363. // dispatch to appropriate watch stream
  364. if ok := w.dispatchEvent(pbresp); ok {
  365. break
  366. }
  367. // watch response on unexpected watch id; cancel id
  368. if _, ok := cancelSet[pbresp.WatchId]; ok {
  369. break
  370. }
  371. cancelSet[pbresp.WatchId] = struct{}{}
  372. cr := &pb.WatchRequest_CancelRequest{
  373. CancelRequest: &pb.WatchCancelRequest{
  374. WatchId: pbresp.WatchId,
  375. },
  376. }
  377. req := &pb.WatchRequest{RequestUnion: cr}
  378. wc.Send(req)
  379. }
  380. // watch client failed to recv; spawn another if possible
  381. // TODO report watch client errors from errc?
  382. case err := <-w.errc:
  383. if toErr(w.ctx, err) == v3rpc.ErrNoLeader {
  384. closeErr = err
  385. return
  386. }
  387. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  388. return
  389. }
  390. curReqC = w.reqc
  391. if pendingReq != nil {
  392. failedReq = pendingReq
  393. }
  394. cancelSet = make(map[int64]struct{})
  395. case <-w.stopc:
  396. return
  397. }
  398. // send failed; queue for retry
  399. if failedReq != nil {
  400. go func(wr *watchRequest) {
  401. select {
  402. case w.reqc <- wr:
  403. case <-wr.ctx.Done():
  404. case <-w.donec:
  405. }
  406. }(pendingReq)
  407. failedReq = nil
  408. pendingReq = nil
  409. }
  410. }
  411. }
  412. // dispatchEvent sends a WatchResponse to the appropriate watcher stream
  413. func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
  414. w.mu.RLock()
  415. defer w.mu.RUnlock()
  416. ws, ok := w.streams[pbresp.WatchId]
  417. events := make([]*Event, len(pbresp.Events))
  418. for i, ev := range pbresp.Events {
  419. events[i] = (*Event)(ev)
  420. }
  421. if ok {
  422. wr := &WatchResponse{
  423. Header: *pbresp.Header,
  424. Events: events,
  425. CompactRevision: pbresp.CompactRevision,
  426. Canceled: pbresp.Canceled}
  427. ws.recvc <- wr
  428. }
  429. return ok
  430. }
  431. // serveWatchClient forwards messages from the grpc stream to run()
  432. func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
  433. for {
  434. resp, err := wc.Recv()
  435. if err != nil {
  436. select {
  437. case w.errc <- err:
  438. case <-w.donec:
  439. }
  440. return
  441. }
  442. select {
  443. case w.respc <- resp:
  444. case <-w.donec:
  445. return
  446. }
  447. }
  448. }
  449. // serveStream forwards watch responses from run() to the subscriber
  450. func (w *watchGrpcStream) serveStream(ws *watcherStream) {
  451. var closeErr error
  452. emptyWr := &WatchResponse{}
  453. wrs := []*WatchResponse{}
  454. resuming := false
  455. closing := false
  456. for !closing {
  457. curWr := emptyWr
  458. outc := ws.outc
  459. if len(wrs) > 0 {
  460. curWr = wrs[0]
  461. } else {
  462. outc = nil
  463. }
  464. select {
  465. case outc <- *curWr:
  466. if wrs[0].Err() != nil {
  467. closing = true
  468. break
  469. }
  470. var newRev int64
  471. if len(wrs[0].Events) > 0 {
  472. newRev = wrs[0].Events[len(wrs[0].Events)-1].Kv.ModRevision
  473. } else {
  474. newRev = wrs[0].Header.Revision
  475. }
  476. if newRev != ws.lastRev {
  477. ws.lastRev = newRev
  478. }
  479. wrs[0] = nil
  480. wrs = wrs[1:]
  481. case wr, ok := <-ws.recvc:
  482. if !ok {
  483. // shutdown from closeStream
  484. return
  485. }
  486. // resume up to last seen event if disconnected
  487. if resuming && wr.Err() == nil {
  488. resuming = false
  489. // trim events already seen
  490. for i := 0; i < len(wr.Events); i++ {
  491. if wr.Events[i].Kv.ModRevision > ws.lastRev {
  492. wr.Events = wr.Events[i:]
  493. break
  494. }
  495. }
  496. // only forward new events
  497. if wr.Events[0].Kv.ModRevision == ws.lastRev {
  498. break
  499. }
  500. }
  501. resuming = false
  502. // TODO don't keep buffering if subscriber stops reading
  503. wrs = append(wrs, wr)
  504. case resumeRev := <-ws.resumec:
  505. wrs = nil
  506. resuming = true
  507. if resumeRev == -1 {
  508. // pause serving stream while resume gets set up
  509. break
  510. }
  511. if resumeRev != ws.lastRev {
  512. panic("unexpected resume revision")
  513. }
  514. case <-w.donec:
  515. closing = true
  516. closeErr = w.closeErr
  517. case <-ws.initReq.ctx.Done():
  518. closing = true
  519. }
  520. }
  521. // try to send off close error
  522. if closeErr != nil {
  523. select {
  524. case ws.outc <- WatchResponse{closeErr: w.closeErr}:
  525. case <-w.donec:
  526. case <-time.After(closeSendErrTimeout):
  527. }
  528. }
  529. w.mu.Lock()
  530. w.closeStream(ws)
  531. w.mu.Unlock()
  532. // lazily send cancel message if events on missing id
  533. }
  534. func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
  535. ws, rerr := w.resume()
  536. if rerr != nil {
  537. return nil, rerr
  538. }
  539. go w.serveWatchClient(ws)
  540. return ws, nil
  541. }
  542. // resume creates a new WatchClient with all current watchers reestablished
  543. func (w *watchGrpcStream) resume() (ws pb.Watch_WatchClient, err error) {
  544. for {
  545. if ws, err = w.openWatchClient(); err != nil {
  546. break
  547. } else if err = w.resumeWatchers(ws); err == nil {
  548. break
  549. }
  550. }
  551. return ws, v3rpc.Error(err)
  552. }
  553. // openWatchClient retries opening a watchclient until retryConnection fails
  554. func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
  555. for {
  556. select {
  557. case <-w.stopc:
  558. if err == nil {
  559. err = context.Canceled
  560. }
  561. return nil, err
  562. default:
  563. }
  564. if ws, err = w.remote.Watch(w.ctx, grpc.FailFast(false)); ws != nil && err == nil {
  565. break
  566. }
  567. if isHaltErr(w.ctx, err) {
  568. return nil, v3rpc.Error(err)
  569. }
  570. }
  571. return ws, nil
  572. }
  573. // resumeWatchers rebuilds every registered watcher on a new client
  574. func (w *watchGrpcStream) resumeWatchers(wc pb.Watch_WatchClient) error {
  575. w.mu.RLock()
  576. streams := make([]*watcherStream, 0, len(w.streams))
  577. for _, ws := range w.streams {
  578. streams = append(streams, ws)
  579. }
  580. w.mu.RUnlock()
  581. for _, ws := range streams {
  582. // pause serveStream
  583. ws.resumec <- -1
  584. // reconstruct watcher from initial request
  585. if ws.lastRev != 0 {
  586. ws.initReq.rev = ws.lastRev
  587. }
  588. if err := wc.Send(ws.initReq.toPB()); err != nil {
  589. return err
  590. }
  591. // wait for request ack
  592. resp, err := wc.Recv()
  593. if err != nil {
  594. return err
  595. } else if len(resp.Events) != 0 || !resp.Created {
  596. return fmt.Errorf("watcher: unexpected response (%+v)", resp)
  597. }
  598. // id may be different since new remote watcher; update map
  599. w.mu.Lock()
  600. delete(w.streams, ws.id)
  601. ws.id = resp.WatchId
  602. w.streams[ws.id] = ws
  603. w.mu.Unlock()
  604. // unpause serveStream
  605. ws.resumec <- ws.lastRev
  606. }
  607. return nil
  608. }
  609. // toPB converts an internal watch request structure to its protobuf messagefunc (wr *watchRequest)
  610. func (wr *watchRequest) toPB() *pb.WatchRequest {
  611. req := &pb.WatchCreateRequest{
  612. StartRevision: wr.rev,
  613. Key: []byte(wr.key),
  614. RangeEnd: []byte(wr.end),
  615. ProgressNotify: wr.progressNotify,
  616. PrevKv: wr.prevKV,
  617. }
  618. cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
  619. return &pb.WatchRequest{RequestUnion: cr}
  620. }