watch.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "fmt"
  17. "sync"
  18. "time"
  19. v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  20. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  21. mvccpb "github.com/coreos/etcd/mvcc/mvccpb"
  22. "golang.org/x/net/context"
  23. )
  24. const (
  25. EventTypeDelete = mvccpb.DELETE
  26. EventTypePut = mvccpb.PUT
  27. closeSendErrTimeout = 250 * time.Millisecond
  28. )
  29. type Event mvccpb.Event
  30. type WatchChan <-chan WatchResponse
  31. type Watcher interface {
  32. // Watch watches on a key or prefix. The watched events will be returned
  33. // through the returned channel.
  34. // If the watch is slow or the required rev is compacted, the watch request
  35. // might be canceled from the server-side and the chan will be closed.
  36. // 'opts' can be: 'WithRev' and/or 'WithPrefix'.
  37. Watch(ctx context.Context, key string, opts ...OpOption) WatchChan
  38. // Close closes the watcher and cancels all watch requests.
  39. Close() error
  40. }
  41. type WatchResponse struct {
  42. Header pb.ResponseHeader
  43. Events []*Event
  44. // CompactRevision is the minimum revision the watcher may receive.
  45. CompactRevision int64
  46. // Canceled is used to indicate watch failure.
  47. // If the watch failed and the stream was about to close, before the channel is closed,
  48. // the channel sends a final response that has Canceled set to true with a non-nil Err().
  49. Canceled bool
  50. closeErr error
  51. }
  52. // IsCreate returns true if the event tells that the key is newly created.
  53. func (e *Event) IsCreate() bool {
  54. return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
  55. }
  56. // IsModify returns true if the event tells that a new value is put on existing key.
  57. func (e *Event) IsModify() bool {
  58. return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
  59. }
  60. // Err is the error value if this WatchResponse holds an error.
  61. func (wr *WatchResponse) Err() error {
  62. switch {
  63. case wr.closeErr != nil:
  64. return v3rpc.Error(wr.closeErr)
  65. case wr.CompactRevision != 0:
  66. return v3rpc.ErrCompacted
  67. case wr.Canceled:
  68. return v3rpc.ErrFutureRev
  69. }
  70. return nil
  71. }
  72. // IsProgressNotify returns true if the WatchResponse is progress notification.
  73. func (wr *WatchResponse) IsProgressNotify() bool {
  74. return len(wr.Events) == 0 && !wr.Canceled
  75. }
  76. // watcher implements the Watcher interface
  77. type watcher struct {
  78. remote pb.WatchClient
  79. // mu protects the grpc streams map
  80. mu sync.RWMutex
  81. // streams holds all the active grpc streams keyed by ctx value.
  82. streams map[string]*watchGrpcStream
  83. }
  84. type watchGrpcStream struct {
  85. owner *watcher
  86. remote pb.WatchClient
  87. // ctx controls internal remote.Watch requests
  88. ctx context.Context
  89. // ctxKey is the key used when looking up this stream's context
  90. ctxKey string
  91. cancel context.CancelFunc
  92. // mu protects the streams map
  93. mu sync.RWMutex
  94. // streams holds all active watchers
  95. streams map[int64]*watcherStream
  96. // reqc sends a watch request from Watch() to the main goroutine
  97. reqc chan *watchRequest
  98. // respc receives data from the watch client
  99. respc chan *pb.WatchResponse
  100. // stopc is sent to the main goroutine to stop all processing
  101. stopc chan struct{}
  102. // donec closes to broadcast shutdown
  103. donec chan struct{}
  104. // errc transmits errors from grpc Recv to the watch stream reconn logic
  105. errc chan error
  106. // the error that closed the watch stream
  107. closeErr error
  108. }
  109. // watchRequest is issued by the subscriber to start a new watcher
  110. type watchRequest struct {
  111. ctx context.Context
  112. key string
  113. end string
  114. rev int64
  115. // progressNotify is for progress updates
  116. progressNotify bool
  117. // get the previous key-value pair before the event happens
  118. prevKV bool
  119. // retc receives a chan WatchResponse once the watcher is established
  120. retc chan chan WatchResponse
  121. }
  122. // watcherStream represents a registered watcher
  123. type watcherStream struct {
  124. // initReq is the request that initiated this request
  125. initReq watchRequest
  126. // outc publishes watch responses to subscriber
  127. outc chan<- WatchResponse
  128. // recvc buffers watch responses before publishing
  129. recvc chan *WatchResponse
  130. id int64
  131. // lastRev is revision last successfully sent over outc
  132. lastRev int64
  133. // resumec indicates the stream must recover at a given revision
  134. resumec chan int64
  135. }
  136. func NewWatcher(c *Client) Watcher {
  137. return &watcher{
  138. remote: pb.NewWatchClient(c.conn),
  139. streams: make(map[string]*watchGrpcStream),
  140. }
  141. }
  142. // never closes
  143. var valCtxCh = make(chan struct{})
  144. var zeroTime = time.Unix(0, 0)
  145. // ctx with only the values; never Done
  146. type valCtx struct{ context.Context }
  147. func (vc *valCtx) Deadline() (time.Time, bool) { return zeroTime, false }
  148. func (vc *valCtx) Done() <-chan struct{} { return valCtxCh }
  149. func (vc *valCtx) Err() error { return nil }
  150. func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
  151. ctx, cancel := context.WithCancel(&valCtx{inctx})
  152. wgs := &watchGrpcStream{
  153. owner: w,
  154. remote: w.remote,
  155. ctx: ctx,
  156. ctxKey: fmt.Sprintf("%v", inctx),
  157. cancel: cancel,
  158. streams: make(map[int64]*watcherStream),
  159. respc: make(chan *pb.WatchResponse),
  160. reqc: make(chan *watchRequest),
  161. stopc: make(chan struct{}),
  162. donec: make(chan struct{}),
  163. errc: make(chan error, 1),
  164. }
  165. go wgs.run()
  166. return wgs
  167. }
  168. // Watch posts a watch request to run() and waits for a new watcher channel
  169. func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
  170. ow := opWatch(key, opts...)
  171. retc := make(chan chan WatchResponse, 1)
  172. wr := &watchRequest{
  173. ctx: ctx,
  174. key: string(ow.key),
  175. end: string(ow.end),
  176. rev: ow.rev,
  177. progressNotify: ow.progressNotify,
  178. prevKV: ow.prevKV,
  179. retc: retc,
  180. }
  181. ok := false
  182. ctxKey := fmt.Sprintf("%v", ctx)
  183. // find or allocate appropriate grpc watch stream
  184. w.mu.Lock()
  185. if w.streams == nil {
  186. // closed
  187. w.mu.Unlock()
  188. ch := make(chan WatchResponse)
  189. close(ch)
  190. return ch
  191. }
  192. wgs := w.streams[ctxKey]
  193. if wgs == nil {
  194. wgs = w.newWatcherGrpcStream(ctx)
  195. w.streams[ctxKey] = wgs
  196. }
  197. donec := wgs.donec
  198. reqc := wgs.reqc
  199. w.mu.Unlock()
  200. // couldn't create channel; return closed channel
  201. closeCh := make(chan WatchResponse, 1)
  202. // submit request
  203. select {
  204. case reqc <- wr:
  205. ok = true
  206. case <-wr.ctx.Done():
  207. case <-donec:
  208. if wgs.closeErr != nil {
  209. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  210. break
  211. }
  212. // retry; may have dropped stream from no ctxs
  213. return w.Watch(ctx, key, opts...)
  214. }
  215. // receive channel
  216. if ok {
  217. select {
  218. case ret := <-retc:
  219. return ret
  220. case <-ctx.Done():
  221. case <-donec:
  222. if wgs.closeErr != nil {
  223. closeCh <- WatchResponse{closeErr: wgs.closeErr}
  224. break
  225. }
  226. // retry; may have dropped stream from no ctxs
  227. return w.Watch(ctx, key, opts...)
  228. }
  229. }
  230. close(closeCh)
  231. return closeCh
  232. }
  233. func (w *watcher) Close() (err error) {
  234. w.mu.Lock()
  235. streams := w.streams
  236. w.streams = nil
  237. w.mu.Unlock()
  238. for _, wgs := range streams {
  239. if werr := wgs.Close(); werr != nil {
  240. err = werr
  241. }
  242. }
  243. return err
  244. }
  245. func (w *watchGrpcStream) Close() (err error) {
  246. close(w.stopc)
  247. <-w.donec
  248. select {
  249. case err = <-w.errc:
  250. default:
  251. }
  252. return toErr(w.ctx, err)
  253. }
  254. func (w *watchGrpcStream) addStream(resp *pb.WatchResponse, pendingReq *watchRequest) {
  255. if pendingReq == nil {
  256. // no pending request; ignore
  257. return
  258. }
  259. if resp.Canceled || resp.CompactRevision != 0 {
  260. // a cancel at id creation time means the start revision has
  261. // been compacted out of the store
  262. ret := make(chan WatchResponse, 1)
  263. ret <- WatchResponse{
  264. Header: *resp.Header,
  265. CompactRevision: resp.CompactRevision,
  266. Canceled: true}
  267. close(ret)
  268. pendingReq.retc <- ret
  269. return
  270. }
  271. ret := make(chan WatchResponse)
  272. if resp.WatchId == -1 {
  273. // failed; no channel
  274. close(ret)
  275. pendingReq.retc <- ret
  276. return
  277. }
  278. ws := &watcherStream{
  279. initReq: *pendingReq,
  280. id: resp.WatchId,
  281. outc: ret,
  282. // buffered so unlikely to block on sending while holding mu
  283. recvc: make(chan *WatchResponse, 4),
  284. resumec: make(chan int64),
  285. }
  286. if pendingReq.rev == 0 {
  287. // note the header revision so that a put following a current watcher
  288. // disconnect will arrive on the watcher channel after reconnect
  289. ws.initReq.rev = resp.Header.Revision
  290. }
  291. w.mu.Lock()
  292. w.streams[ws.id] = ws
  293. w.mu.Unlock()
  294. // pass back the subscriber channel for the watcher
  295. pendingReq.retc <- ret
  296. // send messages to subscriber
  297. go w.serveStream(ws)
  298. }
  299. // closeStream closes the watcher resources and removes it
  300. func (w *watchGrpcStream) closeStream(ws *watcherStream) {
  301. // cancels request stream; subscriber receives nil channel
  302. close(ws.initReq.retc)
  303. // close subscriber's channel
  304. close(ws.outc)
  305. delete(w.streams, ws.id)
  306. }
  307. // run is the root of the goroutines for managing a watcher client
  308. func (w *watchGrpcStream) run() {
  309. var wc pb.Watch_WatchClient
  310. var closeErr error
  311. defer func() {
  312. w.owner.mu.Lock()
  313. w.closeErr = closeErr
  314. if w.owner.streams != nil {
  315. delete(w.owner.streams, w.ctxKey)
  316. }
  317. close(w.donec)
  318. w.owner.mu.Unlock()
  319. w.cancel()
  320. }()
  321. // start a stream with the etcd grpc server
  322. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  323. return
  324. }
  325. var pendingReq, failedReq *watchRequest
  326. curReqC := w.reqc
  327. cancelSet := make(map[int64]struct{})
  328. for {
  329. select {
  330. // Watch() requested
  331. case pendingReq = <-curReqC:
  332. // no more watch requests until there's a response
  333. curReqC = nil
  334. if err := wc.Send(pendingReq.toPB()); err == nil {
  335. // pendingReq now waits on w.respc
  336. break
  337. }
  338. failedReq = pendingReq
  339. // New events from the watch client
  340. case pbresp := <-w.respc:
  341. switch {
  342. case pbresp.Created:
  343. // response to pending req, try to add
  344. w.addStream(pbresp, pendingReq)
  345. pendingReq = nil
  346. curReqC = w.reqc
  347. case pbresp.Canceled:
  348. delete(cancelSet, pbresp.WatchId)
  349. // shutdown serveStream, if any
  350. w.mu.Lock()
  351. if ws, ok := w.streams[pbresp.WatchId]; ok {
  352. close(ws.recvc)
  353. delete(w.streams, ws.id)
  354. }
  355. numStreams := len(w.streams)
  356. w.mu.Unlock()
  357. if numStreams == 0 {
  358. // don't leak watcher streams
  359. return
  360. }
  361. default:
  362. // dispatch to appropriate watch stream
  363. if ok := w.dispatchEvent(pbresp); ok {
  364. break
  365. }
  366. // watch response on unexpected watch id; cancel id
  367. if _, ok := cancelSet[pbresp.WatchId]; ok {
  368. break
  369. }
  370. cancelSet[pbresp.WatchId] = struct{}{}
  371. cr := &pb.WatchRequest_CancelRequest{
  372. CancelRequest: &pb.WatchCancelRequest{
  373. WatchId: pbresp.WatchId,
  374. },
  375. }
  376. req := &pb.WatchRequest{RequestUnion: cr}
  377. wc.Send(req)
  378. }
  379. // watch client failed to recv; spawn another if possible
  380. // TODO report watch client errors from errc?
  381. case err := <-w.errc:
  382. if toErr(w.ctx, err) == v3rpc.ErrNoLeader {
  383. closeErr = err
  384. return
  385. }
  386. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  387. return
  388. }
  389. curReqC = w.reqc
  390. if pendingReq != nil {
  391. failedReq = pendingReq
  392. }
  393. cancelSet = make(map[int64]struct{})
  394. case <-w.stopc:
  395. return
  396. }
  397. // send failed; queue for retry
  398. if failedReq != nil {
  399. go func(wr *watchRequest) {
  400. select {
  401. case w.reqc <- wr:
  402. case <-wr.ctx.Done():
  403. case <-w.donec:
  404. }
  405. }(pendingReq)
  406. failedReq = nil
  407. pendingReq = nil
  408. }
  409. }
  410. }
  411. // dispatchEvent sends a WatchResponse to the appropriate watcher stream
  412. func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
  413. w.mu.RLock()
  414. defer w.mu.RUnlock()
  415. ws, ok := w.streams[pbresp.WatchId]
  416. events := make([]*Event, len(pbresp.Events))
  417. for i, ev := range pbresp.Events {
  418. events[i] = (*Event)(ev)
  419. }
  420. if ok {
  421. wr := &WatchResponse{
  422. Header: *pbresp.Header,
  423. Events: events,
  424. CompactRevision: pbresp.CompactRevision,
  425. Canceled: pbresp.Canceled}
  426. ws.recvc <- wr
  427. }
  428. return ok
  429. }
  430. // serveWatchClient forwards messages from the grpc stream to run()
  431. func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
  432. for {
  433. resp, err := wc.Recv()
  434. if err != nil {
  435. select {
  436. case w.errc <- err:
  437. case <-w.donec:
  438. }
  439. return
  440. }
  441. select {
  442. case w.respc <- resp:
  443. case <-w.donec:
  444. return
  445. }
  446. }
  447. }
  448. // serveStream forwards watch responses from run() to the subscriber
  449. func (w *watchGrpcStream) serveStream(ws *watcherStream) {
  450. var closeErr error
  451. emptyWr := &WatchResponse{}
  452. wrs := []*WatchResponse{}
  453. resuming := false
  454. closing := false
  455. for !closing {
  456. curWr := emptyWr
  457. outc := ws.outc
  458. if len(wrs) > 0 {
  459. curWr = wrs[0]
  460. } else {
  461. outc = nil
  462. }
  463. select {
  464. case outc <- *curWr:
  465. if wrs[0].Err() != nil {
  466. closing = true
  467. break
  468. }
  469. var newRev int64
  470. if len(wrs[0].Events) > 0 {
  471. newRev = wrs[0].Events[len(wrs[0].Events)-1].Kv.ModRevision
  472. } else {
  473. newRev = wrs[0].Header.Revision
  474. }
  475. if newRev != ws.lastRev {
  476. ws.lastRev = newRev
  477. }
  478. wrs[0] = nil
  479. wrs = wrs[1:]
  480. case wr, ok := <-ws.recvc:
  481. if !ok {
  482. // shutdown from closeStream
  483. return
  484. }
  485. // resume up to last seen event if disconnected
  486. if resuming && wr.Err() == nil {
  487. resuming = false
  488. // trim events already seen
  489. for i := 0; i < len(wr.Events); i++ {
  490. if wr.Events[i].Kv.ModRevision > ws.lastRev {
  491. wr.Events = wr.Events[i:]
  492. break
  493. }
  494. }
  495. // only forward new events
  496. if wr.Events[0].Kv.ModRevision == ws.lastRev {
  497. break
  498. }
  499. }
  500. resuming = false
  501. // TODO don't keep buffering if subscriber stops reading
  502. wrs = append(wrs, wr)
  503. case resumeRev := <-ws.resumec:
  504. wrs = nil
  505. resuming = true
  506. if resumeRev == -1 {
  507. // pause serving stream while resume gets set up
  508. break
  509. }
  510. if resumeRev != ws.lastRev {
  511. panic("unexpected resume revision")
  512. }
  513. case <-w.donec:
  514. closing = true
  515. closeErr = w.closeErr
  516. case <-ws.initReq.ctx.Done():
  517. closing = true
  518. }
  519. }
  520. // try to send off close error
  521. if closeErr != nil {
  522. select {
  523. case ws.outc <- WatchResponse{closeErr: w.closeErr}:
  524. case <-w.donec:
  525. case <-time.After(closeSendErrTimeout):
  526. }
  527. }
  528. w.mu.Lock()
  529. w.closeStream(ws)
  530. w.mu.Unlock()
  531. // lazily send cancel message if events on missing id
  532. }
  533. func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
  534. ws, rerr := w.resume()
  535. if rerr != nil {
  536. return nil, rerr
  537. }
  538. go w.serveWatchClient(ws)
  539. return ws, nil
  540. }
  541. // resume creates a new WatchClient with all current watchers reestablished
  542. func (w *watchGrpcStream) resume() (ws pb.Watch_WatchClient, err error) {
  543. for {
  544. if ws, err = w.openWatchClient(); err != nil {
  545. break
  546. } else if err = w.resumeWatchers(ws); err == nil {
  547. break
  548. }
  549. }
  550. return ws, v3rpc.Error(err)
  551. }
  552. // openWatchClient retries opening a watchclient until retryConnection fails
  553. func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
  554. for {
  555. select {
  556. case <-w.stopc:
  557. if err == nil {
  558. err = context.Canceled
  559. }
  560. return nil, err
  561. default:
  562. }
  563. if ws, err = w.remote.Watch(w.ctx); ws != nil && err == nil {
  564. break
  565. }
  566. if isHaltErr(w.ctx, err) {
  567. return nil, v3rpc.Error(err)
  568. }
  569. }
  570. return ws, nil
  571. }
  572. // resumeWatchers rebuilds every registered watcher on a new client
  573. func (w *watchGrpcStream) resumeWatchers(wc pb.Watch_WatchClient) error {
  574. w.mu.RLock()
  575. streams := make([]*watcherStream, 0, len(w.streams))
  576. for _, ws := range w.streams {
  577. streams = append(streams, ws)
  578. }
  579. w.mu.RUnlock()
  580. for _, ws := range streams {
  581. // pause serveStream
  582. ws.resumec <- -1
  583. // reconstruct watcher from initial request
  584. if ws.lastRev != 0 {
  585. ws.initReq.rev = ws.lastRev
  586. }
  587. if err := wc.Send(ws.initReq.toPB()); err != nil {
  588. return err
  589. }
  590. // wait for request ack
  591. resp, err := wc.Recv()
  592. if err != nil {
  593. return err
  594. } else if len(resp.Events) != 0 || !resp.Created {
  595. return fmt.Errorf("watcher: unexpected response (%+v)", resp)
  596. }
  597. // id may be different since new remote watcher; update map
  598. w.mu.Lock()
  599. delete(w.streams, ws.id)
  600. ws.id = resp.WatchId
  601. w.streams[ws.id] = ws
  602. w.mu.Unlock()
  603. // unpause serveStream
  604. ws.resumec <- ws.lastRev
  605. }
  606. return nil
  607. }
  608. // toPB converts an internal watch request structure to its protobuf messagefunc (wr *watchRequest)
  609. func (wr *watchRequest) toPB() *pb.WatchRequest {
  610. req := &pb.WatchCreateRequest{
  611. StartRevision: wr.rev,
  612. Key: []byte(wr.key),
  613. RangeEnd: []byte(wr.end),
  614. ProgressNotify: wr.progressNotify,
  615. PrevKv: wr.prevKV,
  616. }
  617. cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
  618. return &pb.WatchRequest{RequestUnion: cr}
  619. }