watch.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "fmt"
  17. "sync"
  18. v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  19. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  20. mvccpb "github.com/coreos/etcd/mvcc/mvccpb"
  21. "golang.org/x/net/context"
  22. "google.golang.org/grpc"
  23. )
  // Event type constants, re-exported from the mvccpb package so callers can
  // compare Event.Type without importing mvccpb directly.
  24. const (
  // EventTypeDelete is mvccpb.DELETE.
  25. EventTypeDelete = mvccpb.DELETE
  // EventTypePut is mvccpb.PUT.
  26. EventTypePut = mvccpb.PUT
  27. )
  // Event is a watch event; it has the same underlying type as mvccpb.Event.
  28. type Event mvccpb.Event
  // WatchChan is the receive-only channel of WatchResponse values returned by
  // Watcher.Watch.
  29. type WatchChan <-chan WatchResponse
  // Watcher is the client-side interface for creating watches and shutting
  // them down.
  30. type Watcher interface {
  31. // Watch watches on a key or prefix. The watched events will be returned
  32. // through the returned channel.
  33. // If the watch is slow or the required rev is compacted, the watch request
  34. // might be canceled from the server-side and the chan will be closed.
  35. // 'opts' can be: 'WithRev' and/or 'WithPrefix'.
  36. Watch(ctx context.Context, key string, opts ...OpOption) WatchChan
  37. // Close closes the watcher and cancels all watch requests.
  38. Close() error
  39. }
  // WatchResponse is the value delivered to subscribers over a WatchChan.
  40. type WatchResponse struct {
  // Header is a copy of the header of the pb.WatchResponse this value was
  // built from.
  41. Header pb.ResponseHeader
  // Events holds the events carried by this response. A response with no
  // events that is not canceled is reported as a progress notification by
  // IsProgressNotify.
  42. Events []*Event
  43. // CompactRevision is the minimum revision the watcher may receive.
  // A non-zero value makes Err() return v3rpc.ErrCompacted.
  44. CompactRevision int64
  45. // Canceled is used to indicate watch failure.
  46. // If the watch failed and the stream was about to close, before the channel is closed,
  47. // the channel sends a final response that has Canceled set to true with a non-nil Err().
  48. Canceled bool
  49. }
  50. // IsCreate returns true if the event tells that the key is newly created.
  51. func (e *Event) IsCreate() bool {
  52. return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
  53. }
  54. // IsModify returns true if the event tells that a new value is put on existing key.
  55. func (e *Event) IsModify() bool {
  56. return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
  57. }
  58. // Err is the error value if this WatchResponse holds an error.
  59. func (wr *WatchResponse) Err() error {
  60. if wr.CompactRevision != 0 {
  61. return v3rpc.ErrCompacted
  62. }
  63. if wr.Canceled {
  64. return v3rpc.ErrFutureRev
  65. }
  66. return nil
  67. }
  68. // IsProgressNotify returns true if the WatchResponse is progress notification.
  69. func (wr *WatchResponse) IsProgressNotify() bool {
  70. return len(wr.Events) == 0 && !wr.Canceled
  71. }
  72. // watcher implements the Watcher interface
  73. type watcher struct {
  // rc is created by newRemoteClient in NewWatcher; openWatchClient calls its
  // reconnectWait between attempts to open a watch stream.
  74. rc *remoteClient
  // remote is the grpc watch client; it is assigned by the callback that
  // NewWatcher hands to newRemoteClient.
  75. remote pb.WatchClient
  76. // ctx controls internal remote.Watch requests
  77. ctx context.Context
  // cancel cancels ctx; run() calls it on exit.
  78. cancel context.CancelFunc
  79. // streams holds all active watchers
  // keyed by the server-assigned watch id.
  80. streams map[int64]*watcherStream
  81. // mu protects the streams map
  82. mu sync.RWMutex
  83. // reqc sends a watch request from Watch() to the main goroutine
  84. reqc chan *watchRequest
  85. // respc receives data from the watch client
  86. respc chan *pb.WatchResponse
  87. // stopc is sent to the main goroutine to stop all processing
  // (Close() closes it rather than sending a value).
  88. stopc chan struct{}
  89. // donec closes to broadcast shutdown
  90. donec chan struct{}
  91. // errc transmits errors from grpc Recv
  // and, on shutdown, the close error published by run(); it is created with
  // a buffer of one in NewWatcher.
  92. errc chan error
  93. }
  94. // watchRequest is issued by the subscriber to start a new watcher
  95. type watchRequest struct {
  // ctx is the context passed to Watch(); when it is done the request is
  // abandoned and the stream serving it shuts down.
  96. ctx context.Context
  // key and end are sent as the create request's Key and RangeEnd (see toPB).
  97. key string
  98. end string
  // rev is sent as the create request's StartRevision.
  99. rev int64
  100. // progressNotify is for progress updates.
  101. progressNotify bool
  102. // retc receives a chan WatchResponse once the watcher is established
  103. retc chan chan WatchResponse
  104. }
  105. // watcherStream represents a registered watcher
  106. type watcherStream struct {
  // initReq is a copy of the request that created this watcher; it is re-sent
  // (with rev adjusted to lastRev) when the watcher is resumed on a new
  // watch client.
  107. initReq watchRequest
  108. // outc publishes watch responses to subscriber
  109. outc chan<- WatchResponse
  110. // recvc buffers watch responses before publishing
  111. recvc chan *WatchResponse
  // id is the watch id from the server's create response; it is replaced
  // when the watcher is resumed.
  112. id int64
  113. // lastRev is revision last successfully sent over outc
  114. lastRev int64
  115. // resumec indicates the stream must recover at a given revision
  // (-1 pauses the stream while the resume is being set up).
  116. resumec chan int64
  117. }
  118. func NewWatcher(c *Client) Watcher {
  119. ctx, cancel := context.WithCancel(context.Background())
  120. w := &watcher{
  121. ctx: ctx,
  122. cancel: cancel,
  123. streams: make(map[int64]*watcherStream),
  124. respc: make(chan *pb.WatchResponse),
  125. reqc: make(chan *watchRequest),
  126. stopc: make(chan struct{}),
  127. donec: make(chan struct{}),
  128. errc: make(chan error, 1),
  129. }
  130. f := func(conn *grpc.ClientConn) { w.remote = pb.NewWatchClient(conn) }
  131. w.rc = newRemoteClient(c, f)
  132. go w.run()
  133. return w
  134. }
  135. // Watch posts a watch request to run() and waits for a new watcher channel
  136. func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
  137. ow := opWatch(key, opts...)
  138. retc := make(chan chan WatchResponse, 1)
  139. wr := &watchRequest{
  140. ctx: ctx,
  141. key: string(ow.key),
  142. end: string(ow.end),
  143. rev: ow.rev,
  144. progressNotify: ow.progressNotify,
  145. retc: retc,
  146. }
  147. ok := false
  148. // submit request
  149. select {
  150. case w.reqc <- wr:
  151. ok = true
  152. case <-wr.ctx.Done():
  153. case <-w.donec:
  154. }
  155. // receive channel
  156. if ok {
  157. select {
  158. case ret := <-retc:
  159. return ret
  160. case <-ctx.Done():
  161. case <-w.donec:
  162. }
  163. }
  164. // couldn't create channel; return closed channel
  165. ch := make(chan WatchResponse)
  166. close(ch)
  167. return ch
  168. }
  169. func (w *watcher) Close() error {
  170. close(w.stopc)
  171. <-w.donec
  172. return v3rpc.Error(<-w.errc)
  173. }
  174. func (w *watcher) addStream(resp *pb.WatchResponse, pendingReq *watchRequest) {
  175. if pendingReq == nil {
  176. // no pending request; ignore
  177. return
  178. }
  179. if resp.Canceled || resp.CompactRevision != 0 {
  180. // a cancel at id creation time means the start revision has
  181. // been compacted out of the store
  182. ret := make(chan WatchResponse, 1)
  183. ret <- WatchResponse{
  184. Header: *resp.Header,
  185. CompactRevision: resp.CompactRevision,
  186. Canceled: true}
  187. close(ret)
  188. pendingReq.retc <- ret
  189. return
  190. }
  191. ret := make(chan WatchResponse)
  192. if resp.WatchId == -1 {
  193. // failed; no channel
  194. close(ret)
  195. pendingReq.retc <- ret
  196. return
  197. }
  198. ws := &watcherStream{
  199. initReq: *pendingReq,
  200. id: resp.WatchId,
  201. outc: ret,
  202. // buffered so unlikely to block on sending while holding mu
  203. recvc: make(chan *WatchResponse, 4),
  204. resumec: make(chan int64),
  205. }
  206. if pendingReq.rev == 0 {
  207. // note the header revision so that a put following a current watcher
  208. // disconnect will arrive on the watcher channel after reconnect
  209. ws.initReq.rev = resp.Header.Revision
  210. }
  211. w.mu.Lock()
  212. w.streams[ws.id] = ws
  213. w.mu.Unlock()
  214. // pass back the subscriber channel for the watcher
  215. pendingReq.retc <- ret
  216. // send messages to subscriber
  217. go w.serveStream(ws)
  218. }
  219. // closeStream closes the watcher resources and removes it
  220. func (w *watcher) closeStream(ws *watcherStream) {
  221. // cancels request stream; subscriber receives nil channel
  222. close(ws.initReq.retc)
  223. // close subscriber's channel
  224. close(ws.outc)
  225. // shutdown serveStream
  226. close(ws.recvc)
  227. delete(w.streams, ws.id)
  228. }
  229. // run is the root of the goroutines for managing a watcher client
  // It opens the watch stream, then loops over four event sources: new
  // requests from Watch() (reqc), responses forwarded by serveWatchClient
  // (respc), receive errors (errc), and the stop signal (stopc). Only one
  // create request is outstanding at a time: curReqC is set to nil after a
  // request is taken and restored once the server answers or the stream is
  // rebuilt.
  230. func (w *watcher) run() {
  231. var wc pb.Watch_WatchClient
  232. var closeErr error
  // On exit: publish closeErr without blocking (errc may already hold an
  // error), broadcast shutdown by closing donec, and cancel ctx.
  233. defer func() {
  234. select {
  235. case w.errc <- closeErr:
  236. default:
  237. }
  238. close(w.donec)
  239. w.cancel()
  240. }()
  241. // start a stream with the etcd grpc server
  242. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  243. return
  244. }
  245. var pendingReq, failedReq *watchRequest
  246. curReqC := w.reqc
  // cancelSet records watch ids for which a cancel request has already been
  // sent, so that each unknown id is canceled only once.
  247. cancelSet := make(map[int64]struct{})
  248. for {
  249. select {
  250. // Watch() requested
  251. case pendingReq = <-curReqC:
  252. // no more watch requests until there's a response
  253. curReqC = nil
  254. if err := wc.Send(pendingReq.toPB()); err == nil {
  255. // pendingReq now waits on w.respc
  256. break
  257. }
  258. failedReq = pendingReq
  259. // New events from the watch client
  260. case pbresp := <-w.respc:
  261. switch {
  262. case pbresp.Created:
  263. // response to pending req, try to add
  264. w.addStream(pbresp, pendingReq)
  265. pendingReq = nil
  266. curReqC = w.reqc
  267. case pbresp.Canceled:
  // the server acknowledged the cancel; forget the id
  268. delete(cancelSet, pbresp.WatchId)
  269. default:
  270. // dispatch to appropriate watch stream
  271. if ok := w.dispatchEvent(pbresp); ok {
  272. break
  273. }
  274. // watch response on unexpected watch id; cancel id
  275. if _, ok := cancelSet[pbresp.WatchId]; ok {
  276. break
  277. }
  278. cancelSet[pbresp.WatchId] = struct{}{}
  279. cr := &pb.WatchRequest_CancelRequest{
  280. CancelRequest: &pb.WatchCancelRequest{
  281. WatchId: pbresp.WatchId,
  282. },
  283. }
  284. req := &pb.WatchRequest{RequestUnion: cr}
  // NOTE(review): the error from this Send is discarded; presumably a
  // broken stream is expected to surface through errc instead — confirm.
  285. wc.Send(req)
  286. }
  287. // watch client failed to recv; spawn another if possible
  288. // TODO report watch client errors from errc?
  289. case <-w.errc:
  290. if wc, closeErr = w.newWatchClient(); closeErr != nil {
  291. return
  292. }
  293. curReqC = w.reqc
  // a request that was awaiting its create response on the old stream
  // must be submitted again
  294. if pendingReq != nil {
  295. failedReq = pendingReq
  296. }
  // watch ids from the old stream are no longer meaningful
  297. cancelSet = make(map[int64]struct{})
  298. case <-w.stopc:
  299. return
  300. }
  301. // send failed; queue for retry
  // The retry is pushed back through reqc from a separate goroutine so this
  // loop is not blocked. The goroutine is given pendingReq, which equals
  // failedReq on both paths that set failedReq above.
  302. if failedReq != nil {
  303. go func(wr *watchRequest) {
  304. select {
  305. case w.reqc <- wr:
  306. case <-wr.ctx.Done():
  307. case <-w.donec:
  308. }
  309. }(pendingReq)
  310. failedReq = nil
  311. pendingReq = nil
  312. }
  313. }
  314. }
  315. // dispatchEvent sends a WatchResponse to the appropriate watcher stream
  316. func (w *watcher) dispatchEvent(pbresp *pb.WatchResponse) bool {
  317. w.mu.RLock()
  318. defer w.mu.RUnlock()
  319. ws, ok := w.streams[pbresp.WatchId]
  320. events := make([]*Event, len(pbresp.Events))
  321. for i, ev := range pbresp.Events {
  322. events[i] = (*Event)(ev)
  323. }
  324. if ok {
  325. wr := &WatchResponse{
  326. Header: *pbresp.Header,
  327. Events: events,
  328. CompactRevision: pbresp.CompactRevision,
  329. Canceled: pbresp.Canceled}
  330. ws.recvc <- wr
  331. }
  332. return ok
  333. }
  334. // serveWatchClient forwards messages from the grpc stream to run()
  335. func (w *watcher) serveWatchClient(wc pb.Watch_WatchClient) {
  336. for {
  337. resp, err := wc.Recv()
  338. if err != nil {
  339. select {
  340. case w.errc <- err:
  341. case <-w.donec:
  342. }
  343. return
  344. }
  345. select {
  346. case w.respc <- resp:
  347. case <-w.donec:
  348. return
  349. }
  350. }
  351. }
  352. // serveStream forwards watch responses from run() to the subscriber
  353. func (w *watcher) serveStream(ws *watcherStream) {
  354. emptyWr := &WatchResponse{}
  355. wrs := []*WatchResponse{}
  356. resuming := false
  357. closing := false
  358. for !closing {
  359. curWr := emptyWr
  360. outc := ws.outc
  361. if len(wrs) > 0 {
  362. curWr = wrs[0]
  363. } else {
  364. outc = nil
  365. }
  366. select {
  367. case outc <- *curWr:
  368. if wrs[0].Err() != nil {
  369. closing = true
  370. break
  371. }
  372. var newRev int64
  373. if len(wrs[0].Events) > 0 {
  374. newRev = wrs[0].Events[len(wrs[0].Events)-1].Kv.ModRevision
  375. } else {
  376. newRev = wrs[0].Header.Revision
  377. }
  378. if newRev != ws.lastRev {
  379. ws.lastRev = newRev
  380. }
  381. wrs[0] = nil
  382. wrs = wrs[1:]
  383. case wr, ok := <-ws.recvc:
  384. if !ok {
  385. // shutdown from closeStream
  386. return
  387. }
  388. // resume up to last seen event if disconnected
  389. if resuming && wr.Err() == nil {
  390. resuming = false
  391. // trim events already seen
  392. for i := 0; i < len(wr.Events); i++ {
  393. if wr.Events[i].Kv.ModRevision > ws.lastRev {
  394. wr.Events = wr.Events[i:]
  395. break
  396. }
  397. }
  398. // only forward new events
  399. if wr.Events[0].Kv.ModRevision == ws.lastRev {
  400. break
  401. }
  402. }
  403. resuming = false
  404. // TODO don't keep buffering if subscriber stops reading
  405. wrs = append(wrs, wr)
  406. case resumeRev := <-ws.resumec:
  407. wrs = nil
  408. resuming = true
  409. if resumeRev == -1 {
  410. // pause serving stream while resume gets set up
  411. break
  412. }
  413. if resumeRev != ws.lastRev {
  414. panic("unexpected resume revision")
  415. }
  416. case <-w.donec:
  417. closing = true
  418. case <-ws.initReq.ctx.Done():
  419. closing = true
  420. }
  421. }
  422. w.mu.Lock()
  423. w.closeStream(ws)
  424. w.mu.Unlock()
  425. // lazily send cancel message if events on missing id
  426. }
  427. func (w *watcher) newWatchClient() (pb.Watch_WatchClient, error) {
  428. ws, rerr := w.resume()
  429. if rerr != nil {
  430. return nil, rerr
  431. }
  432. go w.serveWatchClient(ws)
  433. return ws, nil
  434. }
  435. // resume creates a new WatchClient with all current watchers reestablished
  436. func (w *watcher) resume() (ws pb.Watch_WatchClient, err error) {
  437. for {
  438. if ws, err = w.openWatchClient(); err != nil {
  439. break
  440. } else if err = w.resumeWatchers(ws); err == nil {
  441. break
  442. }
  443. }
  444. return ws, v3rpc.Error(err)
  445. }
  446. // openWatchClient retries opening a watchclient until retryConnection fails
  447. func (w *watcher) openWatchClient() (ws pb.Watch_WatchClient, err error) {
  448. for {
  449. select {
  450. case <-w.stopc:
  451. if err == nil {
  452. err = context.Canceled
  453. }
  454. return nil, err
  455. default:
  456. }
  457. if ws, err = w.remote.Watch(w.ctx); ws != nil && err == nil {
  458. break
  459. } else if isHaltErr(w.ctx, err) {
  460. return nil, v3rpc.Error(err)
  461. }
  462. err = w.rc.reconnectWait(w.ctx, nil)
  463. }
  464. return ws, nil
  465. }
  466. // resumeWatchers rebuilds every registered watcher on a new client
  467. func (w *watcher) resumeWatchers(wc pb.Watch_WatchClient) error {
  468. streams := []*watcherStream{}
  469. w.mu.RLock()
  470. for _, ws := range w.streams {
  471. streams = append(streams, ws)
  472. }
  473. w.mu.RUnlock()
  474. for _, ws := range streams {
  475. // pause serveStream
  476. ws.resumec <- -1
  477. // reconstruct watcher from initial request
  478. if ws.lastRev != 0 {
  479. ws.initReq.rev = ws.lastRev
  480. }
  481. if err := wc.Send(ws.initReq.toPB()); err != nil {
  482. return err
  483. }
  484. // wait for request ack
  485. resp, err := wc.Recv()
  486. if err != nil {
  487. return err
  488. } else if len(resp.Events) != 0 || !resp.Created {
  489. return fmt.Errorf("watcher: unexpected response (%+v)", resp)
  490. }
  491. // id may be different since new remote watcher; update map
  492. w.mu.Lock()
  493. delete(w.streams, ws.id)
  494. ws.id = resp.WatchId
  495. w.streams[ws.id] = ws
  496. w.mu.Unlock()
  497. // unpause serveStream
  498. ws.resumec <- ws.lastRev
  499. }
  500. return nil
  501. }
  502. // toPB converts an internal watch request structure to its protobuf messagefunc (wr *watchRequest)
  503. func (wr *watchRequest) toPB() *pb.WatchRequest {
  504. req := &pb.WatchCreateRequest{
  505. StartRevision: wr.rev,
  506. Key: []byte(wr.key),
  507. RangeEnd: []byte(wr.end),
  508. ProgressNotify: wr.progressNotify,
  509. }
  510. cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
  511. return &pb.WatchRequest{RequestUnion: cr}
  512. }