watch.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "fmt"
  17. "sync"
  18. v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  19. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  20. mvccpb "github.com/coreos/etcd/mvcc/mvccpb"
  21. "golang.org/x/net/context"
  22. )
  // Event types a watch can report, re-exported from mvccpb so callers do not
  // need to import that package.
  const (
  	// EventTypeDelete is mvccpb.DELETE.
  	EventTypeDelete = mvccpb.DELETE
  	// EventTypePut is mvccpb.PUT.
  	EventTypePut = mvccpb.PUT
  )
  // Event is the client-side name for mvccpb.Event; the IsCreate and IsModify
  // helpers are defined on it.
  type Event mvccpb.Event
  // WatchChan is the receive-only channel on which a watch delivers its
  // WatchResponse values.
  type WatchChan <-chan WatchResponse
  // Watcher creates watches and tears them all down again.
  type Watcher interface {
  	// Watch watches on a key or prefix. The watched events will be returned
  	// through the returned channel.
  	// If the watch is slow or the required rev is compacted, the watch request
  	// might be canceled from the server-side and the chan will be closed.
  	// 'opts' can be: 'WithRev' and/or 'WithPrefix'.
  	Watch(ctx context.Context, key string, opts ...OpOption) WatchChan

  	// Close closes the watcher and cancels all watch requests.
  	Close() error
  }
  // WatchResponse is one message delivered on a WatchChan.
  type WatchResponse struct {
  	// Header is a copy of the header of the server response this message
  	// was built from.
  	Header pb.ResponseHeader
  	// Events are the changes carried by this response. A response with no
  	// events that is not canceled is a progress notification (see
  	// IsProgressNotify).
  	Events []*Event

  	// CompactRevision is the minimum revision the watcher may receive.
  	// When it is non-zero, Err() reports a compaction error.
  	CompactRevision int64

  	// Canceled is used to indicate watch failure.
  	// If the watch failed and the stream was about to close, before the channel is closed,
  	// the channel sends a final response that has Canceled set to true with a non-nil Err().
  	Canceled bool
  }
  49. // IsCreate returns true if the event tells that the key is newly created.
  50. func (e *Event) IsCreate() bool {
  51. return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
  52. }
  53. // IsModify returns true if the event tells that a new value is put on existing key.
  54. func (e *Event) IsModify() bool {
  55. return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
  56. }
  57. // Err is the error value if this WatchResponse holds an error.
  58. func (wr *WatchResponse) Err() error {
  59. if wr.CompactRevision != 0 {
  60. return v3rpc.ErrCompacted
  61. }
  62. if wr.Canceled {
  63. return v3rpc.ErrFutureRev
  64. }
  65. return nil
  66. }
  67. // IsProgressNotify returns true if the WatchResponse is progress notification.
  68. func (wr *WatchResponse) IsProgressNotify() bool {
  69. return len(wr.Events) == 0 && !wr.Canceled
  70. }
  // watcher implements the Watcher interface
  type watcher struct {
  	// remote is the grpc watch client used to open watch streams
  	remote pb.WatchClient

  	// ctx controls internal remote.Watch requests
  	ctx context.Context
  	// cancel cancels ctx; run() calls it on its way out
  	cancel context.CancelFunc

  	// streams holds all active watchers, keyed by watch id
  	streams map[int64]*watcherStream
  	// mu protects the streams map
  	mu sync.RWMutex

  	// reqc sends a watch request from Watch() to the main goroutine
  	reqc chan *watchRequest
  	// respc receives data from the watch client
  	respc chan *pb.WatchResponse
  	// stopc is closed by Close() to tell the main goroutine to stop all processing
  	stopc chan struct{}
  	// donec closes to broadcast shutdown
  	donec chan struct{}
  	// errc transmits errors from grpc Recv; run() also leaves its exit
  	// error here for Close() to pick up
  	errc chan error
  }
  // watchRequest is issued by the subscriber to start a new watcher
  type watchRequest struct {
  	// ctx is the context passed to Watch(); the stream is torn down once it is done
  	ctx context.Context
  	// key and end are sent to the server as the Key and RangeEnd of the
  	// create request
  	key string
  	end string
  	// rev is sent to the server as the StartRevision of the create request
  	rev int64
  	// progressNotify is for progress updates.
  	progressNotify bool
  	// retc receives a chan WatchResponse once the watcher is established
  	retc chan chan WatchResponse
  }
  // watcherStream represents a registered watcher
  type watcherStream struct {
  	// initReq is a copy of the request that created the stream; it is
  	// re-sent, with an updated start revision, to rebuild the watcher after
  	// a reconnect
  	initReq watchRequest

  	// outc publishes watch responses to subscriber
  	outc chan<- WatchResponse
  	// recvc buffers watch responses before publishing
  	recvc chan *WatchResponse
  	// id is the watch id the stream is registered under in watcher.streams;
  	// it is reassigned when the watcher is rebuilt on a new client
  	id int64

  	// lastRev is revision last successfully sent over outc
  	lastRev int64
  	// resumec indicates the stream must recover at a given revision
  	// (-1 only pauses the stream while the resume is set up)
  	resumec chan int64
  }
  116. func NewWatcher(c *Client) Watcher {
  117. ctx, cancel := context.WithCancel(context.Background())
  118. w := &watcher{
  119. remote: pb.NewWatchClient(c.conn),
  120. ctx: ctx,
  121. cancel: cancel,
  122. streams: make(map[int64]*watcherStream),
  123. respc: make(chan *pb.WatchResponse),
  124. reqc: make(chan *watchRequest),
  125. stopc: make(chan struct{}),
  126. donec: make(chan struct{}),
  127. errc: make(chan error, 1),
  128. }
  129. go w.run()
  130. return w
  131. }
  132. // Watch posts a watch request to run() and waits for a new watcher channel
  133. func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
  134. ow := opWatch(key, opts...)
  135. retc := make(chan chan WatchResponse, 1)
  136. wr := &watchRequest{
  137. ctx: ctx,
  138. key: string(ow.key),
  139. end: string(ow.end),
  140. rev: ow.rev,
  141. progressNotify: ow.progressNotify,
  142. retc: retc,
  143. }
  144. ok := false
  145. // submit request
  146. select {
  147. case w.reqc <- wr:
  148. ok = true
  149. case <-wr.ctx.Done():
  150. case <-w.donec:
  151. }
  152. // receive channel
  153. if ok {
  154. select {
  155. case ret := <-retc:
  156. return ret
  157. case <-ctx.Done():
  158. case <-w.donec:
  159. }
  160. }
  161. // couldn't create channel; return closed channel
  162. ch := make(chan WatchResponse)
  163. close(ch)
  164. return ch
  165. }
  166. func (w *watcher) Close() error {
  167. close(w.stopc)
  168. <-w.donec
  169. return toErr(w.ctx, <-w.errc)
  170. }
  171. func (w *watcher) addStream(resp *pb.WatchResponse, pendingReq *watchRequest) {
  172. if pendingReq == nil {
  173. // no pending request; ignore
  174. return
  175. }
  176. if resp.Canceled || resp.CompactRevision != 0 {
  177. // a cancel at id creation time means the start revision has
  178. // been compacted out of the store
  179. ret := make(chan WatchResponse, 1)
  180. ret <- WatchResponse{
  181. Header: *resp.Header,
  182. CompactRevision: resp.CompactRevision,
  183. Canceled: true}
  184. close(ret)
  185. pendingReq.retc <- ret
  186. return
  187. }
  188. ret := make(chan WatchResponse)
  189. if resp.WatchId == -1 {
  190. // failed; no channel
  191. close(ret)
  192. pendingReq.retc <- ret
  193. return
  194. }
  195. ws := &watcherStream{
  196. initReq: *pendingReq,
  197. id: resp.WatchId,
  198. outc: ret,
  199. // buffered so unlikely to block on sending while holding mu
  200. recvc: make(chan *WatchResponse, 4),
  201. resumec: make(chan int64),
  202. }
  203. if pendingReq.rev == 0 {
  204. // note the header revision so that a put following a current watcher
  205. // disconnect will arrive on the watcher channel after reconnect
  206. ws.initReq.rev = resp.Header.Revision
  207. }
  208. w.mu.Lock()
  209. w.streams[ws.id] = ws
  210. w.mu.Unlock()
  211. // pass back the subscriber channel for the watcher
  212. pendingReq.retc <- ret
  213. // send messages to subscriber
  214. go w.serveStream(ws)
  215. }
  216. // closeStream closes the watcher resources and removes it
  217. func (w *watcher) closeStream(ws *watcherStream) {
  218. // cancels request stream; subscriber receives nil channel
  219. close(ws.initReq.retc)
  220. // close subscriber's channel
  221. close(ws.outc)
  222. // shutdown serveStream
  223. close(ws.recvc)
  224. delete(w.streams, ws.id)
  225. }
  // run is the root of the goroutines for managing a watcher client.
  // It owns the grpc watch stream: it sends one create request at a time,
  // routes responses to their streams, replaces the stream when receiving
  // fails, and exits when stopc is closed or no new stream can be opened.
  func (w *watcher) run() {
  	var wc pb.Watch_WatchClient
  	var closeErr error

  	defer func() {
  		// leave the exit error for Close(); skip it if errc is already full
  		select {
  		case w.errc <- closeErr:
  		default:
  		}
  		close(w.donec)
  		w.cancel()
  	}()

  	// start a stream with the etcd grpc server
  	if wc, closeErr = w.newWatchClient(); closeErr != nil {
  		return
  	}

  	// pendingReq is the create request awaiting its Created response;
  	// failedReq is a request that has to be submitted again
  	var pendingReq, failedReq *watchRequest
  	// curReqC is w.reqc while a new request may be accepted and nil (never
  	// ready in the select) while one is in flight
  	curReqC := w.reqc
  	// cancelSet holds the unknown watch ids a cancel was already sent for
  	cancelSet := make(map[int64]struct{})
  	for {
  		select {
  		// Watch() requested
  		case pendingReq = <-curReqC:
  			// no more watch requests until there's a response
  			curReqC = nil
  			if err := wc.Send(pendingReq.toPB()); err == nil {
  				// pendingReq now waits on w.respc
  				break
  			}
  			failedReq = pendingReq
  		// New events from the watch client
  		case pbresp := <-w.respc:
  			switch {
  			case pbresp.Created:
  				// response to pending req, try to add
  				w.addStream(pbresp, pendingReq)
  				pendingReq = nil
  				curReqC = w.reqc
  			case pbresp.Canceled:
  				// the id is gone on the server; forget the cancel sent for it
  				delete(cancelSet, pbresp.WatchId)
  			default:
  				// dispatch to appropriate watch stream
  				if ok := w.dispatchEvent(pbresp); ok {
  					break
  				}
  				// watch response on unexpected watch id; cancel id
  				if _, ok := cancelSet[pbresp.WatchId]; ok {
  					// cancel already sent
  					break
  				}
  				cancelSet[pbresp.WatchId] = struct{}{}
  				cr := &pb.WatchRequest_CancelRequest{
  					CancelRequest: &pb.WatchCancelRequest{
  						WatchId: pbresp.WatchId,
  					},
  				}
  				req := &pb.WatchRequest{RequestUnion: cr}
  				// the send error is not checked
  				wc.Send(req)
  			}
  		// watch client failed to recv; spawn another if possible
  		// TODO report watch client errors from errc?
  		case <-w.errc:
  			if wc, closeErr = w.newWatchClient(); closeErr != nil {
  				return
  			}
  			curReqC = w.reqc
  			// a request that was in flight on the old client gets no
  			// answer there; submit it again
  			if pendingReq != nil {
  				failedReq = pendingReq
  			}
  			// ids from the old client are no longer relevant
  			cancelSet = make(map[int64]struct{})
  		case <-w.stopc:
  			return
  		}

  		// send failed; queue for retry
  		if failedReq != nil {
  			// resubmit from a separate goroutine so this loop keeps running.
  			// pendingReq is passed here; on every path that sets failedReq
  			// the two hold the same request
  			go func(wr *watchRequest) {
  				select {
  				case w.reqc <- wr:
  				case <-wr.ctx.Done():
  				case <-w.donec:
  				}
  			}(pendingReq)
  			failedReq = nil
  			pendingReq = nil
  		}
  	}
  }
  312. // dispatchEvent sends a WatchResponse to the appropriate watcher stream
  313. func (w *watcher) dispatchEvent(pbresp *pb.WatchResponse) bool {
  314. w.mu.RLock()
  315. defer w.mu.RUnlock()
  316. ws, ok := w.streams[pbresp.WatchId]
  317. events := make([]*Event, len(pbresp.Events))
  318. for i, ev := range pbresp.Events {
  319. events[i] = (*Event)(ev)
  320. }
  321. if ok {
  322. wr := &WatchResponse{
  323. Header: *pbresp.Header,
  324. Events: events,
  325. CompactRevision: pbresp.CompactRevision,
  326. Canceled: pbresp.Canceled}
  327. ws.recvc <- wr
  328. }
  329. return ok
  330. }
  331. // serveWatchClient forwards messages from the grpc stream to run()
  332. func (w *watcher) serveWatchClient(wc pb.Watch_WatchClient) {
  333. for {
  334. resp, err := wc.Recv()
  335. if err != nil {
  336. select {
  337. case w.errc <- err:
  338. case <-w.donec:
  339. }
  340. return
  341. }
  342. select {
  343. case w.respc <- resp:
  344. case <-w.donec:
  345. return
  346. }
  347. }
  348. }
  349. // serveStream forwards watch responses from run() to the subscriber
  350. func (w *watcher) serveStream(ws *watcherStream) {
  351. emptyWr := &WatchResponse{}
  352. wrs := []*WatchResponse{}
  353. resuming := false
  354. closing := false
  355. for !closing {
  356. curWr := emptyWr
  357. outc := ws.outc
  358. if len(wrs) > 0 {
  359. curWr = wrs[0]
  360. } else {
  361. outc = nil
  362. }
  363. select {
  364. case outc <- *curWr:
  365. if wrs[0].Err() != nil {
  366. closing = true
  367. break
  368. }
  369. var newRev int64
  370. if len(wrs[0].Events) > 0 {
  371. newRev = wrs[0].Events[len(wrs[0].Events)-1].Kv.ModRevision
  372. } else {
  373. newRev = wrs[0].Header.Revision
  374. }
  375. if newRev != ws.lastRev {
  376. ws.lastRev = newRev
  377. }
  378. wrs[0] = nil
  379. wrs = wrs[1:]
  380. case wr, ok := <-ws.recvc:
  381. if !ok {
  382. // shutdown from closeStream
  383. return
  384. }
  385. // resume up to last seen event if disconnected
  386. if resuming && wr.Err() == nil {
  387. resuming = false
  388. // trim events already seen
  389. for i := 0; i < len(wr.Events); i++ {
  390. if wr.Events[i].Kv.ModRevision > ws.lastRev {
  391. wr.Events = wr.Events[i:]
  392. break
  393. }
  394. }
  395. // only forward new events
  396. if wr.Events[0].Kv.ModRevision == ws.lastRev {
  397. break
  398. }
  399. }
  400. resuming = false
  401. // TODO don't keep buffering if subscriber stops reading
  402. wrs = append(wrs, wr)
  403. case resumeRev := <-ws.resumec:
  404. wrs = nil
  405. resuming = true
  406. if resumeRev == -1 {
  407. // pause serving stream while resume gets set up
  408. break
  409. }
  410. if resumeRev != ws.lastRev {
  411. panic("unexpected resume revision")
  412. }
  413. case <-w.donec:
  414. closing = true
  415. case <-ws.initReq.ctx.Done():
  416. closing = true
  417. }
  418. }
  419. w.mu.Lock()
  420. w.closeStream(ws)
  421. w.mu.Unlock()
  422. // lazily send cancel message if events on missing id
  423. }
  424. func (w *watcher) newWatchClient() (pb.Watch_WatchClient, error) {
  425. ws, rerr := w.resume()
  426. if rerr != nil {
  427. return nil, rerr
  428. }
  429. go w.serveWatchClient(ws)
  430. return ws, nil
  431. }
  432. // resume creates a new WatchClient with all current watchers reestablished
  433. func (w *watcher) resume() (ws pb.Watch_WatchClient, err error) {
  434. for {
  435. if ws, err = w.openWatchClient(); err != nil {
  436. break
  437. } else if err = w.resumeWatchers(ws); err == nil {
  438. break
  439. }
  440. }
  441. return ws, v3rpc.Error(err)
  442. }
  443. // openWatchClient retries opening a watchclient until retryConnection fails
  444. func (w *watcher) openWatchClient() (ws pb.Watch_WatchClient, err error) {
  445. for {
  446. select {
  447. case <-w.stopc:
  448. if err == nil {
  449. err = context.Canceled
  450. }
  451. return nil, err
  452. default:
  453. }
  454. if ws, err = w.remote.Watch(w.ctx); ws != nil && err == nil {
  455. break
  456. }
  457. if isHaltErr(w.ctx, err) {
  458. return nil, v3rpc.Error(err)
  459. }
  460. }
  461. return ws, nil
  462. }
  463. // resumeWatchers rebuilds every registered watcher on a new client
  464. func (w *watcher) resumeWatchers(wc pb.Watch_WatchClient) error {
  465. w.mu.RLock()
  466. streams := make([]*watcherStream, 0, len(w.streams))
  467. for _, ws := range w.streams {
  468. streams = append(streams, ws)
  469. }
  470. w.mu.RUnlock()
  471. for _, ws := range streams {
  472. // pause serveStream
  473. ws.resumec <- -1
  474. // reconstruct watcher from initial request
  475. if ws.lastRev != 0 {
  476. ws.initReq.rev = ws.lastRev
  477. }
  478. if err := wc.Send(ws.initReq.toPB()); err != nil {
  479. return err
  480. }
  481. // wait for request ack
  482. resp, err := wc.Recv()
  483. if err != nil {
  484. return err
  485. } else if len(resp.Events) != 0 || !resp.Created {
  486. return fmt.Errorf("watcher: unexpected response (%+v)", resp)
  487. }
  488. // id may be different since new remote watcher; update map
  489. w.mu.Lock()
  490. delete(w.streams, ws.id)
  491. ws.id = resp.WatchId
  492. w.streams[ws.id] = ws
  493. w.mu.Unlock()
  494. // unpause serveStream
  495. ws.resumec <- ws.lastRev
  496. }
  497. return nil
  498. }
  499. // toPB converts an internal watch request structure to its protobuf messagefunc (wr *watchRequest)
  500. func (wr *watchRequest) toPB() *pb.WatchRequest {
  501. req := &pb.WatchCreateRequest{
  502. StartRevision: wr.rev,
  503. Key: []byte(wr.key),
  504. RangeEnd: []byte(wr.end),
  505. ProgressNotify: wr.progressNotify,
  506. }
  507. cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
  508. return &pb.WatchRequest{RequestUnion: cr}
  509. }