watchable_store.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package storage
  15. import (
  16. "errors"
  17. "log"
  18. "sync"
  19. "time"
  20. "github.com/coreos/etcd/storage/storagepb"
  21. )
  // ExceedEnd is the error returned by Watcher.Err when the watcher reaches its
  // end revision and no more events are available.
  var ExceedEnd = errors.New("storage: watcher reaches end revision")
  25. type watchableStore struct {
  26. mu sync.Mutex
  27. KV
  28. // contains all unsynced watchers that needs to sync events that have happened
  29. // TODO: use map to reduce cancel cost
  30. unsynced []*watcher
  31. // contains all synced watchers that are tracking the events that will happen
  32. // The key of the map is the key that the watcher is watching on.
  33. synced map[string][]*watcher
  34. // contains all synced watchers that have an end revision
  35. // The key of the map is the end revision of the watcher.
  36. endm map[int64][]*watcher
  37. tx *ongoingTx
  38. stopc chan struct{}
  39. wg sync.WaitGroup
  40. }
  41. func newWatchableStore(path string) *watchableStore {
  42. s := &watchableStore{
  43. KV: newStore(path),
  44. synced: make(map[string][]*watcher),
  45. endm: make(map[int64][]*watcher),
  46. stopc: make(chan struct{}),
  47. }
  48. s.wg.Add(1)
  49. go s.syncWatchersLoop()
  50. return s
  51. }
  52. func (s *watchableStore) Put(key, value []byte) (rev int64) {
  53. s.mu.Lock()
  54. defer s.mu.Unlock()
  55. rev = s.KV.Put(key, value)
  56. // TODO: avoid this range
  57. kvs, _, err := s.KV.Range(key, nil, 0, rev)
  58. if err != nil {
  59. log.Panicf("unexpected range error (%v)", err)
  60. }
  61. s.handle(rev, storagepb.Event{
  62. Type: storagepb.PUT,
  63. Kv: &kvs[0],
  64. })
  65. return rev
  66. }
  67. func (s *watchableStore) DeleteRange(key, end []byte) (n, rev int64) {
  68. s.mu.Lock()
  69. defer s.mu.Unlock()
  70. // TODO: avoid this range
  71. kvs, _, err := s.KV.Range(key, end, 0, 0)
  72. if err != nil {
  73. log.Panicf("unexpected range error (%v)", err)
  74. }
  75. n, rev = s.KV.DeleteRange(key, end)
  76. for _, kv := range kvs {
  77. s.handle(rev, storagepb.Event{
  78. Type: storagepb.DELETE,
  79. Kv: &storagepb.KeyValue{
  80. Key: kv.Key,
  81. },
  82. })
  83. }
  84. return n, rev
  85. }
  86. func (s *watchableStore) TxnBegin() int64 {
  87. s.mu.Lock()
  88. s.tx = newOngoingTx()
  89. return s.KV.TxnBegin()
  90. }
  91. func (s *watchableStore) TxnPut(txnID int64, key, value []byte) (rev int64, err error) {
  92. rev, err = s.KV.TxnPut(txnID, key, value)
  93. if err == nil {
  94. s.tx.put(string(key))
  95. }
  96. return rev, err
  97. }
  98. func (s *watchableStore) TxnDeleteRange(txnID int64, key, end []byte) (n, rev int64, err error) {
  99. kvs, _, err := s.KV.TxnRange(txnID, key, end, 0, 0)
  100. if err != nil {
  101. log.Panicf("unexpected range error (%v)", err)
  102. }
  103. n, rev, err = s.KV.TxnDeleteRange(txnID, key, end)
  104. if err == nil {
  105. for _, kv := range kvs {
  106. s.tx.del(string(kv.Key))
  107. }
  108. }
  109. return n, rev, err
  110. }
  111. func (s *watchableStore) TxnEnd(txnID int64) error {
  112. err := s.KV.TxnEnd(txnID)
  113. if err != nil {
  114. return err
  115. }
  116. _, rev, _ := s.KV.Range(nil, nil, 0, 0)
  117. for k := range s.tx.putm {
  118. kvs, _, err := s.KV.Range([]byte(k), nil, 0, 0)
  119. if err != nil {
  120. log.Panicf("unexpected range error (%v)", err)
  121. }
  122. s.handle(rev, storagepb.Event{
  123. Type: storagepb.PUT,
  124. Kv: &kvs[0],
  125. })
  126. }
  127. for k := range s.tx.delm {
  128. s.handle(rev, storagepb.Event{
  129. Type: storagepb.DELETE,
  130. Kv: &storagepb.KeyValue{
  131. Key: []byte(k),
  132. },
  133. })
  134. }
  135. s.mu.Unlock()
  136. return nil
  137. }
  138. func (s *watchableStore) Close() error {
  139. close(s.stopc)
  140. s.wg.Wait()
  141. return s.KV.Close()
  142. }
  143. func (s *watchableStore) Watcher(key []byte, prefix bool, startRev, endRev int64) (Watcher, CancelFunc) {
  144. s.mu.Lock()
  145. defer s.mu.Unlock()
  146. wa := newWatcher(key, prefix, startRev, endRev)
  147. k := string(key)
  148. if startRev == 0 {
  149. s.synced[k] = append(s.synced[k], wa)
  150. if endRev != 0 {
  151. s.endm[endRev] = append(s.endm[endRev], wa)
  152. }
  153. } else {
  154. slowWatchersGauge.Inc()
  155. s.unsynced = append(s.unsynced, wa)
  156. }
  157. watchersGauge.Inc()
  158. cancel := CancelFunc(func() {
  159. s.mu.Lock()
  160. defer s.mu.Unlock()
  161. wa.stopWithError(ErrCanceled)
  162. // remove global references of the watcher
  163. for i, w := range s.unsynced {
  164. if w == wa {
  165. s.unsynced = append(s.unsynced[:i], s.unsynced[i+1:]...)
  166. slowWatchersGauge.Dec()
  167. watchersGauge.Dec()
  168. return
  169. }
  170. }
  171. for i, w := range s.synced[k] {
  172. if w == wa {
  173. s.synced[k] = append(s.synced[k][:i], s.synced[k][i+1:]...)
  174. watchersGauge.Dec()
  175. }
  176. }
  177. if wa.end != 0 {
  178. for i, w := range s.endm[wa.end] {
  179. if w == wa {
  180. s.endm[wa.end] = append(s.endm[wa.end][:i], s.endm[wa.end][i+1:]...)
  181. }
  182. }
  183. }
  184. // If we cannot find it, it should have finished watch.
  185. })
  186. return wa, cancel
  187. }
  188. // keepSyncWatchers syncs the watchers in the unsyncd map every 100ms.
  189. func (s *watchableStore) syncWatchersLoop() {
  190. defer s.wg.Done()
  191. for {
  192. s.mu.Lock()
  193. s.syncWatchers()
  194. s.mu.Unlock()
  195. select {
  196. case <-time.After(100 * time.Millisecond):
  197. case <-s.stopc:
  198. return
  199. }
  200. }
  201. }
  202. // syncWatchers syncs the watchers in the unsyncd map.
  203. func (s *watchableStore) syncWatchers() {
  204. _, curRev, _ := s.KV.Range(nil, nil, 0, 0)
  205. // filtering without allocating
  206. // https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
  207. nws := s.unsynced[:0]
  208. for _, w := range s.unsynced {
  209. var end []byte
  210. if w.prefix {
  211. end = make([]byte, len(w.key))
  212. copy(end, w.key)
  213. end[len(w.key)-1]++
  214. }
  215. limit := cap(w.ch) - len(w.ch)
  216. // the channel is full, try it in the next round
  217. if limit == 0 {
  218. nws = append(nws, w)
  219. continue
  220. }
  221. evs, nextRev, err := s.KV.(*store).RangeEvents(w.key, end, int64(limit), w.cur, w.end)
  222. if err != nil {
  223. w.stopWithError(err)
  224. continue
  225. }
  226. // push events to the channel
  227. for _, ev := range evs {
  228. w.ch <- ev
  229. pendingEventsGauge.Inc()
  230. }
  231. // stop watcher if it reaches the end
  232. if w.end > 0 && nextRev >= w.end {
  233. w.stopWithError(ExceedEnd)
  234. continue
  235. }
  236. // switch to tracking future events if needed
  237. if nextRev > curRev {
  238. s.synced[string(w.key)] = append(s.synced[string(w.key)], w)
  239. if w.end != 0 {
  240. s.endm[w.end] = append(s.endm[w.end], w)
  241. }
  242. continue
  243. }
  244. // put it back to try it in the next round
  245. w.cur = nextRev
  246. nws = append(nws, w)
  247. }
  248. s.unsynced = nws
  249. slowWatchersGauge.Set(float64(len(s.unsynced)))
  250. }
  251. // handle handles the change of the happening event on all watchers.
  252. func (s *watchableStore) handle(rev int64, ev storagepb.Event) {
  253. s.notify(rev, ev)
  254. s.stopWatchers(rev)
  255. }
  256. // notify notifies the fact that given event at the given rev just happened to
  257. // watchers that watch on the key of the event.
  258. func (s *watchableStore) notify(rev int64, ev storagepb.Event) {
  259. // check all prefixes of the key to notify all corresponded watchers
  260. for i := 0; i <= len(ev.Kv.Key); i++ {
  261. ws := s.synced[string(ev.Kv.Key[:i])]
  262. nws := ws[:0]
  263. for _, w := range ws {
  264. // the watcher needs to be notified when either it watches prefix or
  265. // the key is exactly matched.
  266. if !w.prefix && i != len(ev.Kv.Key) {
  267. continue
  268. }
  269. select {
  270. case w.ch <- ev:
  271. pendingEventsGauge.Inc()
  272. nws = append(nws, w)
  273. default:
  274. // put it back to unsynced place
  275. if w.end != 0 {
  276. for i, ew := range s.endm[w.end] {
  277. if ew == w {
  278. s.endm[w.end] = append(s.endm[w.end][:i], s.endm[w.end][i+1:]...)
  279. }
  280. }
  281. }
  282. w.cur = rev
  283. s.unsynced = append(s.unsynced, w)
  284. slowWatchersGauge.Inc()
  285. }
  286. }
  287. s.synced[string(ev.Kv.Key[:i])] = nws
  288. }
  289. }
  290. // stopWatchers stops watchers with limit equal to rev.
  291. func (s *watchableStore) stopWatchers(rev int64) {
  292. for i, wa := range s.endm[rev+1] {
  293. k := string(wa.key)
  294. for _, w := range s.synced[k] {
  295. if w == wa {
  296. s.synced[k] = append(s.synced[k][:i], s.synced[k][i+1:]...)
  297. watchersGauge.Dec()
  298. }
  299. }
  300. wa.stopWithError(ExceedEnd)
  301. }
  302. delete(s.endm, rev+1)
  303. }
  // ongoingTx records the net effect of an ongoing transaction on each key it
  // touched. A key is present in at most one of the two maps, and every
  // present entry is true, so ranging over the keys of a map alone is safe.
  type ongoingTx struct {
  	// keys put/deleted in the ongoing txn
  	putm map[string]bool
  	delm map[string]bool
  }

  // newOngoingTx returns an ongoingTx with both maps allocated and empty.
  func newOngoingTx() *ongoingTx {
  	return &ongoingTx{
  		putm: make(map[string]bool),
  		delm: make(map[string]bool),
  	}
  }

  // put records that k was put, overriding an earlier delete of k.
  func (tx *ongoingTx) put(k string) {
  	// Remove the key instead of storing false so that it no longer shows
  	// up when delm is ranged over.
  	delete(tx.delm, k)
  	tx.putm[k] = true
  }

  // del records that k was deleted, overriding an earlier put of k.
  func (tx *ongoingTx) del(k string) {
  	// Remove the key instead of storing false so that it no longer shows
  	// up when putm is ranged over.
  	delete(tx.putm, k)
  	tx.delm[k] = true
  }