watchable_store.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package storage
  15. import (
  16. "log"
  17. "sync"
  18. "time"
  19. "github.com/coreos/etcd/storage/storagepb"
  20. )
  21. type watchableStore struct {
  22. mu sync.Mutex
  23. *store
  24. // contains all unsynced watchers that needs to sync events that have happened
  25. // TODO: use map to reduce cancel cost
  26. unsynced []*watcher
  27. // contains all synced watchers that are tracking the events that will happen
  28. // The key of the map is the key that the watcher is watching on.
  29. synced map[string][]*watcher
  30. tx *ongoingTx
  31. stopc chan struct{}
  32. wg sync.WaitGroup
  33. }
  34. func newWatchableStore(path string) *watchableStore {
  35. s := &watchableStore{
  36. store: newStore(path),
  37. synced: make(map[string][]*watcher),
  38. stopc: make(chan struct{}),
  39. }
  40. s.wg.Add(1)
  41. go s.syncWatchersLoop()
  42. return s
  43. }
  44. func (s *watchableStore) Put(key, value []byte) (rev int64) {
  45. s.mu.Lock()
  46. defer s.mu.Unlock()
  47. rev = s.store.Put(key, value)
  48. // TODO: avoid this range
  49. kvs, _, err := s.store.Range(key, nil, 0, rev)
  50. if err != nil {
  51. log.Panicf("unexpected range error (%v)", err)
  52. }
  53. s.handle(rev, storagepb.Event{
  54. Type: storagepb.PUT,
  55. Kv: &kvs[0],
  56. })
  57. return rev
  58. }
  59. func (s *watchableStore) DeleteRange(key, end []byte) (n, rev int64) {
  60. s.mu.Lock()
  61. defer s.mu.Unlock()
  62. // TODO: avoid this range
  63. kvs, _, err := s.store.Range(key, end, 0, 0)
  64. if err != nil {
  65. log.Panicf("unexpected range error (%v)", err)
  66. }
  67. n, rev = s.store.DeleteRange(key, end)
  68. for _, kv := range kvs {
  69. s.handle(rev, storagepb.Event{
  70. Type: storagepb.DELETE,
  71. Kv: &storagepb.KeyValue{
  72. Key: kv.Key,
  73. },
  74. })
  75. }
  76. return n, rev
  77. }
  78. func (s *watchableStore) TxnBegin() int64 {
  79. s.mu.Lock()
  80. s.tx = newOngoingTx()
  81. return s.store.TxnBegin()
  82. }
  83. func (s *watchableStore) TxnPut(txnID int64, key, value []byte) (rev int64, err error) {
  84. rev, err = s.store.TxnPut(txnID, key, value)
  85. if err == nil {
  86. s.tx.put(string(key))
  87. }
  88. return rev, err
  89. }
  90. func (s *watchableStore) TxnDeleteRange(txnID int64, key, end []byte) (n, rev int64, err error) {
  91. kvs, _, err := s.store.TxnRange(txnID, key, end, 0, 0)
  92. if err != nil {
  93. log.Panicf("unexpected range error (%v)", err)
  94. }
  95. n, rev, err = s.store.TxnDeleteRange(txnID, key, end)
  96. if err == nil {
  97. for _, kv := range kvs {
  98. s.tx.del(string(kv.Key))
  99. }
  100. }
  101. return n, rev, err
  102. }
  103. func (s *watchableStore) TxnEnd(txnID int64) error {
  104. err := s.store.TxnEnd(txnID)
  105. if err != nil {
  106. return err
  107. }
  108. _, rev, _ := s.store.Range(nil, nil, 0, 0)
  109. for k := range s.tx.putm {
  110. kvs, _, err := s.store.Range([]byte(k), nil, 0, 0)
  111. if err != nil {
  112. log.Panicf("unexpected range error (%v)", err)
  113. }
  114. s.handle(rev, storagepb.Event{
  115. Type: storagepb.PUT,
  116. Kv: &kvs[0],
  117. })
  118. }
  119. for k := range s.tx.delm {
  120. s.handle(rev, storagepb.Event{
  121. Type: storagepb.DELETE,
  122. Kv: &storagepb.KeyValue{
  123. Key: []byte(k),
  124. },
  125. })
  126. }
  127. s.mu.Unlock()
  128. return nil
  129. }
  130. func (s *watchableStore) Close() error {
  131. close(s.stopc)
  132. s.wg.Wait()
  133. return s.store.Close()
  134. }
  135. func (s *watchableStore) Watcher(key []byte, prefix bool, startRev int64) (Watcher, CancelFunc) {
  136. s.mu.Lock()
  137. defer s.mu.Unlock()
  138. wa := newWatcher(key, prefix, startRev)
  139. k := string(key)
  140. if startRev == 0 {
  141. s.synced[k] = append(s.synced[k], wa)
  142. } else {
  143. slowWatchersGauge.Inc()
  144. s.unsynced = append(s.unsynced, wa)
  145. }
  146. watchersGauge.Inc()
  147. cancel := CancelFunc(func() {
  148. s.mu.Lock()
  149. defer s.mu.Unlock()
  150. wa.stopWithError(ErrCanceled)
  151. // remove global references of the watcher
  152. for i, w := range s.unsynced {
  153. if w == wa {
  154. s.unsynced = append(s.unsynced[:i], s.unsynced[i+1:]...)
  155. slowWatchersGauge.Dec()
  156. watchersGauge.Dec()
  157. return
  158. }
  159. }
  160. for i, w := range s.synced[k] {
  161. if w == wa {
  162. s.synced[k] = append(s.synced[k][:i], s.synced[k][i+1:]...)
  163. watchersGauge.Dec()
  164. }
  165. }
  166. // If we cannot find it, it should have finished watch.
  167. })
  168. return wa, cancel
  169. }
  170. // keepSyncWatchers syncs the watchers in the unsyncd map every 100ms.
  171. func (s *watchableStore) syncWatchersLoop() {
  172. defer s.wg.Done()
  173. for {
  174. s.mu.Lock()
  175. s.syncWatchers()
  176. s.mu.Unlock()
  177. select {
  178. case <-time.After(100 * time.Millisecond):
  179. case <-s.stopc:
  180. return
  181. }
  182. }
  183. }
  184. // syncWatchers syncs the watchers in the unsyncd map.
  185. func (s *watchableStore) syncWatchers() {
  186. _, curRev, _ := s.store.Range(nil, nil, 0, 0)
  187. // filtering without allocating
  188. // https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
  189. nws := s.unsynced[:0]
  190. for _, w := range s.unsynced {
  191. var end []byte
  192. if w.prefix {
  193. end = make([]byte, len(w.key))
  194. copy(end, w.key)
  195. end[len(w.key)-1]++
  196. }
  197. limit := cap(w.ch) - len(w.ch)
  198. // the channel is full, try it in the next round
  199. if limit == 0 {
  200. nws = append(nws, w)
  201. continue
  202. }
  203. evs, nextRev, err := s.store.RangeEvents(w.key, end, int64(limit), w.cur)
  204. if err != nil {
  205. w.stopWithError(err)
  206. continue
  207. }
  208. // push events to the channel
  209. for _, ev := range evs {
  210. w.ch <- ev
  211. pendingEventsGauge.Inc()
  212. }
  213. // switch to tracking future events if needed
  214. if nextRev > curRev {
  215. s.synced[string(w.key)] = append(s.synced[string(w.key)], w)
  216. continue
  217. }
  218. // put it back to try it in the next round
  219. w.cur = nextRev
  220. nws = append(nws, w)
  221. }
  222. s.unsynced = nws
  223. slowWatchersGauge.Set(float64(len(s.unsynced)))
  224. }
  225. // handle handles the change of the happening event on all watchers.
  226. func (s *watchableStore) handle(rev int64, ev storagepb.Event) {
  227. s.notify(rev, ev)
  228. }
  229. // notify notifies the fact that given event at the given rev just happened to
  230. // watchers that watch on the key of the event.
  231. func (s *watchableStore) notify(rev int64, ev storagepb.Event) {
  232. // check all prefixes of the key to notify all corresponded watchers
  233. for i := 0; i <= len(ev.Kv.Key); i++ {
  234. ws := s.synced[string(ev.Kv.Key[:i])]
  235. nws := ws[:0]
  236. for _, w := range ws {
  237. // the watcher needs to be notified when either it watches prefix or
  238. // the key is exactly matched.
  239. if !w.prefix && i != len(ev.Kv.Key) {
  240. continue
  241. }
  242. select {
  243. case w.ch <- ev:
  244. pendingEventsGauge.Inc()
  245. nws = append(nws, w)
  246. default:
  247. w.cur = rev
  248. s.unsynced = append(s.unsynced, w)
  249. slowWatchersGauge.Inc()
  250. }
  251. }
  252. s.synced[string(ev.Kv.Key[:i])] = nws
  253. }
  254. }
  255. type ongoingTx struct {
  256. // keys put/deleted in the ongoing txn
  257. putm map[string]bool
  258. delm map[string]bool
  259. }
  260. func newOngoingTx() *ongoingTx {
  261. return &ongoingTx{
  262. putm: make(map[string]bool),
  263. delm: make(map[string]bool),
  264. }
  265. }
  266. func (tx *ongoingTx) put(k string) {
  267. tx.putm[k] = true
  268. tx.delm[k] = false
  269. }
  270. func (tx *ongoingTx) del(k string) {
  271. tx.delm[k] = true
  272. tx.putm[k] = false
  273. }