watchable_store.go

// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package storage

import (
	"errors"
	"log"
	"sync"
	"time"

	"github.com/coreos/etcd/storage/storagepb"
)
// ExceedEnd is the error returned by Watcher.Err when the watcher reaches its
// end revision and no more events are available.
var ExceedEnd = errors.New("storage: watcher reaches end revision")
type watchableStore struct {
	mu sync.Mutex

	KV

	// contains all unsynced watchers that need to sync events that have happened
	// TODO: use map to reduce cancel cost
	unsynced []*watcher

	// contains all synced watchers that are tracking the events that will happen
	// The key of the map is the key that the watcher is watching on.
	synced map[string][]*watcher

	// contains all synced watchers that have an end revision
	// The key of the map is the end revision of the watcher.
	endm map[int64][]*watcher

	tx *ongoingTx

	stopc chan struct{}
	wg    sync.WaitGroup
}
func newWatchableStore(path string) *watchableStore {
	s := &watchableStore{
		KV:     newStore(path),
		synced: make(map[string][]*watcher),
		endm:   make(map[int64][]*watcher),
		stopc:  make(chan struct{}),
	}
	s.wg.Add(1)
	go s.syncWatchersLoop()
	return s
}
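// A minimal lifecycle sketch ("test.db" is a hypothetical backend path used
// only for illustration):
//
//	s := newWatchableStore("test.db")
//	defer s.Close() // stops syncWatchersLoop and closes the underlying KV
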
func (s *watchableStore) Put(key, value []byte) (rev int64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	rev = s.KV.Put(key, value)
	// TODO: avoid this range
	kvs, _, err := s.KV.Range(key, nil, 0, rev)
	if err != nil {
		log.Panicf("unexpected range error (%v)", err)
	}
	s.handle(rev, storagepb.Event{
		Type: storagepb.PUT,
		Kv:   &kvs[0],
	})
	return rev
}
func (s *watchableStore) DeleteRange(key, end []byte) (n, rev int64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// TODO: avoid this range
	kvs, _, err := s.KV.Range(key, end, 0, 0)
	if err != nil {
		log.Panicf("unexpected range error (%v)", err)
	}
	n, rev = s.KV.DeleteRange(key, end)
	for _, kv := range kvs {
		s.handle(rev, storagepb.Event{
			Type: storagepb.DELETE,
			Kv: &storagepb.KeyValue{
				Key: kv.Key,
			},
		})
	}
	return n, rev
}
func (s *watchableStore) TxnBegin() int64 {
	// s.mu is held for the whole txn; TxnEnd releases it when the txn ends.
	s.mu.Lock()
	s.tx = newOngoingTx()
	return s.KV.TxnBegin()
}

func (s *watchableStore) TxnPut(txnID int64, key, value []byte) (rev int64, err error) {
	rev, err = s.KV.TxnPut(txnID, key, value)
	if err == nil {
		s.tx.put(string(key))
	}
	return rev, err
}

func (s *watchableStore) TxnDeleteRange(txnID int64, key, end []byte) (n, rev int64, err error) {
	kvs, _, err := s.KV.TxnRange(txnID, key, end, 0, 0)
	if err != nil {
		log.Panicf("unexpected range error (%v)", err)
	}
	n, rev, err = s.KV.TxnDeleteRange(txnID, key, end)
	if err == nil {
		for _, kv := range kvs {
			s.tx.del(string(kv.Key))
		}
	}
	return n, rev, err
}
func (s *watchableStore) TxnEnd(txnID int64) error {
	err := s.KV.TxnEnd(txnID)
	if err != nil {
		// the txn did not end; keep holding s.mu
		return err
	}

	_, rev, _ := s.KV.Range(nil, nil, 0, 0)
	for k := range s.tx.putm {
		kvs, _, err := s.KV.Range([]byte(k), nil, 0, 0)
		if err != nil {
			log.Panicf("unexpected range error (%v)", err)
		}
		s.handle(rev, storagepb.Event{
			Type: storagepb.PUT,
			Kv:   &kvs[0],
		})
	}
	for k := range s.tx.delm {
		s.handle(rev, storagepb.Event{
			Type: storagepb.DELETE,
			Kv: &storagepb.KeyValue{
				Key: []byte(k),
			},
		})
	}
	s.mu.Unlock()
	return nil
}
func (s *watchableStore) Close() error {
	close(s.stopc)
	s.wg.Wait()
	return s.KV.Close()
}
// Watcher creates a Watcher on the given key. If prefix is true, the watcher
// watches all keys with the given prefix. A startRev of 0 means watching from
// the current revision; otherwise the watcher first syncs events starting at
// startRev. A non-zero endRev stops the watcher once that revision is reached.
// The returned CancelFunc detaches the watcher from the store.
func (s *watchableStore) Watcher(key []byte, prefix bool, startRev, endRev int64) (Watcher, CancelFunc) {
	s.mu.Lock()
	defer s.mu.Unlock()

	wa := newWatcher(key, prefix, startRev, endRev)
	k := string(key)
	if startRev == 0 {
		s.synced[k] = append(s.synced[k], wa)
		if endRev != 0 {
			s.endm[endRev] = append(s.endm[endRev], wa)
		}
	} else {
		s.unsynced = append(s.unsynced, wa)
	}

	cancel := CancelFunc(func() {
		s.mu.Lock()
		defer s.mu.Unlock()
		wa.stopWithError(ErrCanceled)

		// remove global references of the watcher
		for i, w := range s.unsynced {
			if w == wa {
				s.unsynced = append(s.unsynced[:i], s.unsynced[i+1:]...)
				return
			}
		}
		for i, w := range s.synced[k] {
			if w == wa {
				s.synced[k] = append(s.synced[k][:i], s.synced[k][i+1:]...)
				break
			}
		}
		if wa.end != 0 {
			for i, w := range s.endm[wa.end] {
				if w == wa {
					s.endm[wa.end] = append(s.endm[wa.end][:i], s.endm[wa.end][i+1:]...)
					break
				}
			}
		}
		// If we cannot find it, it should have finished watch.
	})

	return wa, cancel
}
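// A minimal usage sketch, assuming the store from the lifecycle sketch above;
// the Watcher accessors for the event channel and terminal error are defined
// outside this file and are not shown here:
//
//	w, cancel := s.Watcher([]byte("foo"), true, 0, 0) // watch the prefix "foo" from now on
//	defer cancel()
//	s.Put([]byte("foo1"), []byte("bar"))
//	// the PUT event for "foo1" is delivered on the watcher's channel; a watcher
//	// created with a non-zero endRev is stopped with ExceedEnd once that
//	// revision is reached, and a canceled watcher is stopped with ErrCanceled.
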
// syncWatchersLoop syncs the watchers in the unsynced slice every 100ms.
func (s *watchableStore) syncWatchersLoop() {
	defer s.wg.Done()

	for {
		s.mu.Lock()
		s.syncWatchers()
		s.mu.Unlock()

		select {
		case <-time.After(100 * time.Millisecond):
		case <-s.stopc:
			return
		}
	}
}
// syncWatchers syncs the watchers in the unsynced slice.
// The caller must hold s.mu.
func (s *watchableStore) syncWatchers() {
	_, curRev, _ := s.KV.Range(nil, nil, 0, 0)

	// filtering without allocating
	// https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
	nws := s.unsynced[:0]
	for _, w := range s.unsynced {
		var end []byte
		if w.prefix {
			// range end for a prefix watch: the key with its last byte incremented
			end = make([]byte, len(w.key))
			copy(end, w.key)
			end[len(w.key)-1]++
		}
		limit := cap(w.ch) - len(w.ch)
		// the channel is full; try again in the next round
		if limit == 0 {
			nws = append(nws, w)
			continue
		}
		evs, nextRev, err := s.KV.(*store).RangeEvents(w.key, end, int64(limit), w.cur, w.end)
		if err != nil {
			w.stopWithError(err)
			continue
		}

		// push events to the channel
		for _, ev := range evs {
			w.ch <- ev
		}
		// stop the watcher if it reaches its end revision
		if w.end > 0 && nextRev >= w.end {
			w.stopWithError(ExceedEnd)
			continue
		}
		// switch to tracking future events if needed
		if nextRev > curRev {
			s.synced[string(w.key)] = append(s.synced[string(w.key)], w)
			if w.end != 0 {
				s.endm[w.end] = append(s.endm[w.end], w)
			}
			continue
		}
		// put it back to try again in the next round
		w.cur = nextRev
		nws = append(nws, w)
	}
	s.unsynced = nws
}
// handle notifies all watchers of the event that happened at the given rev and
// stops the watchers whose end revision has been reached.
func (s *watchableStore) handle(rev int64, ev storagepb.Event) {
	s.notify(rev, ev)
	s.stopWatchers(rev)
}
// notify notifies the watchers watching on the key of the event that the given
// event just happened at the given rev.
func (s *watchableStore) notify(rev int64, ev storagepb.Event) {
	// check all prefixes of the key to notify all corresponding watchers
	for i := 0; i <= len(ev.Kv.Key); i++ {
		ws := s.synced[string(ev.Kv.Key[:i])]
		nws := ws[:0]
		for _, w := range ws {
			// the watcher needs to be notified when it either watches the prefix
			// or the key matches exactly.
			if !w.prefix && i != len(ev.Kv.Key) {
				continue
			}
			select {
			case w.ch <- ev:
				nws = append(nws, w)
			default:
				// the channel is full; move the watcher back to unsynced
				if w.end != 0 {
					for j, ew := range s.endm[w.end] {
						if ew == w {
							s.endm[w.end] = append(s.endm[w.end][:j], s.endm[w.end][j+1:]...)
							break
						}
					}
				}
				w.cur = rev
				s.unsynced = append(s.unsynced, w)
			}
		}
		s.synced[string(ev.Kv.Key[:i])] = nws
	}
}
// stopWatchers stops the watchers whose end revision is rev+1; such watchers
// have been notified of every event up to rev.
func (s *watchableStore) stopWatchers(rev int64) {
	for _, wa := range s.endm[rev+1] {
		k := string(wa.key)
		for i, w := range s.synced[k] {
			if w == wa {
				s.synced[k] = append(s.synced[k][:i], s.synced[k][i+1:]...)
				break
			}
		}
		wa.stopWithError(ExceedEnd)
	}
	delete(s.endm, rev+1)
}
type ongoingTx struct {
	// keys put/deleted in the ongoing txn
	putm map[string]bool
	delm map[string]bool
}

func newOngoingTx() *ongoingTx {
	return &ongoingTx{
		putm: make(map[string]bool),
		delm: make(map[string]bool),
	}
}

func (tx *ongoingTx) put(k string) {
	tx.putm[k] = true
	// a later put overrides an earlier delete of the same key, so only the
	// final operation is reported by TxnEnd
	delete(tx.delm, k)
}

func (tx *ongoingTx) del(k string) {
	tx.delm[k] = true
	// a later delete overrides an earlier put of the same key
	delete(tx.putm, k)
}