kvstore_txn.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. // Copyright 2017 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package mvcc
  15. import (
  16. "github.com/coreos/etcd/lease"
  17. "github.com/coreos/etcd/mvcc/backend"
  18. "github.com/coreos/etcd/mvcc/mvccpb"
  19. "go.uber.org/zap"
  20. )
  21. type storeTxnRead struct {
  22. s *store
  23. tx backend.ReadTx
  24. firstRev int64
  25. rev int64
  26. }
  27. func (s *store) Read() TxnRead {
  28. s.mu.RLock()
  29. tx := s.b.ReadTx()
  30. s.revMu.RLock()
  31. tx.Lock()
  32. firstRev, rev := s.compactMainRev, s.currentRev
  33. s.revMu.RUnlock()
  34. return newMetricsTxnRead(&storeTxnRead{s, tx, firstRev, rev})
  35. }
  36. func (tr *storeTxnRead) FirstRev() int64 { return tr.firstRev }
  37. func (tr *storeTxnRead) Rev() int64 { return tr.rev }
  38. func (tr *storeTxnRead) Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error) {
  39. return tr.rangeKeys(key, end, tr.Rev(), ro)
  40. }
  41. func (tr *storeTxnRead) End() {
  42. tr.tx.Unlock()
  43. tr.s.mu.RUnlock()
  44. }
  45. type storeTxnWrite struct {
  46. storeTxnRead
  47. tx backend.BatchTx
  48. // beginRev is the revision where the txn begins; it will write to the next revision.
  49. beginRev int64
  50. changes []mvccpb.KeyValue
  51. }
  52. func (s *store) Write() TxnWrite {
  53. s.mu.RLock()
  54. tx := s.b.BatchTx()
  55. tx.Lock()
  56. tw := &storeTxnWrite{
  57. storeTxnRead: storeTxnRead{s, tx, 0, 0},
  58. tx: tx,
  59. beginRev: s.currentRev,
  60. changes: make([]mvccpb.KeyValue, 0, 4),
  61. }
  62. return newMetricsTxnWrite(tw)
  63. }
  64. func (tw *storeTxnWrite) Rev() int64 { return tw.beginRev }
  65. func (tw *storeTxnWrite) Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error) {
  66. rev := tw.beginRev
  67. if len(tw.changes) > 0 {
  68. rev++
  69. }
  70. return tw.rangeKeys(key, end, rev, ro)
  71. }
  72. func (tw *storeTxnWrite) DeleteRange(key, end []byte) (int64, int64) {
  73. if n := tw.deleteRange(key, end); n != 0 || len(tw.changes) > 0 {
  74. return n, tw.beginRev + 1
  75. }
  76. return 0, tw.beginRev
  77. }
  78. func (tw *storeTxnWrite) Put(key, value []byte, lease lease.LeaseID) int64 {
  79. tw.put(key, value, lease)
  80. return tw.beginRev + 1
  81. }
  82. func (tw *storeTxnWrite) End() {
  83. // only update index if the txn modifies the mvcc state.
  84. if len(tw.changes) != 0 {
  85. tw.s.saveIndex(tw.tx)
  86. // hold revMu lock to prevent new read txns from opening until writeback.
  87. tw.s.revMu.Lock()
  88. tw.s.currentRev++
  89. }
  90. tw.tx.Unlock()
  91. if len(tw.changes) != 0 {
  92. tw.s.revMu.Unlock()
  93. }
  94. tw.s.mu.RUnlock()
  95. }
  96. func (tr *storeTxnRead) rangeKeys(key, end []byte, curRev int64, ro RangeOptions) (*RangeResult, error) {
  97. rev := ro.Rev
  98. if rev > curRev {
  99. return &RangeResult{KVs: nil, Count: -1, Rev: curRev}, ErrFutureRev
  100. }
  101. if rev <= 0 {
  102. rev = curRev
  103. }
  104. if rev < tr.s.compactMainRev {
  105. return &RangeResult{KVs: nil, Count: -1, Rev: 0}, ErrCompacted
  106. }
  107. revpairs := tr.s.kvindex.Revisions(key, end, rev)
  108. if len(revpairs) == 0 {
  109. return &RangeResult{KVs: nil, Count: 0, Rev: curRev}, nil
  110. }
  111. if ro.Count {
  112. return &RangeResult{KVs: nil, Count: len(revpairs), Rev: curRev}, nil
  113. }
  114. limit := int(ro.Limit)
  115. if limit <= 0 || limit > len(revpairs) {
  116. limit = len(revpairs)
  117. }
  118. kvs := make([]mvccpb.KeyValue, limit)
  119. revBytes := newRevBytes()
  120. for i, revpair := range revpairs[:len(kvs)] {
  121. revToBytes(revpair, revBytes)
  122. _, vs := tr.tx.UnsafeRange(keyBucketName, revBytes, nil, 0)
  123. if len(vs) != 1 {
  124. if tr.s.lg != nil {
  125. tr.s.lg.Fatal(
  126. "range failed to find revision pair",
  127. zap.Int64("revision-main", revpair.main),
  128. zap.Int64("revision-sub", revpair.sub),
  129. )
  130. } else {
  131. plog.Fatalf("range cannot find rev (%d,%d)", revpair.main, revpair.sub)
  132. }
  133. }
  134. if err := kvs[i].Unmarshal(vs[0]); err != nil {
  135. if tr.s.lg != nil {
  136. tr.s.lg.Fatal(
  137. "failed to unmarshal mvccpb.KeyValue",
  138. zap.Error(err),
  139. )
  140. } else {
  141. plog.Fatalf("cannot unmarshal event: %v", err)
  142. }
  143. }
  144. }
  145. return &RangeResult{KVs: kvs, Count: len(revpairs), Rev: curRev}, nil
  146. }
  147. func (tw *storeTxnWrite) put(key, value []byte, leaseID lease.LeaseID) {
  148. rev := tw.beginRev + 1
  149. c := rev
  150. oldLease := lease.NoLease
  151. // if the key exists before, use its previous created and
  152. // get its previous leaseID
  153. _, created, ver, err := tw.s.kvindex.Get(key, rev)
  154. if err == nil {
  155. c = created.main
  156. oldLease = tw.s.le.GetLease(lease.LeaseItem{Key: string(key)})
  157. }
  158. ibytes := newRevBytes()
  159. idxRev := revision{main: rev, sub: int64(len(tw.changes))}
  160. revToBytes(idxRev, ibytes)
  161. ver = ver + 1
  162. kv := mvccpb.KeyValue{
  163. Key: key,
  164. Value: value,
  165. CreateRevision: c,
  166. ModRevision: rev,
  167. Version: ver,
  168. Lease: int64(leaseID),
  169. }
  170. d, err := kv.Marshal()
  171. if err != nil {
  172. if tw.storeTxnRead.s.lg != nil {
  173. tw.storeTxnRead.s.lg.Fatal(
  174. "failed to marshal mvccpb.KeyValue",
  175. zap.Error(err),
  176. )
  177. } else {
  178. plog.Fatalf("cannot marshal event: %v", err)
  179. }
  180. }
  181. tw.tx.UnsafeSeqPut(keyBucketName, ibytes, d)
  182. tw.s.kvindex.Put(key, idxRev)
  183. tw.changes = append(tw.changes, kv)
  184. if oldLease != lease.NoLease {
  185. if tw.s.le == nil {
  186. panic("no lessor to detach lease")
  187. }
  188. err = tw.s.le.Detach(oldLease, []lease.LeaseItem{{Key: string(key)}})
  189. if err != nil {
  190. if tw.storeTxnRead.s.lg != nil {
  191. tw.storeTxnRead.s.lg.Fatal(
  192. "failed to detach old lease from a key",
  193. zap.Error(err),
  194. )
  195. } else {
  196. plog.Errorf("unexpected error from lease detach: %v", err)
  197. }
  198. }
  199. }
  200. if leaseID != lease.NoLease {
  201. if tw.s.le == nil {
  202. panic("no lessor to attach lease")
  203. }
  204. err = tw.s.le.Attach(leaseID, []lease.LeaseItem{{Key: string(key)}})
  205. if err != nil {
  206. panic("unexpected error from lease Attach")
  207. }
  208. }
  209. }
  210. func (tw *storeTxnWrite) deleteRange(key, end []byte) int64 {
  211. rrev := tw.beginRev
  212. if len(tw.changes) > 0 {
  213. rrev += 1
  214. }
  215. keys, _ := tw.s.kvindex.Range(key, end, rrev)
  216. if len(keys) == 0 {
  217. return 0
  218. }
  219. for _, key := range keys {
  220. tw.delete(key)
  221. }
  222. return int64(len(keys))
  223. }
  224. func (tw *storeTxnWrite) delete(key []byte) {
  225. ibytes := newRevBytes()
  226. idxRev := revision{main: tw.beginRev + 1, sub: int64(len(tw.changes))}
  227. revToBytes(idxRev, ibytes)
  228. if tw.storeTxnRead.s != nil && tw.storeTxnRead.s.lg != nil {
  229. ibytes = appendMarkTombstone(tw.storeTxnRead.s.lg, ibytes)
  230. } else {
  231. // TODO: remove this in v3.5
  232. ibytes = appendMarkTombstone(nil, ibytes)
  233. }
  234. kv := mvccpb.KeyValue{Key: key}
  235. d, err := kv.Marshal()
  236. if err != nil {
  237. if tw.storeTxnRead.s.lg != nil {
  238. tw.storeTxnRead.s.lg.Fatal(
  239. "failed to marshal mvccpb.KeyValue",
  240. zap.Error(err),
  241. )
  242. } else {
  243. plog.Fatalf("cannot marshal event: %v", err)
  244. }
  245. }
  246. tw.tx.UnsafeSeqPut(keyBucketName, ibytes, d)
  247. err = tw.s.kvindex.Tombstone(key, idxRev)
  248. if err != nil {
  249. if tw.storeTxnRead.s.lg != nil {
  250. tw.storeTxnRead.s.lg.Fatal(
  251. "failed to tombstone an existing key",
  252. zap.String("key", string(key)),
  253. zap.Error(err),
  254. )
  255. } else {
  256. plog.Fatalf("cannot tombstone an existing key (%s): %v", string(key), err)
  257. }
  258. }
  259. tw.changes = append(tw.changes, kv)
  260. item := lease.LeaseItem{Key: string(key)}
  261. leaseID := tw.s.le.GetLease(item)
  262. if leaseID != lease.NoLease {
  263. err = tw.s.le.Detach(leaseID, []lease.LeaseItem{item})
  264. if err != nil {
  265. if tw.storeTxnRead.s.lg != nil {
  266. tw.storeTxnRead.s.lg.Fatal(
  267. "failed to detach old lease from a key",
  268. zap.Error(err),
  269. )
  270. } else {
  271. plog.Errorf("cannot detach %v", err)
  272. }
  273. }
  274. }
  275. }
  276. func (tw *storeTxnWrite) Changes() []mvccpb.KeyValue { return tw.changes }