// batch_tx.go — etcd backend batch transaction implementation.
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package backend
  15. import (
  16. "bytes"
  17. "fmt"
  18. "math"
  19. "sync"
  20. "sync/atomic"
  21. "time"
  22. "github.com/boltdb/bolt"
  23. )
// BatchTx is a ReadTx that additionally supports batched writes against
// the backend's current writable bolt transaction. All Unsafe* methods
// must be called while holding the transaction lock.
type BatchTx interface {
	ReadTx
	// UnsafeCreateBucket creates the named bucket if it does not exist.
	UnsafeCreateBucket(name []byte)
	// UnsafePut writes key/value into the named bucket.
	UnsafePut(bucketName []byte, key []byte, value []byte)
	// UnsafeSeqPut writes key/value, hinting that keys arrive in
	// sequential order so the bucket may use a higher fill percent.
	UnsafeSeqPut(bucketName []byte, key []byte, value []byte)
	// UnsafeDelete removes key from the named bucket.
	UnsafeDelete(bucketName []byte, key []byte)
	// Commit commits a previous tx and begins a new writable one.
	Commit()
	// CommitAndStop commits the previous tx and does not create a new one.
	CommitAndStop()
}
// batchTx is the basic BatchTx implementation: a mutex-guarded wrapper
// around a writable bolt transaction. It counts pending changes so the
// tx can be auto-committed once the backend's batch limit is reached
// (see Unlock).
type batchTx struct {
	sync.Mutex
	tx      *bolt.Tx // current writable bolt transaction
	backend *backend // owning backend; supplies batchLimit and new txs
	pending int      // number of uncommitted changes in tx
}
  41. var nopLock sync.Locker = &nopLocker{}
  42. type nopLocker struct{}
  43. func (*nopLocker) Lock() {}
  44. func (*nopLocker) Unlock() {}
  45. func (t *batchTx) UnsafeCreateBucket(name []byte) {
  46. _, err := t.tx.CreateBucket(name)
  47. if err != nil && err != bolt.ErrBucketExists {
  48. plog.Fatalf("cannot create bucket %s (%v)", name, err)
  49. }
  50. t.pending++
  51. }
// UnsafePut must be called holding the lock on the tx.
// It writes key/value into bucketName using the default fill percent.
func (t *batchTx) UnsafePut(bucketName []byte, key []byte, value []byte) {
	t.unsafePut(bucketName, key, value, false)
}
// UnsafeSeqPut must be called holding the lock on the tx.
// It writes key/value into bucketName, hinting that keys arrive in
// sequential order so the bucket may use a higher fill percent.
func (t *batchTx) UnsafeSeqPut(bucketName []byte, key []byte, value []byte) {
	t.unsafePut(bucketName, key, value, true)
}
  60. func (t *batchTx) unsafePut(bucketName []byte, key []byte, value []byte, seq bool) {
  61. bucket := t.tx.Bucket(bucketName)
  62. if bucket == nil {
  63. plog.Fatalf("bucket %s does not exist", bucketName)
  64. }
  65. if seq {
  66. // it is useful to increase fill percent when the workloads are mostly append-only.
  67. // this can delay the page split and reduce space usage.
  68. bucket.FillPercent = 0.9
  69. }
  70. if err := bucket.Put(key, value); err != nil {
  71. plog.Fatalf("cannot put key into bucket (%v)", err)
  72. }
  73. t.pending++
  74. }
// UnsafeRange must be called holding the lock on the tx.
// It returns up to limit key/value pairs in [key, endKey); when endKey is
// empty it returns at most the single entry for key. A missing bucket is
// fatal.
func (t *batchTx) UnsafeRange(bucketName, key, endKey []byte, limit int64) ([][]byte, [][]byte) {
	// nop lock since a write txn should already hold a lock over t.tx
	k, v, err := unsafeRange(t.tx, bucketName, key, endKey, limit, nopLock)
	if err != nil {
		plog.Fatal(err)
	}
	return k, v
}
// unsafeRange scans bucketName in tx for keys in [key, endKey), returning
// at most limit pairs (limit <= 0 means unlimited). An empty endKey turns
// the scan into a point lookup of key. The locker l guards bucket/cursor
// setup; callers that already hold the tx lock pass nopLock. Returns an
// error only when the bucket does not exist.
func unsafeRange(tx *bolt.Tx, bucketName, key, endKey []byte, limit int64, l sync.Locker) (keys [][]byte, vs [][]byte, err error) {
	l.Lock()
	bucket := tx.Bucket(bucketName)
	if bucket == nil {
		l.Unlock()
		return nil, nil, fmt.Errorf("bucket %s does not exist", bucketName)
	}
	if len(endKey) == 0 {
		// Point lookup: no range end was given.
		v := bucket.Get(key)
		l.Unlock()
		if v != nil {
			return append(keys, key), append(vs, v), nil
		}
		return nil, nil, nil
	}
	c := bucket.Cursor()
	// NOTE(review): the cursor is iterated after l.Unlock(); this assumes
	// the underlying tx is not mutated concurrently — confirm with callers.
	l.Unlock()
	if limit <= 0 {
		limit = math.MaxInt64
	}
	for ck, cv := c.Seek(key); ck != nil && bytes.Compare(ck, endKey) < 0; ck, cv = c.Next() {
		vs = append(vs, cv)
		keys = append(keys, ck)
		if limit == int64(len(keys)) {
			break
		}
	}
	return keys, vs, nil
}
  113. // UnsafeDelete must be called holding the lock on the tx.
  114. func (t *batchTx) UnsafeDelete(bucketName []byte, key []byte) {
  115. bucket := t.tx.Bucket(bucketName)
  116. if bucket == nil {
  117. plog.Fatalf("bucket %s does not exist", bucketName)
  118. }
  119. err := bucket.Delete(key)
  120. if err != nil {
  121. plog.Fatalf("cannot delete key from bucket (%v)", err)
  122. }
  123. t.pending++
  124. }
// UnsafeForEach must be called holding the lock on the tx.
// It applies visitor to every key/value pair in bucketName, stopping on
// the first error visitor returns.
func (t *batchTx) UnsafeForEach(bucketName []byte, visitor func(k, v []byte) error) error {
	return unsafeForEach(t.tx, bucketName, visitor)
}
  129. func unsafeForEach(tx *bolt.Tx, bucket []byte, visitor func(k, v []byte) error) error {
  130. if b := tx.Bucket(bucket); b != nil {
  131. return b.ForEach(visitor)
  132. }
  133. return nil
  134. }
// Commit commits a previous tx and begins a new writable one.
// The lock serializes against concurrent batched writes.
func (t *batchTx) Commit() {
	t.Lock()
	defer t.Unlock()
	t.commit(false)
}
// CommitAndStop commits the previous tx and does not create a new one.
// After this call t.tx is left without a successor (see commit).
func (t *batchTx) CommitAndStop() {
	t.Lock()
	defer t.Unlock()
	t.commit(true)
}
// Unlock releases the tx lock, first auto-committing when the number of
// pending changes has reached the backend's batch limit.
func (t *batchTx) Unlock() {
	if t.pending >= t.backend.batchLimit {
		t.commit(false)
	}
	t.Mutex.Unlock()
}
// commit commits the current tx, if any. When stop is false a fresh
// writable tx is begun afterwards; when stop is true no replacement tx is
// created. Callers must hold the tx lock. The gofail comments below are
// failpoint markers and must not be altered.
func (t *batchTx) commit(stop bool) {
	// commit the last tx
	if t.tx != nil {
		if t.pending == 0 && !stop {
			// Nothing to write: just refresh the recorded db size.
			t.backend.mu.RLock()
			defer t.backend.mu.RUnlock()
			// t.tx.DB()==nil if 'CommitAndStop' calls 'batchTx.commit(true)',
			// which initializes *bolt.Tx.db and *bolt.Tx.meta as nil; panics t.tx.Size().
			// Server must make sure 'batchTx.commit(false)' does not follow
			// 'batchTx.commit(true)' (e.g. stopping backend, and inflight Hash call).
			atomic.StoreInt64(&t.backend.size, t.tx.Size())
			return
		}
		start := time.Now()
		// gofail: var beforeCommit struct{}
		err := t.tx.Commit()
		// gofail: var afterCommit struct{}
		commitDurations.Observe(time.Since(start).Seconds())
		atomic.AddInt64(&t.backend.commits, 1)
		t.pending = 0
		if err != nil {
			plog.Fatalf("cannot commit tx (%s)", err)
		}
	}
	if !stop {
		t.tx = t.backend.begin(true)
	}
}
// batchTxBuffered is a batchTx that additionally mirrors every write into
// an in-memory buffer so reads can be served without touching boltdb.
type batchTxBuffered struct {
	batchTx
	buf txWriteBuffer // pending writes, flushed into the read buffer on Unlock
}
  185. func newBatchTxBuffered(backend *backend) *batchTxBuffered {
  186. tx := &batchTxBuffered{
  187. batchTx: batchTx{backend: backend},
  188. buf: txWriteBuffer{
  189. txBuffer: txBuffer{make(map[string]*bucketBuffer)},
  190. seq: true,
  191. },
  192. }
  193. tx.Commit()
  194. return tx
  195. }
// Unlock flushes any pending writes into the backend's read buffer (so
// subsequent buffered reads observe them), auto-commits once the batch
// limit is reached, and then releases the tx lock.
func (t *batchTxBuffered) Unlock() {
	if t.pending != 0 {
		// Publish buffered writes to readers before dropping the lock.
		t.backend.readTx.mu.Lock()
		t.buf.writeback(&t.backend.readTx.buf)
		t.backend.readTx.mu.Unlock()
		if t.pending >= t.backend.batchLimit {
			t.commit(false)
		}
	}
	t.batchTx.Unlock()
}
// Commit commits a previous tx and begins a new writable one.
func (t *batchTxBuffered) Commit() {
	t.Lock()
	defer t.Unlock()
	t.commit(false)
}
// CommitAndStop commits the previous tx and does not create a new one.
func (t *batchTxBuffered) CommitAndStop() {
	t.Lock()
	defer t.Unlock()
	t.commit(true)
}
// commit serializes against readers via readTx.mu, then delegates the
// actual commit work to unsafeCommit.
func (t *batchTxBuffered) commit(stop bool) {
	// all read txs must be closed to acquire boltdb commit rwlock
	t.backend.readTx.mu.Lock()
	defer t.backend.readTx.mu.Unlock()
	t.unsafeCommit(stop)
}
// unsafeCommit rolls back the outstanding read-only tx and resets its
// buffer, commits the write tx, and (unless stopping) begins a
// replacement read-only tx. Callers must hold both the tx lock and
// readTx.mu.
func (t *batchTxBuffered) unsafeCommit(stop bool) {
	if t.backend.readTx.tx != nil {
		// Read-only bolt txs are ended with Rollback, not Commit.
		if err := t.backend.readTx.tx.Rollback(); err != nil {
			plog.Fatalf("cannot rollback tx (%s)", err)
		}
		t.backend.readTx.buf.reset()
		t.backend.readTx.tx = nil
	}
	t.batchTx.commit(stop)
	if !stop {
		t.backend.readTx.tx = t.backend.begin(false)
	}
}
// UnsafePut writes through to boltdb and mirrors the write into the
// in-memory buffer so it becomes visible to buffered reads on Unlock.
func (t *batchTxBuffered) UnsafePut(bucketName []byte, key []byte, value []byte) {
	t.batchTx.UnsafePut(bucketName, key, value)
	t.buf.put(bucketName, key, value)
}
// UnsafeSeqPut is the sequential-key variant of UnsafePut: it writes
// through with a raised fill percent and records the write in the buffer
// via the sequential-append path.
func (t *batchTxBuffered) UnsafeSeqPut(bucketName []byte, key []byte, value []byte) {
	t.batchTx.UnsafeSeqPut(bucketName, key, value)
	t.buf.putSeq(bucketName, key, value)
}