batch_tx.go 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package backend
  15. import (
  16. "bytes"
  17. "math"
  18. "sync"
  19. "sync/atomic"
  20. "time"
  21. bolt "github.com/coreos/bbolt"
  22. )
  23. type BatchTx interface {
  24. ReadTx
  25. UnsafeCreateBucket(name []byte)
  26. UnsafePut(bucketName []byte, key []byte, value []byte)
  27. UnsafeSeqPut(bucketName []byte, key []byte, value []byte)
  28. UnsafeDelete(bucketName []byte, key []byte)
  29. // Commit commits a previous tx and begins a new writable one.
  30. Commit()
  31. // CommitAndStop commits the previous tx and does not create a new one.
  32. CommitAndStop()
  33. }
  34. type batchTx struct {
  35. sync.Mutex
  36. tx *bolt.Tx
  37. backend *backend
  38. pending int
  39. }
  40. func (t *batchTx) UnsafeCreateBucket(name []byte) {
  41. _, err := t.tx.CreateBucket(name)
  42. if err != nil && err != bolt.ErrBucketExists {
  43. plog.Fatalf("cannot create bucket %s (%v)", name, err)
  44. }
  45. t.pending++
  46. }
  47. // UnsafePut must be called holding the lock on the tx.
  48. func (t *batchTx) UnsafePut(bucketName []byte, key []byte, value []byte) {
  49. t.unsafePut(bucketName, key, value, false)
  50. }
  51. // UnsafeSeqPut must be called holding the lock on the tx.
  52. func (t *batchTx) UnsafeSeqPut(bucketName []byte, key []byte, value []byte) {
  53. t.unsafePut(bucketName, key, value, true)
  54. }
  55. func (t *batchTx) unsafePut(bucketName []byte, key []byte, value []byte, seq bool) {
  56. bucket := t.tx.Bucket(bucketName)
  57. if bucket == nil {
  58. plog.Fatalf("bucket %s does not exist", bucketName)
  59. }
  60. if seq {
  61. // it is useful to increase fill percent when the workloads are mostly append-only.
  62. // this can delay the page split and reduce space usage.
  63. bucket.FillPercent = 0.9
  64. }
  65. if err := bucket.Put(key, value); err != nil {
  66. plog.Fatalf("cannot put key into bucket (%v)", err)
  67. }
  68. t.pending++
  69. }
  70. // UnsafeRange must be called holding the lock on the tx.
  71. func (t *batchTx) UnsafeRange(bucketName, key, endKey []byte, limit int64) ([][]byte, [][]byte) {
  72. bucket := t.tx.Bucket(bucketName)
  73. if bucket == nil {
  74. plog.Fatalf("bucket %s does not exist", bucketName)
  75. }
  76. return unsafeRange(bucket.Cursor(), key, endKey, limit)
  77. }
  78. func unsafeRange(c *bolt.Cursor, key, endKey []byte, limit int64) (keys [][]byte, vs [][]byte) {
  79. if limit <= 0 {
  80. limit = math.MaxInt64
  81. }
  82. var isMatch func(b []byte) bool
  83. if len(endKey) > 0 {
  84. isMatch = func(b []byte) bool { return bytes.Compare(b, endKey) < 0 }
  85. } else {
  86. isMatch = func(b []byte) bool { return bytes.Equal(b, key) }
  87. limit = 1
  88. }
  89. for ck, cv := c.Seek(key); ck != nil && isMatch(ck); ck, cv = c.Next() {
  90. vs = append(vs, cv)
  91. keys = append(keys, ck)
  92. if limit == int64(len(keys)) {
  93. break
  94. }
  95. }
  96. return keys, vs
  97. }
  98. // UnsafeDelete must be called holding the lock on the tx.
  99. func (t *batchTx) UnsafeDelete(bucketName []byte, key []byte) {
  100. bucket := t.tx.Bucket(bucketName)
  101. if bucket == nil {
  102. plog.Fatalf("bucket %s does not exist", bucketName)
  103. }
  104. err := bucket.Delete(key)
  105. if err != nil {
  106. plog.Fatalf("cannot delete key from bucket (%v)", err)
  107. }
  108. t.pending++
  109. }
  110. // UnsafeForEach must be called holding the lock on the tx.
  111. func (t *batchTx) UnsafeForEach(bucketName []byte, visitor func(k, v []byte) error) error {
  112. return unsafeForEach(t.tx, bucketName, visitor)
  113. }
  114. func unsafeForEach(tx *bolt.Tx, bucket []byte, visitor func(k, v []byte) error) error {
  115. if b := tx.Bucket(bucket); b != nil {
  116. return b.ForEach(visitor)
  117. }
  118. return nil
  119. }
  120. // Commit commits a previous tx and begins a new writable one.
  121. func (t *batchTx) Commit() {
  122. t.Lock()
  123. defer t.Unlock()
  124. t.commit(false)
  125. }
  126. // CommitAndStop commits the previous tx and does not create a new one.
  127. func (t *batchTx) CommitAndStop() {
  128. t.Lock()
  129. defer t.Unlock()
  130. t.commit(true)
  131. }
  132. func (t *batchTx) Unlock() {
  133. if t.pending >= t.backend.batchLimit {
  134. t.commit(false)
  135. }
  136. t.Mutex.Unlock()
  137. }
  138. func (t *batchTx) commit(stop bool) {
  139. // commit the last tx
  140. if t.tx != nil {
  141. if t.pending == 0 && !stop {
  142. t.backend.mu.RLock()
  143. defer t.backend.mu.RUnlock()
  144. // t.tx.DB()==nil if 'CommitAndStop' calls 'batchTx.commit(true)',
  145. // which initializes *bolt.Tx.db and *bolt.Tx.meta as nil; panics t.tx.Size().
  146. // Server must make sure 'batchTx.commit(false)' does not follow
  147. // 'batchTx.commit(true)' (e.g. stopping backend, and inflight Hash call).
  148. size := t.tx.Size()
  149. db := t.tx.DB()
  150. atomic.StoreInt64(&t.backend.size, size)
  151. atomic.StoreInt64(&t.backend.sizeInUse, size-(int64(db.Stats().FreePageN)*int64(db.Info().PageSize)))
  152. return
  153. }
  154. start := time.Now()
  155. // gofail: var beforeCommit struct{}
  156. err := t.tx.Commit()
  157. // gofail: var afterCommit struct{}
  158. commitDurations.Observe(time.Since(start).Seconds())
  159. atomic.AddInt64(&t.backend.commits, 1)
  160. t.pending = 0
  161. if err != nil {
  162. plog.Fatalf("cannot commit tx (%s)", err)
  163. }
  164. }
  165. if !stop {
  166. t.tx = t.backend.begin(true)
  167. }
  168. }
  169. type batchTxBuffered struct {
  170. batchTx
  171. buf txWriteBuffer
  172. }
  173. func newBatchTxBuffered(backend *backend) *batchTxBuffered {
  174. tx := &batchTxBuffered{
  175. batchTx: batchTx{backend: backend},
  176. buf: txWriteBuffer{
  177. txBuffer: txBuffer{make(map[string]*bucketBuffer)},
  178. seq: true,
  179. },
  180. }
  181. tx.Commit()
  182. return tx
  183. }
  184. func (t *batchTxBuffered) Unlock() {
  185. if t.pending != 0 {
  186. t.backend.readTx.mu.Lock()
  187. t.buf.writeback(&t.backend.readTx.buf)
  188. t.backend.readTx.mu.Unlock()
  189. if t.pending >= t.backend.batchLimit {
  190. t.commit(false)
  191. }
  192. }
  193. t.batchTx.Unlock()
  194. }
  195. func (t *batchTxBuffered) Commit() {
  196. t.Lock()
  197. defer t.Unlock()
  198. t.commit(false)
  199. }
  200. func (t *batchTxBuffered) CommitAndStop() {
  201. t.Lock()
  202. defer t.Unlock()
  203. t.commit(true)
  204. }
  205. func (t *batchTxBuffered) commit(stop bool) {
  206. // all read txs must be closed to acquire boltdb commit rwlock
  207. t.backend.readTx.mu.Lock()
  208. defer t.backend.readTx.mu.Unlock()
  209. t.unsafeCommit(stop)
  210. }
  211. func (t *batchTxBuffered) unsafeCommit(stop bool) {
  212. if t.backend.readTx.tx != nil {
  213. if err := t.backend.readTx.tx.Rollback(); err != nil {
  214. plog.Fatalf("cannot rollback tx (%s)", err)
  215. }
  216. t.backend.readTx.reset()
  217. }
  218. t.batchTx.commit(stop)
  219. if !stop {
  220. t.backend.readTx.tx = t.backend.begin(false)
  221. }
  222. }
  223. func (t *batchTxBuffered) UnsafePut(bucketName []byte, key []byte, value []byte) {
  224. t.batchTx.UnsafePut(bucketName, key, value)
  225. t.buf.put(bucketName, key, value)
  226. }
  227. func (t *batchTxBuffered) UnsafeSeqPut(bucketName []byte, key []byte, value []byte) {
  228. t.batchTx.UnsafeSeqPut(bucketName, key, value)
  229. t.buf.putSeq(bucketName, key, value)
  230. }