batch_tx.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package backend
  15. import (
  16. "bytes"
  17. "math"
  18. "sync"
  19. "sync/atomic"
  20. "time"
  21. bolt "go.etcd.io/bbolt"
  22. "go.uber.org/zap"
  23. )
  24. type BatchTx interface {
  25. ReadTx
  26. UnsafeCreateBucket(name []byte)
  27. UnsafePut(bucketName []byte, key []byte, value []byte)
  28. UnsafeSeqPut(bucketName []byte, key []byte, value []byte)
  29. UnsafeDelete(bucketName []byte, key []byte)
  30. // Commit commits a previous tx and begins a new writable one.
  31. Commit()
  32. // CommitAndStop commits the previous tx and does not create a new one.
  33. CommitAndStop()
  34. }
  35. type batchTx struct {
  36. sync.Mutex
  37. tx *bolt.Tx
  38. backend *backend
  39. pending int
  40. }
  41. func (t *batchTx) UnsafeCreateBucket(name []byte) {
  42. _, err := t.tx.CreateBucket(name)
  43. if err != nil && err != bolt.ErrBucketExists {
  44. if t.backend.lg != nil {
  45. t.backend.lg.Fatal(
  46. "failed to create a bucket",
  47. zap.String("bucket-name", string(name)),
  48. zap.Error(err),
  49. )
  50. } else {
  51. plog.Fatalf("cannot create bucket %s (%v)", name, err)
  52. }
  53. }
  54. t.pending++
  55. }
  56. // UnsafePut must be called holding the lock on the tx.
  57. func (t *batchTx) UnsafePut(bucketName []byte, key []byte, value []byte) {
  58. t.unsafePut(bucketName, key, value, false)
  59. }
  60. // UnsafeSeqPut must be called holding the lock on the tx.
  61. func (t *batchTx) UnsafeSeqPut(bucketName []byte, key []byte, value []byte) {
  62. t.unsafePut(bucketName, key, value, true)
  63. }
  64. func (t *batchTx) unsafePut(bucketName []byte, key []byte, value []byte, seq bool) {
  65. bucket := t.tx.Bucket(bucketName)
  66. if bucket == nil {
  67. if t.backend.lg != nil {
  68. t.backend.lg.Fatal(
  69. "failed to find a bucket",
  70. zap.String("bucket-name", string(bucketName)),
  71. )
  72. } else {
  73. plog.Fatalf("bucket %s does not exist", bucketName)
  74. }
  75. }
  76. if seq {
  77. // it is useful to increase fill percent when the workloads are mostly append-only.
  78. // this can delay the page split and reduce space usage.
  79. bucket.FillPercent = 0.9
  80. }
  81. if err := bucket.Put(key, value); err != nil {
  82. if t.backend.lg != nil {
  83. t.backend.lg.Fatal(
  84. "failed to write to a bucket",
  85. zap.String("bucket-name", string(bucketName)),
  86. zap.Error(err),
  87. )
  88. } else {
  89. plog.Fatalf("cannot put key into bucket (%v)", err)
  90. }
  91. }
  92. t.pending++
  93. }
  94. // UnsafeRange must be called holding the lock on the tx.
  95. func (t *batchTx) UnsafeRange(bucketName, key, endKey []byte, limit int64) ([][]byte, [][]byte) {
  96. bucket := t.tx.Bucket(bucketName)
  97. if bucket == nil {
  98. if t.backend.lg != nil {
  99. t.backend.lg.Fatal(
  100. "failed to find a bucket",
  101. zap.String("bucket-name", string(bucketName)),
  102. )
  103. } else {
  104. plog.Fatalf("bucket %s does not exist", bucketName)
  105. }
  106. }
  107. return unsafeRange(bucket.Cursor(), key, endKey, limit)
  108. }
  109. func unsafeRange(c *bolt.Cursor, key, endKey []byte, limit int64) (keys [][]byte, vs [][]byte) {
  110. if limit <= 0 {
  111. limit = math.MaxInt64
  112. }
  113. var isMatch func(b []byte) bool
  114. if len(endKey) > 0 {
  115. isMatch = func(b []byte) bool { return bytes.Compare(b, endKey) < 0 }
  116. } else {
  117. isMatch = func(b []byte) bool { return bytes.Equal(b, key) }
  118. limit = 1
  119. }
  120. for ck, cv := c.Seek(key); ck != nil && isMatch(ck); ck, cv = c.Next() {
  121. vs = append(vs, cv)
  122. keys = append(keys, ck)
  123. if limit == int64(len(keys)) {
  124. break
  125. }
  126. }
  127. return keys, vs
  128. }
  129. // UnsafeDelete must be called holding the lock on the tx.
  130. func (t *batchTx) UnsafeDelete(bucketName []byte, key []byte) {
  131. bucket := t.tx.Bucket(bucketName)
  132. if bucket == nil {
  133. if t.backend.lg != nil {
  134. t.backend.lg.Fatal(
  135. "failed to find a bucket",
  136. zap.String("bucket-name", string(bucketName)),
  137. )
  138. } else {
  139. plog.Fatalf("bucket %s does not exist", bucketName)
  140. }
  141. }
  142. err := bucket.Delete(key)
  143. if err != nil {
  144. if t.backend.lg != nil {
  145. t.backend.lg.Fatal(
  146. "failed to delete a key",
  147. zap.String("bucket-name", string(bucketName)),
  148. zap.Error(err),
  149. )
  150. } else {
  151. plog.Fatalf("cannot delete key from bucket (%v)", err)
  152. }
  153. }
  154. t.pending++
  155. }
  156. // UnsafeForEach must be called holding the lock on the tx.
  157. func (t *batchTx) UnsafeForEach(bucketName []byte, visitor func(k, v []byte) error) error {
  158. return unsafeForEach(t.tx, bucketName, visitor)
  159. }
  160. func unsafeForEach(tx *bolt.Tx, bucket []byte, visitor func(k, v []byte) error) error {
  161. if b := tx.Bucket(bucket); b != nil {
  162. return b.ForEach(visitor)
  163. }
  164. return nil
  165. }
  166. // Commit commits a previous tx and begins a new writable one.
  167. func (t *batchTx) Commit() {
  168. t.Lock()
  169. t.commit(false)
  170. t.Unlock()
  171. }
  172. // CommitAndStop commits the previous tx and does not create a new one.
  173. func (t *batchTx) CommitAndStop() {
  174. t.Lock()
  175. t.commit(true)
  176. t.Unlock()
  177. }
  178. func (t *batchTx) Unlock() {
  179. if t.pending >= t.backend.batchLimit {
  180. t.commit(false)
  181. }
  182. t.Mutex.Unlock()
  183. }
  184. func (t *batchTx) safePending() int {
  185. t.Mutex.Lock()
  186. defer t.Mutex.Unlock()
  187. return t.pending
  188. }
  189. func (t *batchTx) commit(stop bool) {
  190. // commit the last tx
  191. if t.tx != nil {
  192. if t.pending == 0 && !stop {
  193. return
  194. }
  195. start := time.Now()
  196. // gofail: var beforeCommit struct{}
  197. err := t.tx.Commit()
  198. // gofail: var afterCommit struct{}
  199. rebalanceSec.Observe(t.tx.Stats().RebalanceTime.Seconds())
  200. spillSec.Observe(t.tx.Stats().SpillTime.Seconds())
  201. writeSec.Observe(t.tx.Stats().WriteTime.Seconds())
  202. commitSec.Observe(time.Since(start).Seconds())
  203. atomic.AddInt64(&t.backend.commits, 1)
  204. t.pending = 0
  205. if err != nil {
  206. if t.backend.lg != nil {
  207. t.backend.lg.Fatal("failed to commit tx", zap.Error(err))
  208. } else {
  209. plog.Fatalf("cannot commit tx (%s)", err)
  210. }
  211. }
  212. }
  213. if !stop {
  214. t.tx = t.backend.begin(true)
  215. }
  216. }
  217. type batchTxBuffered struct {
  218. batchTx
  219. buf txWriteBuffer
  220. }
  221. func newBatchTxBuffered(backend *backend) *batchTxBuffered {
  222. tx := &batchTxBuffered{
  223. batchTx: batchTx{backend: backend},
  224. buf: txWriteBuffer{
  225. txBuffer: txBuffer{make(map[string]*bucketBuffer)},
  226. seq: true,
  227. },
  228. }
  229. tx.Commit()
  230. return tx
  231. }
  232. func (t *batchTxBuffered) Unlock() {
  233. if t.pending != 0 {
  234. t.backend.readTx.mu.Lock()
  235. t.buf.writeback(&t.backend.readTx.buf)
  236. t.backend.readTx.mu.Unlock()
  237. if t.pending >= t.backend.batchLimit {
  238. t.commit(false)
  239. }
  240. }
  241. t.batchTx.Unlock()
  242. }
  243. func (t *batchTxBuffered) Commit() {
  244. t.Lock()
  245. t.commit(false)
  246. t.Unlock()
  247. }
  248. func (t *batchTxBuffered) CommitAndStop() {
  249. t.Lock()
  250. t.commit(true)
  251. t.Unlock()
  252. }
  253. func (t *batchTxBuffered) commit(stop bool) {
  254. // all read txs must be closed to acquire boltdb commit rwlock
  255. t.backend.readTx.mu.Lock()
  256. t.unsafeCommit(stop)
  257. t.backend.readTx.mu.Unlock()
  258. }
  259. func (t *batchTxBuffered) unsafeCommit(stop bool) {
  260. if t.backend.readTx.tx != nil {
  261. if err := t.backend.readTx.tx.Rollback(); err != nil {
  262. if t.backend.lg != nil {
  263. t.backend.lg.Fatal("failed to rollback tx", zap.Error(err))
  264. } else {
  265. plog.Fatalf("cannot rollback tx (%s)", err)
  266. }
  267. }
  268. t.backend.readTx.reset()
  269. }
  270. t.batchTx.commit(stop)
  271. if !stop {
  272. t.backend.readTx.tx = t.backend.begin(false)
  273. }
  274. }
  275. func (t *batchTxBuffered) UnsafePut(bucketName []byte, key []byte, value []byte) {
  276. t.batchTx.UnsafePut(bucketName, key, value)
  277. t.buf.put(bucketName, key, value)
  278. }
  279. func (t *batchTxBuffered) UnsafeSeqPut(bucketName []byte, key []byte, value []byte) {
  280. t.batchTx.UnsafeSeqPut(bucketName, key, value)
  281. t.buf.putSeq(bucketName, key, value)
  282. }