key_index.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. package storage
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "log"
  7. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/google/btree"
  8. )
  9. var (
  10. ErrRevisionNotFound = errors.New("stroage: revision not found")
  11. )
// keyIndex stores the revisions of a key in the backend.
// Each keyIndex has at least one key generation.
// Each generation might have several key versions.
// A tombstone on a key appends a tombstone version at the end
// of the current generation and creates a new empty generation.
// Each version of a key has an index pointing to the backend.
//
// For example: put(1.0);put(2.0);tombstone(3.0);put(4.0);tombstone(5.0) on key "foo"
// generates a keyIndex (generations listed newest first):
// key:      "foo"
// modified: 5.0
// generations:
//    {empty}
//    {4.0, 5.0(t)}
//    {1.0, 2.0, 3.0(t)}
//
// Compacting a keyIndex removes the versions with a revision smaller than
// or equal to the given rev, except the largest one. If a generation becomes
// empty during compaction, it will be removed. If all the generations get
// removed, the keyIndex should be removed.
// For example:
// compact(2) on the previous example
// generations:
//    {empty}
//    {4.0, 5.0(t)}
//    {2.0, 3.0(t)}
//
// compact(4)
// generations:
//    {empty}
//    {4.0, 5.0(t)}
//
// compact(5):
// generations:
//    {empty} -> key SHOULD be removed.
//
// compact(6):
// generations:
//    {empty} -> key SHOULD be removed.
type keyIndex struct {
	// key is the key this index describes.
	key []byte
	// modified is the revision of the last modification; it is set by put
	// and restore.
	modified revision
	// generations is ordered oldest first: put appends revisions to the last
	// element, tombstone appends a new empty element, and compact drops
	// elements from the front.
	generations []generation
}
  56. // put puts a revision to the keyIndex.
  57. func (ki *keyIndex) put(main int64, sub int64) {
  58. rev := revision{main: main, sub: sub}
  59. if !rev.GreaterThan(ki.modified) {
  60. log.Panicf("store.keyindex: put with unexpected smaller revision [%v / %v]", rev, ki.modified)
  61. }
  62. if len(ki.generations) == 0 {
  63. ki.generations = append(ki.generations, generation{})
  64. }
  65. g := &ki.generations[len(ki.generations)-1]
  66. if len(g.revs) == 0 { // create a new key
  67. keysGauge.Inc()
  68. g.created = rev
  69. }
  70. g.revs = append(g.revs, rev)
  71. g.ver++
  72. ki.modified = rev
  73. }
  74. func (ki *keyIndex) restore(created, modified revision, ver int64) {
  75. if len(ki.generations) != 0 {
  76. log.Panicf("store.keyindex: cannot restore non-empty keyIndex")
  77. }
  78. ki.modified = modified
  79. g := generation{created: created, ver: ver, revs: []revision{modified}}
  80. ki.generations = append(ki.generations, g)
  81. keysGauge.Inc()
  82. }
  83. // tombstone puts a revision, pointing to a tombstone, to the keyIndex.
  84. // It also creates a new empty generation in the keyIndex.
  85. // It returns ErrRevisionNotFound when tombstone on an empty generation.
  86. func (ki *keyIndex) tombstone(main int64, sub int64) error {
  87. if ki.isEmpty() {
  88. log.Panicf("store.keyindex: unexpected tombstone on empty keyIndex %s", string(ki.key))
  89. }
  90. if ki.generations[len(ki.generations)-1].isEmpty() {
  91. return ErrRevisionNotFound
  92. }
  93. ki.put(main, sub)
  94. ki.generations = append(ki.generations, generation{})
  95. keysGauge.Dec()
  96. return nil
  97. }
  98. // get gets the modified, created revision and version of the key that satisfies the given atRev.
  99. // Rev must be higher than or equal to the given atRev.
  100. func (ki *keyIndex) get(atRev int64) (modified, created revision, ver int64, err error) {
  101. if ki.isEmpty() {
  102. log.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
  103. }
  104. g := ki.findGeneration(atRev)
  105. if g.isEmpty() {
  106. return revision{}, revision{}, 0, ErrRevisionNotFound
  107. }
  108. f := func(rev revision) bool {
  109. if rev.main <= atRev {
  110. return false
  111. }
  112. return true
  113. }
  114. n := g.walk(f)
  115. if n != -1 {
  116. return g.revs[n], g.created, g.ver - int64(len(g.revs)-n-1), nil
  117. }
  118. return revision{}, revision{}, 0, ErrRevisionNotFound
  119. }
  120. // compact compacts a keyIndex by removing the versions with smaller or equal
  121. // revision than the given atRev except the largest one (If the largest one is
  122. // a tombstone, it will not be kept).
  123. // If a generation becomes empty during compaction, it will be removed.
  124. func (ki *keyIndex) compact(atRev int64, available map[revision]struct{}) {
  125. if ki.isEmpty() {
  126. log.Panicf("store.keyindex: unexpected compact on empty keyIndex %s", string(ki.key))
  127. }
  128. // walk until reaching the first revision that has an revision smaller or equal to
  129. // the atRevision.
  130. // add it to the available map
  131. f := func(rev revision) bool {
  132. if rev.main <= atRev {
  133. available[rev] = struct{}{}
  134. return false
  135. }
  136. return true
  137. }
  138. i, g := 0, &ki.generations[0]
  139. // find first generation includes atRev or created after atRev
  140. for i < len(ki.generations)-1 {
  141. if tomb := g.revs[len(g.revs)-1].main; tomb > atRev {
  142. break
  143. }
  144. i++
  145. g = &ki.generations[i]
  146. }
  147. if !g.isEmpty() {
  148. n := g.walk(f)
  149. // remove the previous contents.
  150. if n != -1 {
  151. g.revs = g.revs[n:]
  152. }
  153. // remove any tombstone
  154. if len(g.revs) == 1 && i != len(ki.generations)-1 {
  155. delete(available, g.revs[0])
  156. i++
  157. }
  158. }
  159. // remove the previous generations.
  160. ki.generations = ki.generations[i:]
  161. return
  162. }
  163. func (ki *keyIndex) isEmpty() bool {
  164. return len(ki.generations) == 1 && ki.generations[0].isEmpty()
  165. }
  166. // findGeneartion finds out the generation of the keyIndex that the
  167. // given rev belongs to. If the given rev is at the gap of two generations,
  168. // which means that the key does not exist at the given rev, it returns nil.
  169. func (ki *keyIndex) findGeneration(rev int64) *generation {
  170. lastg := len(ki.generations) - 1
  171. cg := lastg
  172. for cg >= 0 {
  173. if len(ki.generations[cg].revs) == 0 {
  174. cg--
  175. continue
  176. }
  177. g := ki.generations[cg]
  178. if cg != lastg {
  179. if tomb := g.revs[len(g.revs)-1].main; tomb <= rev {
  180. return nil
  181. }
  182. }
  183. if g.revs[0].main <= rev {
  184. return &ki.generations[cg]
  185. }
  186. cg--
  187. }
  188. return nil
  189. }
  190. func (a *keyIndex) Less(b btree.Item) bool {
  191. return bytes.Compare(a.key, b.(*keyIndex).key) == -1
  192. }
  193. func (a *keyIndex) equal(b *keyIndex) bool {
  194. if !bytes.Equal(a.key, b.key) {
  195. return false
  196. }
  197. if a.modified != b.modified {
  198. return false
  199. }
  200. if len(a.generations) != len(b.generations) {
  201. return false
  202. }
  203. for i := range a.generations {
  204. ag, bg := a.generations[i], b.generations[i]
  205. if !ag.equal(bg) {
  206. return false
  207. }
  208. }
  209. return true
  210. }
  211. func (ki *keyIndex) String() string {
  212. var s string
  213. for _, g := range ki.generations {
  214. s += g.String()
  215. }
  216. return s
  217. }
// generation holds the revisions of a key between its creation and the
// tombstone that ends it (or up to now, for the current generation).
type generation struct {
	// ver is incremented by keyIndex.put for every revision appended and is
	// set directly by keyIndex.restore. keyIndex.compact trims revs without
	// changing ver.
	ver     int64
	created revision // when the generation is created (put in first revision).
	// revs holds the revisions in the order they were put; walk visits them
	// from the last element to the first.
	revs []revision
}
  223. func (g *generation) isEmpty() bool { return g == nil || len(g.revs) == 0 }
  224. // walk walks through the revisions in the generation in descending order.
  225. // It passes the revision to the given function.
  226. // walk returns until: 1. it finishs walking all pairs 2. the function returns false.
  227. // walk returns the position at where it stopped. If it stopped after
  228. // finishing walking, -1 will be returned.
  229. func (g *generation) walk(f func(rev revision) bool) int {
  230. l := len(g.revs)
  231. for i := range g.revs {
  232. ok := f(g.revs[l-i-1])
  233. if !ok {
  234. return l - i - 1
  235. }
  236. }
  237. return -1
  238. }
  239. func (g *generation) String() string {
  240. return fmt.Sprintf("g: created[%d] ver[%d], revs %#v\n", g.created, g.ver, g.revs)
  241. }
  242. func (a generation) equal(b generation) bool {
  243. if a.ver != b.ver {
  244. return false
  245. }
  246. if len(a.revs) != len(b.revs) {
  247. return false
  248. }
  249. for i := range a.revs {
  250. ar, br := a.revs[i], b.revs[i]
  251. if ar != br {
  252. return false
  253. }
  254. }
  255. return true
  256. }