key_index.go 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package storage
  15. import (
  16. "bytes"
  17. "errors"
  18. "fmt"
  19. "log"
  20. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/google/btree"
  21. )
  22. var (
  23. ErrRevisionNotFound = errors.New("stroage: revision not found")
  24. )
  25. // keyIndex stores the revisions of a key in the backend.
  26. // Each keyIndex has at least one key generation.
  27. // Each generation might have several key versions.
  28. // Tombstone on a key appends an tombstone version at the end
  29. // of the current generation and creates a new empty generation.
  30. // Each version of a key has an index pointing to the backend.
  31. //
  32. // For example: put(1.0);put(2.0);tombstone(3.0);put(4.0);tombstone(5.0) on key "foo"
  33. // generate a keyIndex:
  34. // key: "foo"
  35. // rev: 5
  36. // generations:
  37. // {empty}
  38. // {4.0, 5.0(t)}
  39. // {1.0, 2.0, 3.0(t)}
  40. //
  41. // Compact a keyIndex removes the versions with smaller or equal to
  42. // rev except the largest one. If the generation becomes empty
  43. // during compaction, it will be removed. if all the generations get
  44. // removed, the keyIndex should be removed.
  45. // For example:
  46. // compact(2) on the previous example
  47. // generations:
  48. // {empty}
  49. // {4.0, 5.0(t)}
  50. // {2.0, 3.0(t)}
  51. //
  52. // compact(4)
  53. // generations:
  54. // {empty}
  55. // {4.0, 5.0(t)}
  56. //
  57. // compact(5):
  58. // generations:
  59. // {empty} -> key SHOULD be removed.
  60. //
  61. // compact(6):
  62. // generations:
  63. // {empty} -> key SHOULD be removed.
  64. type keyIndex struct {
  65. key []byte
  66. modified revision // the main rev of the last modification
  67. generations []generation
  68. }
  69. // put puts a revision to the keyIndex.
  70. func (ki *keyIndex) put(main int64, sub int64) {
  71. rev := revision{main: main, sub: sub}
  72. if !rev.GreaterThan(ki.modified) {
  73. log.Panicf("store.keyindex: put with unexpected smaller revision [%v / %v]", rev, ki.modified)
  74. }
  75. if len(ki.generations) == 0 {
  76. ki.generations = append(ki.generations, generation{})
  77. }
  78. g := &ki.generations[len(ki.generations)-1]
  79. if len(g.revs) == 0 { // create a new key
  80. keysGauge.Inc()
  81. g.created = rev
  82. }
  83. g.revs = append(g.revs, rev)
  84. g.ver++
  85. ki.modified = rev
  86. }
  87. func (ki *keyIndex) restore(created, modified revision, ver int64) {
  88. if len(ki.generations) != 0 {
  89. log.Panicf("store.keyindex: cannot restore non-empty keyIndex")
  90. }
  91. ki.modified = modified
  92. g := generation{created: created, ver: ver, revs: []revision{modified}}
  93. ki.generations = append(ki.generations, g)
  94. keysGauge.Inc()
  95. }
  96. // tombstone puts a revision, pointing to a tombstone, to the keyIndex.
  97. // It also creates a new empty generation in the keyIndex.
  98. // It returns ErrRevisionNotFound when tombstone on an empty generation.
  99. func (ki *keyIndex) tombstone(main int64, sub int64) error {
  100. if ki.isEmpty() {
  101. log.Panicf("store.keyindex: unexpected tombstone on empty keyIndex %s", string(ki.key))
  102. }
  103. if ki.generations[len(ki.generations)-1].isEmpty() {
  104. return ErrRevisionNotFound
  105. }
  106. ki.put(main, sub)
  107. ki.generations = append(ki.generations, generation{})
  108. keysGauge.Dec()
  109. return nil
  110. }
  111. // get gets the modified, created revision and version of the key that satisfies the given atRev.
  112. // Rev must be higher than or equal to the given atRev.
  113. func (ki *keyIndex) get(atRev int64) (modified, created revision, ver int64, err error) {
  114. if ki.isEmpty() {
  115. log.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
  116. }
  117. g := ki.findGeneration(atRev)
  118. if g.isEmpty() {
  119. return revision{}, revision{}, 0, ErrRevisionNotFound
  120. }
  121. f := func(rev revision) bool {
  122. if rev.main <= atRev {
  123. return false
  124. }
  125. return true
  126. }
  127. n := g.walk(f)
  128. if n != -1 {
  129. return g.revs[n], g.created, g.ver - int64(len(g.revs)-n-1), nil
  130. }
  131. return revision{}, revision{}, 0, ErrRevisionNotFound
  132. }
  133. // since returns revisions since the given rev. Only the revision with the
  134. // largest sub revision will be returned if multiple revisions have the same
  135. // main revision.
  136. func (ki *keyIndex) since(rev int64) []revision {
  137. if ki.isEmpty() {
  138. log.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
  139. }
  140. since := revision{rev, 0}
  141. var gi int
  142. // find the generations to start checking
  143. for gi = len(ki.generations) - 1; gi > 0; gi-- {
  144. g := ki.generations[gi]
  145. if g.isEmpty() {
  146. continue
  147. }
  148. if since.GreaterThan(g.created) {
  149. break
  150. }
  151. }
  152. var revs []revision
  153. var last int64
  154. for ; gi < len(ki.generations); gi++ {
  155. for _, r := range ki.generations[gi].revs {
  156. if since.GreaterThan(r) {
  157. continue
  158. }
  159. if r.main == last {
  160. // replace the revision with a new one that has higher sub value,
  161. // because the original one should not be seen by external
  162. revs[len(revs)-1] = r
  163. continue
  164. }
  165. revs = append(revs, r)
  166. last = r.main
  167. }
  168. }
  169. return revs
  170. }
  171. // compact compacts a keyIndex by removing the versions with smaller or equal
  172. // revision than the given atRev except the largest one (If the largest one is
  173. // a tombstone, it will not be kept).
  174. // If a generation becomes empty during compaction, it will be removed.
  175. func (ki *keyIndex) compact(atRev int64, available map[revision]struct{}) {
  176. if ki.isEmpty() {
  177. log.Panicf("store.keyindex: unexpected compact on empty keyIndex %s", string(ki.key))
  178. }
  179. // walk until reaching the first revision that has an revision smaller or equal to
  180. // the atRev.
  181. // add it to the available map
  182. f := func(rev revision) bool {
  183. if rev.main <= atRev {
  184. available[rev] = struct{}{}
  185. return false
  186. }
  187. return true
  188. }
  189. i, g := 0, &ki.generations[0]
  190. // find first generation includes atRev or created after atRev
  191. for i < len(ki.generations)-1 {
  192. if tomb := g.revs[len(g.revs)-1].main; tomb > atRev {
  193. break
  194. }
  195. i++
  196. g = &ki.generations[i]
  197. }
  198. if !g.isEmpty() {
  199. n := g.walk(f)
  200. // remove the previous contents.
  201. if n != -1 {
  202. g.revs = g.revs[n:]
  203. }
  204. // remove any tombstone
  205. if len(g.revs) == 1 && i != len(ki.generations)-1 {
  206. delete(available, g.revs[0])
  207. i++
  208. }
  209. }
  210. // remove the previous generations.
  211. ki.generations = ki.generations[i:]
  212. return
  213. }
  214. func (ki *keyIndex) isEmpty() bool {
  215. return len(ki.generations) == 1 && ki.generations[0].isEmpty()
  216. }
  217. // findGeneration finds out the generation of the keyIndex that the
  218. // given rev belongs to. If the given rev is at the gap of two generations,
  219. // which means that the key does not exist at the given rev, it returns nil.
  220. func (ki *keyIndex) findGeneration(rev int64) *generation {
  221. lastg := len(ki.generations) - 1
  222. cg := lastg
  223. for cg >= 0 {
  224. if len(ki.generations[cg].revs) == 0 {
  225. cg--
  226. continue
  227. }
  228. g := ki.generations[cg]
  229. if cg != lastg {
  230. if tomb := g.revs[len(g.revs)-1].main; tomb <= rev {
  231. return nil
  232. }
  233. }
  234. if g.revs[0].main <= rev {
  235. return &ki.generations[cg]
  236. }
  237. cg--
  238. }
  239. return nil
  240. }
  241. func (a *keyIndex) Less(b btree.Item) bool {
  242. return bytes.Compare(a.key, b.(*keyIndex).key) == -1
  243. }
  244. func (a *keyIndex) equal(b *keyIndex) bool {
  245. if !bytes.Equal(a.key, b.key) {
  246. return false
  247. }
  248. if a.modified != b.modified {
  249. return false
  250. }
  251. if len(a.generations) != len(b.generations) {
  252. return false
  253. }
  254. for i := range a.generations {
  255. ag, bg := a.generations[i], b.generations[i]
  256. if !ag.equal(bg) {
  257. return false
  258. }
  259. }
  260. return true
  261. }
  262. func (ki *keyIndex) String() string {
  263. var s string
  264. for _, g := range ki.generations {
  265. s += g.String()
  266. }
  267. return s
  268. }
  269. // generation contains multiple revisions of a key.
  270. type generation struct {
  271. ver int64
  272. created revision // when the generation is created (put in first revision).
  273. revs []revision
  274. }
  275. func (g *generation) isEmpty() bool { return g == nil || len(g.revs) == 0 }
  276. // walk walks through the revisions in the generation in descending order.
  277. // It passes the revision to the given function.
  278. // walk returns until: 1. it finishes walking all pairs 2. the function returns false.
  279. // walk returns the position at where it stopped. If it stopped after
  280. // finishing walking, -1 will be returned.
  281. func (g *generation) walk(f func(rev revision) bool) int {
  282. l := len(g.revs)
  283. for i := range g.revs {
  284. ok := f(g.revs[l-i-1])
  285. if !ok {
  286. return l - i - 1
  287. }
  288. }
  289. return -1
  290. }
  291. func (g *generation) String() string {
  292. return fmt.Sprintf("g: created[%d] ver[%d], revs %#v\n", g.created, g.ver, g.revs)
  293. }
  294. func (a generation) equal(b generation) bool {
  295. if a.ver != b.ver {
  296. return false
  297. }
  298. if len(a.revs) != len(b.revs) {
  299. return false
  300. }
  301. for i := range a.revs {
  302. ar, br := a.revs[i], b.revs[i]
  303. if ar != br {
  304. return false
  305. }
  306. }
  307. return true
  308. }