key_index.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. package storage
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "log"
  7. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/google/btree"
  8. )
  var (
  	// ErrReversionNotFound reports that a lookup on a key index found no
  	// reversion satisfying the requested reversion.
  	ErrReversionNotFound = errors.New("storage: Reversion not found")
  )
  // keyIndex stores the reversions of a key in the backend.
  // Each keyIndex has at least one key generation.
  // Each generation might have several key versions.
  // A tombstone on a key appends a tombstone version at the end
  // of the current generation and creates a new empty generation.
  // Each version of a key has an index pointing to the backend.
  //
  // For example: put(1.0);put(2.0);tombstone(3.0);put(4.0);tombstone(5.0) on key "foo"
  // generates a keyIndex:
  // key: "foo"
  // rev: 5
  // generations (listed newest first; the slice itself is appended to, so
  // it stores them oldest first):
  // {empty}
  // {4.0, 5.0(t)}
  // {1.0, 2.0, 3.0(t)}
  //
  // Compacting a keyIndex removes the versions with a reversion smaller than
  // or equal to the given rev, except the largest one. If a generation becomes
  // empty during compaction, it will be removed. If all the generations get
  // removed, the keyIndex should be removed.
  // For example:
  // compact(2) on the previous example
  // generations:
  // {empty}
  // {4.0, 5.0(t)}
  // {2.0, 3.0(t)}
  //
  // compact(4)
  // generations:
  // {empty}
  // {4.0, 5.0(t)}
  //
  // compact(5):
  // generations:
  // {empty} -> key SHOULD be removed.
  //
  // compact(6):
  // generations:
  // {empty} -> key SHOULD be removed.
  type keyIndex struct {
  	// key is the key whose reversions this index tracks.
  	key []byte
  	// rev is the main reversion passed to the most recent put.
  	rev int64
  	// generations holds the generations of the key; put appends to the
  	// last element and tombstone appends a new empty element.
  	generations []generation
  }
  56. // put puts a reversion to the keyIndex.
  57. func (ki *keyIndex) put(rev int64, subrev int64) {
  58. if rev < ki.rev {
  59. log.Panicf("store.keyindex: put with unexpected smaller reversion [%d / %d]", rev, ki.rev)
  60. }
  61. if len(ki.generations) == 0 {
  62. ki.generations = append(ki.generations, generation{})
  63. }
  64. g := &ki.generations[len(ki.generations)-1]
  65. g.revs = append(g.revs, reversion{rev, subrev})
  66. g.ver++
  67. ki.rev = rev
  68. }
  69. // tombstone puts a reversion, pointing to a tombstone, to the keyIndex.
  70. // It also creates a new empty generation in the keyIndex.
  71. func (ki *keyIndex) tombstone(rev int64, subrev int64) {
  72. if ki.isEmpty() {
  73. log.Panicf("store.keyindex: unexpected tombstone on empty keyIndex %s", string(ki.key))
  74. }
  75. ki.put(rev, subrev)
  76. ki.generations = append(ki.generations, generation{})
  77. }
  78. // get gets the reversion of the key that satisfies the given atRev.
  79. // Rev must be higher than or equal to the given atRev.
  80. func (ki *keyIndex) get(atRev int64) (rev reversion, err error) {
  81. if ki.isEmpty() {
  82. log.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
  83. }
  84. g := ki.findGeneration(atRev)
  85. if g.isEmpty() {
  86. return reversion{}, ErrReversionNotFound
  87. }
  88. f := func(rev reversion) bool {
  89. if rev.main <= atRev {
  90. return false
  91. }
  92. return true
  93. }
  94. n := g.walk(f)
  95. if n != -1 {
  96. return g.revs[n], nil
  97. }
  98. return reversion{}, ErrReversionNotFound
  99. }
  100. // compact compacts a keyIndex by removing the versions with smaller or equal
  101. // reversion than the given atRev except the largest one (If the largest one is
  102. // a tombstone, it will not be kept).
  103. // If a generation becomes empty during compaction, it will be removed.
  104. func (ki *keyIndex) compact(atRev int64, available map[reversion]struct{}) {
  105. if ki.isEmpty() {
  106. log.Panic("store.keyindex: unexpected compact on empty keyIndex %s", string(ki.key))
  107. }
  108. // walk until reaching the first reversion that has an reversion smaller or equal to
  109. // the atReversion.
  110. // add it to the available map
  111. f := func(rev reversion) bool {
  112. if rev.main <= atRev {
  113. available[rev] = struct{}{}
  114. return false
  115. }
  116. return true
  117. }
  118. g := ki.findGeneration(atRev)
  119. if g == nil {
  120. return
  121. }
  122. i := 0
  123. for i <= len(ki.generations)-1 {
  124. wg := &ki.generations[i]
  125. if wg == g {
  126. break
  127. }
  128. i++
  129. }
  130. if !g.isEmpty() {
  131. n := g.walk(f)
  132. // remove the previous contents.
  133. if n != -1 {
  134. g.revs = g.revs[n:]
  135. }
  136. // remove any tombstone
  137. if len(g.revs) == 1 && i != len(ki.generations)-1 {
  138. delete(available, g.revs[0])
  139. i++
  140. }
  141. }
  142. // remove the previous generations.
  143. ki.generations = ki.generations[i:]
  144. return
  145. }
  146. func (ki *keyIndex) isEmpty() bool {
  147. return len(ki.generations) == 1 && ki.generations[0].isEmpty()
  148. }
  149. // findGeneartion finds out the generation of the keyIndex that the
  150. // given index belongs to.
  151. func (ki *keyIndex) findGeneration(rev int64) *generation {
  152. cg := len(ki.generations) - 1
  153. for cg >= 0 {
  154. if len(ki.generations[cg].revs) == 0 {
  155. cg--
  156. continue
  157. }
  158. g := ki.generations[cg]
  159. if g.revs[0].main <= rev {
  160. return &ki.generations[cg]
  161. }
  162. cg--
  163. }
  164. return nil
  165. }
  166. func (a *keyIndex) Less(b btree.Item) bool {
  167. return bytes.Compare(a.key, b.(*keyIndex).key) == -1
  168. }
  169. func (ki *keyIndex) String() string {
  170. var s string
  171. for _, g := range ki.generations {
  172. s += g.String()
  173. }
  174. return s
  175. }
  // generation holds the reversions of a key that belong to one generation
  // of a keyIndex.
  type generation struct {
  	// ver is incremented by keyIndex.put each time a reversion is appended
  	// to this generation.
  	ver int64
  	// revs holds the reversions in the order they were put; keyIndex.compact
  	// may trim reversions from the front.
  	revs []reversion
  }
  // reversion identifies one version of a key. keyIndex.put builds it from
  // its rev and subrev arguments.
  type reversion struct {
  	// main is the part compared against the requested reversion by
  	// keyIndex.get, keyIndex.compact and keyIndex.findGeneration.
  	main int64
  	// sub is the subrev value passed to keyIndex.put.
  	// NOTE(review): presumably distinguishes changes that share the same
  	// main reversion — confirm against callers.
  	sub int64
  }
  184. func (g *generation) isEmpty() bool { return g == nil || len(g.revs) == 0 }
  185. // walk walks through the reversions in the generation in ascending order.
  186. // It passes the revision to the given function.
  187. // walk returns until: 1. it finishs walking all pairs 2. the function returns false.
  188. // walk returns the position at where it stopped. If it stopped after
  189. // finishing walking, -1 will be returned.
  190. func (g *generation) walk(f func(rev reversion) bool) int {
  191. l := len(g.revs)
  192. for i := range g.revs {
  193. ok := f(g.revs[l-i-1])
  194. if !ok {
  195. return l - i - 1
  196. }
  197. }
  198. return -1
  199. }
  200. func (g *generation) String() string {
  201. return fmt.Sprintf("g: ver[%d], revs %#v\n", g.ver, g.revs)
  202. }