123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649 |
- // Copyright 2014 Google Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- // Package btree implements in-memory B-Trees of arbitrary degree.
- //
- // btree implements an in-memory B-Tree for use as an ordered data structure.
- // It is not meant for persistent storage solutions.
- //
- // It has a flatter structure than an equivalent red-black or other binary tree,
- // which in some cases yields better memory usage and/or performance.
- // See some discussion on the matter here:
- // http://google-opensource.blogspot.com/2013/01/c-containers-that-save-memory-and-time.html
- // Note, though, that this project is in no way related to the C++ B-Tree
- // implementation written about there.
- //
- // Within this tree, each node contains a slice of items and a (possibly nil)
- // slice of children. For basic numeric values or raw structs, this can cause
- // efficiency differences when compared to equivalent C++ template code that
- // stores values in arrays within the node:
- // * Due to the overhead of storing values as interfaces (each
- // value needs to be stored as the value itself, then 2 words for the
- // interface pointing to that value and its type), resulting in higher
- // memory use.
- // * Since interfaces can point to values anywhere in memory, values are
- // most likely not stored in contiguous blocks, resulting in a higher
- // number of cache misses.
- // These issues don't tend to matter, though, when working with strings or other
- // heap-allocated structures, since C++-equivalent structures also must store
- // pointers and also distribute their values across the heap.
- //
- // This implementation is designed to be a drop-in replacement to gollrb.LLRB
- // trees, (http://github.com/petar/gollrb), an excellent and probably the most
- // widely used ordered tree implementation in the Go ecosystem currently.
- // Its functions, therefore, exactly mirror those of
- // llrb.LLRB where possible. Unlike gollrb, though, we currently don't
- // support storing multiple equivalent values or backwards iteration.
- package btree
- import (
- "fmt"
- "io"
- "sort"
- "strings"
- )
// Item represents a single object in the tree.
type Item interface {
	// Less tests whether the current item is less than the given argument.
	//
	// This must provide a strict weak ordering.
	// If !a.Less(b) && !b.Less(a), we treat this to mean a == b (i.e. we can only
	// hold one of either a or b in the tree).
	Less(than Item) bool
}
const (
	// DefaultFreeListSize is the free list size New passes to NewFreeList.
	DefaultFreeListSize = 32
)
// FreeList represents a free list of btree nodes. By default each
// BTree has its own FreeList, but multiple BTrees can share the same
// FreeList.
// Two BTrees using the same free list are not safe for concurrent write access.
type FreeList struct {
	// freelist holds the nodes available for reuse; its capacity bounds how
	// many nodes are retained.
	freelist []*node
}
- // NewFreeList creates a new free list.
- // size is the maximum size of the returned free list.
- func NewFreeList(size int) *FreeList {
- return &FreeList{freelist: make([]*node, 0, size)}
- }
- func (f *FreeList) newNode() (n *node) {
- index := len(f.freelist) - 1
- if index < 0 {
- return new(node)
- }
- f.freelist, n = f.freelist[:index], f.freelist[index]
- return
- }
- func (f *FreeList) freeNode(n *node) {
- if len(f.freelist) < cap(f.freelist) {
- f.freelist = append(f.freelist, n)
- }
- }
// ItemIterator allows callers of Ascend* to iterate in-order over portions of
// the tree. It is called once per visited item; when it returns false,
// iteration stops and the associated Ascend* function returns immediately.
type ItemIterator func(i Item) bool
- // New creates a new B-Tree with the given degree.
- //
- // New(2), for example, will create a 2-3-4 tree (each node contains 1-3 items
- // and 2-4 children).
- func New(degree int) *BTree {
- return NewWithFreeList(degree, NewFreeList(DefaultFreeListSize))
- }
- // NewWithFreeList creates a new B-Tree that uses the given node free list.
- func NewWithFreeList(degree int, f *FreeList) *BTree {
- if degree <= 1 {
- panic("bad degree")
- }
- return &BTree{
- degree: degree,
- freelist: f,
- }
- }
// items stores the items of a single node. Its methods assume the slice is
// kept in ascending order (find searches it with sort.Search).
type items []Item
- // insertAt inserts a value into the given index, pushing all subsequent values
- // forward.
- func (s *items) insertAt(index int, item Item) {
- *s = append(*s, nil)
- if index < len(*s) {
- copy((*s)[index+1:], (*s)[index:])
- }
- (*s)[index] = item
- }
- // removeAt removes a value at a given index, pulling all subsequent values
- // back.
- func (s *items) removeAt(index int) Item {
- item := (*s)[index]
- (*s)[index] = nil
- copy((*s)[index:], (*s)[index+1:])
- *s = (*s)[:len(*s)-1]
- return item
- }
- // pop removes and returns the last element in the list.
- func (s *items) pop() (out Item) {
- index := len(*s) - 1
- out = (*s)[index]
- (*s)[index] = nil
- *s = (*s)[:index]
- return
- }
- // find returns the index where the given item should be inserted into this
- // list. 'found' is true if the item already exists in the list at the given
- // index.
- func (s items) find(item Item) (index int, found bool) {
- i := sort.Search(len(s), func(i int) bool {
- return item.Less(s[i])
- })
- if i > 0 && !s[i-1].Less(item) {
- return i - 1, true
- }
- return i, false
- }
// children stores the child nodes of a single node, in the same left-to-right
// order in which they are indexed alongside the node's items.
type children []*node
- // insertAt inserts a value into the given index, pushing all subsequent values
- // forward.
- func (s *children) insertAt(index int, n *node) {
- *s = append(*s, nil)
- if index < len(*s) {
- copy((*s)[index+1:], (*s)[index:])
- }
- (*s)[index] = n
- }
- // removeAt removes a value at a given index, pulling all subsequent values
- // back.
- func (s *children) removeAt(index int) *node {
- n := (*s)[index]
- (*s)[index] = nil
- copy((*s)[index:], (*s)[index+1:])
- *s = (*s)[:len(*s)-1]
- return n
- }
- // pop removes and returns the last element in the list.
- func (s *children) pop() (out *node) {
- index := len(*s) - 1
- out = (*s)[index]
- (*s)[index] = nil
- *s = (*s)[:index]
- return
- }
// node is an internal node in a tree.
//
// It must at all times maintain the invariant that either
//   - len(children) == 0, len(items) unconstrained
//   - len(children) == len(items) + 1
type node struct {
	items    items    // items stored directly in this node
	children children // child subtrees; empty when the node has no children
	t        *BTree   // set by BTree.newNode, cleared by BTree.freeNode
}
- // split splits the given node at the given index. The current node shrinks,
- // and this function returns the item that existed at that index and a new node
- // containing all items/children after it.
- func (n *node) split(i int) (Item, *node) {
- item := n.items[i]
- next := n.t.newNode()
- next.items = append(next.items, n.items[i+1:]...)
- n.items = n.items[:i]
- if len(n.children) > 0 {
- next.children = append(next.children, n.children[i+1:]...)
- n.children = n.children[:i+1]
- }
- return item, next
- }
- // maybeSplitChild checks if a child should be split, and if so splits it.
- // Returns whether or not a split occurred.
- func (n *node) maybeSplitChild(i, maxItems int) bool {
- if len(n.children[i].items) < maxItems {
- return false
- }
- first := n.children[i]
- item, second := first.split(maxItems / 2)
- n.items.insertAt(i, item)
- n.children.insertAt(i+1, second)
- return true
- }
- // insert inserts an item into the subtree rooted at this node, making sure
- // no nodes in the subtree exceed maxItems items. Should an equivalent item be
- // be found/replaced by insert, it will be returned.
- func (n *node) insert(item Item, maxItems int) Item {
- i, found := n.items.find(item)
- if found {
- out := n.items[i]
- n.items[i] = item
- return out
- }
- if len(n.children) == 0 {
- n.items.insertAt(i, item)
- return nil
- }
- if n.maybeSplitChild(i, maxItems) {
- inTree := n.items[i]
- switch {
- case item.Less(inTree):
- // no change, we want first split node
- case inTree.Less(item):
- i++ // we want second split node
- default:
- out := n.items[i]
- n.items[i] = item
- return out
- }
- }
- return n.children[i].insert(item, maxItems)
- }
- // get finds the given key in the subtree and returns it.
- func (n *node) get(key Item) Item {
- i, found := n.items.find(key)
- if found {
- return n.items[i]
- } else if len(n.children) > 0 {
- return n.children[i].get(key)
- }
- return nil
- }
- // min returns the first item in the subtree.
- func min(n *node) Item {
- if n == nil {
- return nil
- }
- for len(n.children) > 0 {
- n = n.children[0]
- }
- if len(n.items) == 0 {
- return nil
- }
- return n.items[0]
- }
- // max returns the last item in the subtree.
- func max(n *node) Item {
- if n == nil {
- return nil
- }
- for len(n.children) > 0 {
- n = n.children[len(n.children)-1]
- }
- if len(n.items) == 0 {
- return nil
- }
- return n.items[len(n.items)-1]
- }
// toRemove details what item to remove in a node.remove call.
type toRemove int

// Removal modes understood by node.remove; any other value makes it panic.
const (
	removeItem toRemove = iota // removes the given item
	removeMin                  // removes smallest item in the subtree
	removeMax                  // removes largest item in the subtree
)
// remove removes an item from the subtree rooted at this node and returns
// it.
//
// typ selects the target: removeItem removes the entry equivalent to item
// (returning nil if a childless node is reached without finding it), while
// removeMin and removeMax remove the first or last entry of the subtree.
// Before descending into a child that holds minItems items or fewer, the
// child is grown via growChildAndRemove. It panics with "invalid type" for
// any other typ.
func (n *node) remove(item Item, minItems int, typ toRemove) Item {
	var i int
	var found bool
	switch typ {
	case removeMax:
		if len(n.children) == 0 {
			return n.items.pop()
		}
		// Continue into the last child.
		i = len(n.items)
	case removeMin:
		if len(n.children) == 0 {
			return n.items.removeAt(0)
		}
		// Continue into the first child.
		i = 0
	case removeItem:
		i, found = n.items.find(item)
		if len(n.children) == 0 {
			if found {
				return n.items.removeAt(i)
			}
			return nil
		}
	default:
		panic("invalid type")
	}
	// If we get to here, we have children.
	child := n.children[i]
	if len(child.items) <= minItems {
		return n.growChildAndRemove(i, item, minItems, typ)
	}
	// Either we had enough items to begin with, or we've done some
	// merging/stealing, because we've got enough now and we're ready to return
	// stuff.
	if found {
		// The item exists at index 'i', and the child we've selected can give us a
		// predecessor, since if we've gotten here it's got > minItems items in it.
		out := n.items[i]
		// We use our special-case 'remove' call with typ=removeMax to pull the
		// predecessor of item i (the rightmost leaf of our immediate left child)
		// and set it into where we pulled the item from.
		n.items[i] = child.remove(nil, minItems, removeMax)
		return out
	}
	// Final recursive call. Once we're here, we know that the item isn't in this
	// node and that the child is big enough to remove from.
	return child.remove(item, minItems, typ)
}
// growChildAndRemove grows child 'i' to make sure it's possible to remove an
// item from it while keeping it at minItems, then calls remove to actually
// remove it.
//
// Most documentation says we have to do two sets of special casing:
//  1. item is in this node
//  2. item is in child
//
// In both cases, we need to handle the two subcases:
//
//	A) node has enough values that it can spare one
//	B) node doesn't have enough values
//
// For the latter, we have to check:
//
//	a) left sibling has an item to spare
//	b) right sibling has an item to spare
//	c) we must merge
//
// To simplify our code here, we handle cases #1 and #2 the same:
// If a node doesn't have enough items, we make sure it does (using a,b,c).
// We then simply redo our remove call, and the second time (regardless of
// whether we're in case 1 or 2), we'll have enough items and can guarantee
// that we hit case A.
func (n *node) growChildAndRemove(i int, item Item, minItems int, typ toRemove) Item {
	child := n.children[i]
	if i > 0 && len(n.children[i-1].items) > minItems {
		// Steal from left child: its last item moves up into this node, and the
		// separator it replaces moves down to the front of child.
		stealFrom := n.children[i-1]
		stolenItem := stealFrom.items.pop()
		child.items.insertAt(0, n.items[i-1])
		n.items[i-1] = stolenItem
		if len(stealFrom.children) > 0 {
			child.children.insertAt(0, stealFrom.children.pop())
		}
	} else if i < len(n.items) && len(n.children[i+1].items) > minItems {
		// Steal from right child: its first item moves up into this node, and
		// the separator it replaces moves down to the end of child.
		stealFrom := n.children[i+1]
		stolenItem := stealFrom.items.removeAt(0)
		child.items = append(child.items, n.items[i])
		n.items[i] = stolenItem
		if len(stealFrom.children) > 0 {
			child.children = append(child.children, stealFrom.children.removeAt(0))
		}
	} else {
		// The last child has no right sibling, so step back one position and
		// merge it into its left sibling instead.
		if i >= len(n.items) {
			i--
			child = n.children[i]
		}
		// merge with right child
		mergeItem := n.items.removeAt(i)
		mergeChild := n.children.removeAt(i + 1)
		child.items = append(child.items, mergeItem)
		child.items = append(child.items, mergeChild.items...)
		child.children = append(child.children, mergeChild.children...)
		n.t.freeNode(mergeChild)
	}
	// Retry the removal from this node now that the child has been grown.
	return n.remove(item, minItems, typ)
}
- // iterate provides a simple method for iterating over elements in the tree.
- // It could probably use some work to be extra-efficient (it calls from() a
- // little more than it should), but it works pretty well for now.
- //
- // It requires that 'from' and 'to' both return true for values we should hit
- // with the iterator. It should also be the case that 'from' returns true for
- // values less than or equal to values 'to' returns true for, and 'to'
- // returns true for values greater than or equal to those that 'from'
- // does.
- func (n *node) iterate(from, to func(Item) bool, iter ItemIterator) bool {
- for i, item := range n.items {
- if !from(item) {
- continue
- }
- if len(n.children) > 0 && !n.children[i].iterate(from, to, iter) {
- return false
- }
- if !to(item) {
- return false
- }
- if !iter(item) {
- return false
- }
- }
- if len(n.children) > 0 {
- return n.children[len(n.children)-1].iterate(from, to, iter)
- }
- return true
- }
- // Used for testing/debugging purposes.
- func (n *node) print(w io.Writer, level int) {
- fmt.Fprintf(w, "%sNODE:%v\n", strings.Repeat(" ", level), n.items)
- for _, c := range n.children {
- c.print(w, level+1)
- }
- }
// BTree is an implementation of a B-Tree.
//
// BTree stores Item instances in an ordered structure, allowing easy insertion,
// removal, and iteration.
//
// Write operations are not safe for concurrent mutation by multiple
// goroutines, but Read operations are.
type BTree struct {
	degree   int       // fixed at construction; maxItems and minItems derive from it
	length   int       // number of items in the tree, as reported by Len
	root     *node     // nil until the first item is inserted
	freelist *FreeList // source of new nodes and destination of released ones
}
- // maxItems returns the max number of items to allow per node.
- func (t *BTree) maxItems() int {
- return t.degree*2 - 1
- }
- // minItems returns the min number of items to allow per node (ignored for the
- // root node).
- func (t *BTree) minItems() int {
- return t.degree - 1
- }
- func (t *BTree) newNode() (n *node) {
- n = t.freelist.newNode()
- n.t = t
- return
- }
- func (t *BTree) freeNode(n *node) {
- for i := range n.items {
- n.items[i] = nil // clear to allow GC
- }
- n.items = n.items[:0]
- for i := range n.children {
- n.children[i] = nil // clear to allow GC
- }
- n.children = n.children[:0]
- n.t = nil // clear to allow GC
- t.freelist.freeNode(n)
- }
- // ReplaceOrInsert adds the given item to the tree. If an item in the tree
- // already equals the given one, it is removed from the tree and returned.
- // Otherwise, nil is returned.
- //
- // nil cannot be added to the tree (will panic).
- func (t *BTree) ReplaceOrInsert(item Item) Item {
- if item == nil {
- panic("nil item being added to BTree")
- }
- if t.root == nil {
- t.root = t.newNode()
- t.root.items = append(t.root.items, item)
- t.length++
- return nil
- } else if len(t.root.items) >= t.maxItems() {
- item2, second := t.root.split(t.maxItems() / 2)
- oldroot := t.root
- t.root = t.newNode()
- t.root.items = append(t.root.items, item2)
- t.root.children = append(t.root.children, oldroot, second)
- }
- out := t.root.insert(item, t.maxItems())
- if out == nil {
- t.length++
- }
- return out
- }
- // Delete removes an item equal to the passed in item from the tree, returning
- // it. If no such item exists, returns nil.
- func (t *BTree) Delete(item Item) Item {
- return t.deleteItem(item, removeItem)
- }
- // DeleteMin removes the smallest item in the tree and returns it.
- // If no such item exists, returns nil.
- func (t *BTree) DeleteMin() Item {
- return t.deleteItem(nil, removeMin)
- }
- // DeleteMax removes the largest item in the tree and returns it.
- // If no such item exists, returns nil.
- func (t *BTree) DeleteMax() Item {
- return t.deleteItem(nil, removeMax)
- }
- func (t *BTree) deleteItem(item Item, typ toRemove) Item {
- if t.root == nil || len(t.root.items) == 0 {
- return nil
- }
- out := t.root.remove(item, t.minItems(), typ)
- if len(t.root.items) == 0 && len(t.root.children) > 0 {
- oldroot := t.root
- t.root = t.root.children[0]
- t.freeNode(oldroot)
- }
- if out != nil {
- t.length--
- }
- return out
- }
- // AscendRange calls the iterator for every value in the tree within the range
- // [greaterOrEqual, lessThan), until iterator returns false.
- func (t *BTree) AscendRange(greaterOrEqual, lessThan Item, iterator ItemIterator) {
- if t.root == nil {
- return
- }
- t.root.iterate(
- func(a Item) bool { return !a.Less(greaterOrEqual) },
- func(a Item) bool { return a.Less(lessThan) },
- iterator)
- }
- // AscendLessThan calls the iterator for every value in the tree within the range
- // [first, pivot), until iterator returns false.
- func (t *BTree) AscendLessThan(pivot Item, iterator ItemIterator) {
- if t.root == nil {
- return
- }
- t.root.iterate(
- func(a Item) bool { return true },
- func(a Item) bool { return a.Less(pivot) },
- iterator)
- }
- // AscendGreaterOrEqual calls the iterator for every value in the tree within
- // the range [pivot, last], until iterator returns false.
- func (t *BTree) AscendGreaterOrEqual(pivot Item, iterator ItemIterator) {
- if t.root == nil {
- return
- }
- t.root.iterate(
- func(a Item) bool { return !a.Less(pivot) },
- func(a Item) bool { return true },
- iterator)
- }
- // Ascend calls the iterator for every value in the tree within the range
- // [first, last], until iterator returns false.
- func (t *BTree) Ascend(iterator ItemIterator) {
- if t.root == nil {
- return
- }
- t.root.iterate(
- func(a Item) bool { return true },
- func(a Item) bool { return true },
- iterator)
- }
- // Get looks for the key item in the tree, returning it. It returns nil if
- // unable to find that item.
- func (t *BTree) Get(key Item) Item {
- if t.root == nil {
- return nil
- }
- return t.root.get(key)
- }
- // Min returns the smallest item in the tree, or nil if the tree is empty.
- func (t *BTree) Min() Item {
- return min(t.root)
- }
- // Max returns the largest item in the tree, or nil if the tree is empty.
- func (t *BTree) Max() Item {
- return max(t.root)
- }
- // Has returns true if the given key is in the tree.
- func (t *BTree) Has(key Item) bool {
- return t.Get(key) != nil
- }
- // Len returns the number of items currently in the tree.
- func (t *BTree) Len() int {
- return t.length
- }
- // Int implements the Item interface for integers.
- type Int int
- // Less returns true if int(a) < int(b).
- func (a Int) Less(b Item) bool {
- return a < b.(Int)
- }
|