btree.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649
  1. // Copyright 2014 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Package btree implements in-memory B-Trees of arbitrary degree.
  15. //
  16. // btree implements an in-memory B-Tree for use as an ordered data structure.
  17. // It is not meant for persistent storage solutions.
  18. //
  19. // It has a flatter structure than an equivalent red-black or other binary tree,
  20. // which in some cases yields better memory usage and/or performance.
  21. // See some discussion on the matter here:
  22. // http://google-opensource.blogspot.com/2013/01/c-containers-that-save-memory-and-time.html
  23. // Note, though, that this project is in no way related to the C++ B-Tree
// implementation written about there.
  25. //
  26. // Within this tree, each node contains a slice of items and a (possibly nil)
  27. // slice of children. For basic numeric values or raw structs, this can cause
  28. // efficiency differences when compared to equivalent C++ template code that
  29. // stores values in arrays within the node:
  30. // * Due to the overhead of storing values as interfaces (each
  31. // value needs to be stored as the value itself, then 2 words for the
  32. // interface pointing to that value and its type), resulting in higher
  33. // memory use.
  34. // * Since interfaces can point to values anywhere in memory, values are
  35. // most likely not stored in contiguous blocks, resulting in a higher
  36. // number of cache misses.
  37. // These issues don't tend to matter, though, when working with strings or other
  38. // heap-allocated structures, since C++-equivalent structures also must store
  39. // pointers and also distribute their values across the heap.
  40. //
  41. // This implementation is designed to be a drop-in replacement to gollrb.LLRB
  42. // trees, (http://github.com/petar/gollrb), an excellent and probably the most
  43. // widely used ordered tree implementation in the Go ecosystem currently.
  44. // Its functions, therefore, exactly mirror those of
  45. // llrb.LLRB where possible. Unlike gollrb, though, we currently don't
  46. // support storing multiple equivalent values or backwards iteration.
  47. package btree
  48. import (
  49. "fmt"
  50. "io"
  51. "sort"
  52. "strings"
  53. )
// Item represents a single object in the tree.
//
// The tree orders, locates and de-duplicates its entries solely through the
// Less method.
type Item interface {
	// Less tests whether the current item is less than the given argument.
	//
	// This must provide a strict weak ordering.
	// If !a.Less(b) && !b.Less(a), we treat this to mean a == b (i.e. we can only
	// hold one of either a or b in the tree).
	Less(than Item) bool
}
const (
	// DefaultFreeListSize is the capacity of the FreeList that New creates
	// for a tree when the caller does not supply one of its own.
	DefaultFreeListSize = 32
)
// FreeList represents a free list of btree nodes. By default each
// BTree has its own FreeList, but multiple BTrees can share the same
// FreeList.
//
// A FreeList does no locking of its own, so two BTrees using the same
// FreeList are not safe for concurrent write access.
type FreeList struct {
	// freelist holds released nodes awaiting reuse. Its capacity bounds how
	// many nodes are retained; see freeNode.
	freelist []*node
}
  73. // NewFreeList creates a new free list.
  74. // size is the maximum size of the returned free list.
  75. func NewFreeList(size int) *FreeList {
  76. return &FreeList{freelist: make([]*node, 0, size)}
  77. }
  78. func (f *FreeList) newNode() (n *node) {
  79. index := len(f.freelist) - 1
  80. if index < 0 {
  81. return new(node)
  82. }
  83. f.freelist, n = f.freelist[:index], f.freelist[index]
  84. return
  85. }
  86. func (f *FreeList) freeNode(n *node) {
  87. if len(f.freelist) < cap(f.freelist) {
  88. f.freelist = append(f.freelist, n)
  89. }
  90. }
// ItemIterator allows callers of Ascend* to iterate in-order over portions of
// the tree. It is called once for each visited item; when it returns false,
// iteration stops and the associated Ascend* function returns immediately.
type ItemIterator func(i Item) bool
  95. // New creates a new B-Tree with the given degree.
  96. //
  97. // New(2), for example, will create a 2-3-4 tree (each node contains 1-3 items
  98. // and 2-4 children).
  99. func New(degree int) *BTree {
  100. return NewWithFreeList(degree, NewFreeList(DefaultFreeListSize))
  101. }
  102. // NewWithFreeList creates a new B-Tree that uses the given node free list.
  103. func NewWithFreeList(degree int, f *FreeList) *BTree {
  104. if degree <= 1 {
  105. panic("bad degree")
  106. }
  107. return &BTree{
  108. degree: degree,
  109. freelist: f,
  110. }
  111. }
// items stores items in a node. find binary-searches the slice, so it is
// expected to be kept in ascending order.
type items []Item
  114. // insertAt inserts a value into the given index, pushing all subsequent values
  115. // forward.
  116. func (s *items) insertAt(index int, item Item) {
  117. *s = append(*s, nil)
  118. if index < len(*s) {
  119. copy((*s)[index+1:], (*s)[index:])
  120. }
  121. (*s)[index] = item
  122. }
  123. // removeAt removes a value at a given index, pulling all subsequent values
  124. // back.
  125. func (s *items) removeAt(index int) Item {
  126. item := (*s)[index]
  127. (*s)[index] = nil
  128. copy((*s)[index:], (*s)[index+1:])
  129. *s = (*s)[:len(*s)-1]
  130. return item
  131. }
  132. // pop removes and returns the last element in the list.
  133. func (s *items) pop() (out Item) {
  134. index := len(*s) - 1
  135. out = (*s)[index]
  136. (*s)[index] = nil
  137. *s = (*s)[:index]
  138. return
  139. }
  140. // find returns the index where the given item should be inserted into this
  141. // list. 'found' is true if the item already exists in the list at the given
  142. // index.
  143. func (s items) find(item Item) (index int, found bool) {
  144. i := sort.Search(len(s), func(i int) bool {
  145. return item.Less(s[i])
  146. })
  147. if i > 0 && !s[i-1].Less(item) {
  148. return i - 1, true
  149. }
  150. return i, false
  151. }
// children stores child nodes in a node. It is empty for a leaf.
type children []*node
  154. // insertAt inserts a value into the given index, pushing all subsequent values
  155. // forward.
  156. func (s *children) insertAt(index int, n *node) {
  157. *s = append(*s, nil)
  158. if index < len(*s) {
  159. copy((*s)[index+1:], (*s)[index:])
  160. }
  161. (*s)[index] = n
  162. }
  163. // removeAt removes a value at a given index, pulling all subsequent values
  164. // back.
  165. func (s *children) removeAt(index int) *node {
  166. n := (*s)[index]
  167. (*s)[index] = nil
  168. copy((*s)[index:], (*s)[index+1:])
  169. *s = (*s)[:len(*s)-1]
  170. return n
  171. }
  172. // pop removes and returns the last element in the list.
  173. func (s *children) pop() (out *node) {
  174. index := len(*s) - 1
  175. out = (*s)[index]
  176. (*s)[index] = nil
  177. *s = (*s)[:index]
  178. return
  179. }
// node is an internal node in a tree.
//
// It must at all times maintain the invariant that either
//   * len(children) == 0, len(items) unconstrained
//   * len(children) == len(items) + 1
type node struct {
	items    items    // entries held directly by this node
	children children // child subtrees; empty when this node is a leaf
	t        *BTree   // owning tree, used to obtain and release nodes
}
  190. // split splits the given node at the given index. The current node shrinks,
  191. // and this function returns the item that existed at that index and a new node
  192. // containing all items/children after it.
  193. func (n *node) split(i int) (Item, *node) {
  194. item := n.items[i]
  195. next := n.t.newNode()
  196. next.items = append(next.items, n.items[i+1:]...)
  197. n.items = n.items[:i]
  198. if len(n.children) > 0 {
  199. next.children = append(next.children, n.children[i+1:]...)
  200. n.children = n.children[:i+1]
  201. }
  202. return item, next
  203. }
  204. // maybeSplitChild checks if a child should be split, and if so splits it.
  205. // Returns whether or not a split occurred.
  206. func (n *node) maybeSplitChild(i, maxItems int) bool {
  207. if len(n.children[i].items) < maxItems {
  208. return false
  209. }
  210. first := n.children[i]
  211. item, second := first.split(maxItems / 2)
  212. n.items.insertAt(i, item)
  213. n.children.insertAt(i+1, second)
  214. return true
  215. }
  216. // insert inserts an item into the subtree rooted at this node, making sure
  217. // no nodes in the subtree exceed maxItems items. Should an equivalent item be
  218. // be found/replaced by insert, it will be returned.
  219. func (n *node) insert(item Item, maxItems int) Item {
  220. i, found := n.items.find(item)
  221. if found {
  222. out := n.items[i]
  223. n.items[i] = item
  224. return out
  225. }
  226. if len(n.children) == 0 {
  227. n.items.insertAt(i, item)
  228. return nil
  229. }
  230. if n.maybeSplitChild(i, maxItems) {
  231. inTree := n.items[i]
  232. switch {
  233. case item.Less(inTree):
  234. // no change, we want first split node
  235. case inTree.Less(item):
  236. i++ // we want second split node
  237. default:
  238. out := n.items[i]
  239. n.items[i] = item
  240. return out
  241. }
  242. }
  243. return n.children[i].insert(item, maxItems)
  244. }
  245. // get finds the given key in the subtree and returns it.
  246. func (n *node) get(key Item) Item {
  247. i, found := n.items.find(key)
  248. if found {
  249. return n.items[i]
  250. } else if len(n.children) > 0 {
  251. return n.children[i].get(key)
  252. }
  253. return nil
  254. }
  255. // min returns the first item in the subtree.
  256. func min(n *node) Item {
  257. if n == nil {
  258. return nil
  259. }
  260. for len(n.children) > 0 {
  261. n = n.children[0]
  262. }
  263. if len(n.items) == 0 {
  264. return nil
  265. }
  266. return n.items[0]
  267. }
  268. // max returns the last item in the subtree.
  269. func max(n *node) Item {
  270. if n == nil {
  271. return nil
  272. }
  273. for len(n.children) > 0 {
  274. n = n.children[len(n.children)-1]
  275. }
  276. if len(n.items) == 0 {
  277. return nil
  278. }
  279. return n.items[len(n.items)-1]
  280. }
// toRemove details what item to remove in a node.remove call.
type toRemove int

// The removal modes understood by node.remove. Any other value makes
// node.remove panic.
const (
	removeItem toRemove = iota // removes the given item
	removeMin                  // removes smallest item in the subtree
	removeMax                  // removes largest item in the subtree
)
// remove removes an item from the subtree rooted at this node and returns it.
//
// typ selects the target: removeItem removes the entry equivalent to item
// (returning nil if there is none), removeMin removes the smallest entry in
// the subtree and removeMax the largest. item is only consulted for
// removeItem. Before descending into a child that holds minItems items or
// fewer, the child is first grown via growChildAndRemove.
//
// It panics with "invalid type" for an unrecognized typ.
func (n *node) remove(item Item, minItems int, typ toRemove) Item {
	var i int
	var found bool
	switch typ {
	case removeMax:
		if len(n.children) == 0 {
			return n.items.pop()
		}
		// The maximum lives in the rightmost child.
		i = len(n.items)
	case removeMin:
		if len(n.children) == 0 {
			return n.items.removeAt(0)
		}
		// The minimum lives in the leftmost child.
		i = 0
	case removeItem:
		i, found = n.items.find(item)
		if len(n.children) == 0 {
			if found {
				return n.items.removeAt(i)
			}
			return nil
		}
	default:
		panic("invalid type")
	}
	// If we get to here, we have children.
	child := n.children[i]
	if len(child.items) <= minItems {
		return n.growChildAndRemove(i, item, minItems, typ)
	}
	// Either we had enough items to begin with, or we've done some
	// merging/stealing, because we've got enough now and we're ready to return
	// stuff.
	if found {
		// The item exists at index 'i', and the child we've selected can give us a
		// predecessor, since if we've gotten here it's got > minItems items in it.
		out := n.items[i]
		// We use our special-case 'remove' call with typ=removeMax to pull the
		// predecessor of item i (the rightmost leaf of our immediate left child)
		// and set it into where we pulled the item from.
		n.items[i] = child.remove(nil, minItems, removeMax)
		return out
	}
	// Final recursive call. Once we're here, we know that the item isn't in this
	// node and that the child is big enough to remove from.
	return child.remove(item, minItems, typ)
}
// growChildAndRemove grows child 'i' to make sure it's possible to remove an
// item from it while keeping it at minItems, then calls remove to actually
// remove it.
//
// Most documentation says we have to do two sets of special casing:
//   1) item is in this node
//   2) item is in child
// In both cases, we need to handle the two subcases:
//   A) node has enough values that it can spare one
//   B) node doesn't have enough values
// For the latter, we have to check:
//   a) left sibling has an item to spare
//   b) right sibling has an item to spare
//   c) we must merge
// To simplify our code here, we handle cases #1 and #2 the same:
// If a node doesn't have enough items, we make sure it does (using a,b,c).
// We then simply redo our remove call, and the second time (regardless of
// whether we're in case 1 or 2), we'll have enough items and can guarantee
// that we hit case A.
func (n *node) growChildAndRemove(i int, item Item, minItems int, typ toRemove) Item {
	child := n.children[i]
	if i > 0 && len(n.children[i-1].items) > minItems {
		// Steal from left child: rotate the left sibling's last item up into
		// this node and the separator down to the front of child.
		stealFrom := n.children[i-1]
		stolenItem := stealFrom.items.pop()
		child.items.insertAt(0, n.items[i-1])
		n.items[i-1] = stolenItem
		if len(stealFrom.children) > 0 {
			// The sibling's last subtree follows its item across.
			child.children.insertAt(0, stealFrom.children.pop())
		}
	} else if i < len(n.items) && len(n.children[i+1].items) > minItems {
		// Steal from right child: rotate the right sibling's first item up
		// into this node and the separator down to the end of child.
		stealFrom := n.children[i+1]
		stolenItem := stealFrom.items.removeAt(0)
		child.items = append(child.items, n.items[i])
		n.items[i] = stolenItem
		if len(stealFrom.children) > 0 {
			// The sibling's first subtree follows its item across.
			child.children = append(child.children, stealFrom.children.removeAt(0))
		}
	} else {
		if i >= len(n.items) {
			// child is the last child and has no right sibling, so merge it
			// into its left neighbour instead.
			i--
			child = n.children[i]
		}
		// merge with right child: pull the separator down and absorb the
		// right sibling's items and children, then release the sibling.
		mergeItem := n.items.removeAt(i)
		mergeChild := n.children.removeAt(i + 1)
		child.items = append(child.items, mergeItem)
		child.items = append(child.items, mergeChild.items...)
		child.children = append(child.children, mergeChild.children...)
		n.t.freeNode(mergeChild)
	}
	// Retry from this node now that the child has been grown.
	return n.remove(item, minItems, typ)
}
  390. // iterate provides a simple method for iterating over elements in the tree.
  391. // It could probably use some work to be extra-efficient (it calls from() a
  392. // little more than it should), but it works pretty well for now.
  393. //
  394. // It requires that 'from' and 'to' both return true for values we should hit
  395. // with the iterator. It should also be the case that 'from' returns true for
  396. // values less than or equal to values 'to' returns true for, and 'to'
  397. // returns true for values greater than or equal to those that 'from'
  398. // does.
  399. func (n *node) iterate(from, to func(Item) bool, iter ItemIterator) bool {
  400. for i, item := range n.items {
  401. if !from(item) {
  402. continue
  403. }
  404. if len(n.children) > 0 && !n.children[i].iterate(from, to, iter) {
  405. return false
  406. }
  407. if !to(item) {
  408. return false
  409. }
  410. if !iter(item) {
  411. return false
  412. }
  413. }
  414. if len(n.children) > 0 {
  415. return n.children[len(n.children)-1].iterate(from, to, iter)
  416. }
  417. return true
  418. }
  419. // Used for testing/debugging purposes.
  420. func (n *node) print(w io.Writer, level int) {
  421. fmt.Fprintf(w, "%sNODE:%v\n", strings.Repeat(" ", level), n.items)
  422. for _, c := range n.children {
  423. c.print(w, level+1)
  424. }
  425. }
// BTree is an implementation of a B-Tree.
//
// BTree stores Item instances in an ordered structure, allowing easy insertion,
// removal, and iteration.
//
// Write operations are not safe for concurrent mutation by multiple
// goroutines, but Read operations are.
type BTree struct {
	degree   int       // branching parameter; fixes the per-node item bounds (see maxItems and minItems)
	length   int       // number of items currently stored, as reported by Len
	root     *node     // top node of the tree; nil until the first insert
	freelist *FreeList // where nodes are obtained from and released to
}
// maxItems returns the max number of items to allow per node, which is
// 2*degree - 1.
func (t *BTree) maxItems() int {
	return t.degree*2 - 1
}
// minItems returns the min number of items to allow per node (ignored for the
// root node), which is degree - 1.
func (t *BTree) minItems() int {
	return t.degree - 1
}
  448. func (t *BTree) newNode() (n *node) {
  449. n = t.freelist.newNode()
  450. n.t = t
  451. return
  452. }
  453. func (t *BTree) freeNode(n *node) {
  454. for i := range n.items {
  455. n.items[i] = nil // clear to allow GC
  456. }
  457. n.items = n.items[:0]
  458. for i := range n.children {
  459. n.children[i] = nil // clear to allow GC
  460. }
  461. n.children = n.children[:0]
  462. n.t = nil // clear to allow GC
  463. t.freelist.freeNode(n)
  464. }
  465. // ReplaceOrInsert adds the given item to the tree. If an item in the tree
  466. // already equals the given one, it is removed from the tree and returned.
  467. // Otherwise, nil is returned.
  468. //
  469. // nil cannot be added to the tree (will panic).
  470. func (t *BTree) ReplaceOrInsert(item Item) Item {
  471. if item == nil {
  472. panic("nil item being added to BTree")
  473. }
  474. if t.root == nil {
  475. t.root = t.newNode()
  476. t.root.items = append(t.root.items, item)
  477. t.length++
  478. return nil
  479. } else if len(t.root.items) >= t.maxItems() {
  480. item2, second := t.root.split(t.maxItems() / 2)
  481. oldroot := t.root
  482. t.root = t.newNode()
  483. t.root.items = append(t.root.items, item2)
  484. t.root.children = append(t.root.children, oldroot, second)
  485. }
  486. out := t.root.insert(item, t.maxItems())
  487. if out == nil {
  488. t.length++
  489. }
  490. return out
  491. }
// Delete removes an item equal to the passed in item from the tree, returning
// it. If no such item exists, returns nil.
//
// Equality is decided by Item.Less (neither item is less than the other).
func (t *BTree) Delete(item Item) Item {
	return t.deleteItem(item, removeItem)
}
// DeleteMin removes the smallest item in the tree and returns it.
// If the tree is empty, it returns nil.
func (t *BTree) DeleteMin() Item {
	return t.deleteItem(nil, removeMin)
}
// DeleteMax removes the largest item in the tree and returns it.
// If the tree is empty, it returns nil.
func (t *BTree) DeleteMax() Item {
	return t.deleteItem(nil, removeMax)
}
  507. func (t *BTree) deleteItem(item Item, typ toRemove) Item {
  508. if t.root == nil || len(t.root.items) == 0 {
  509. return nil
  510. }
  511. out := t.root.remove(item, t.minItems(), typ)
  512. if len(t.root.items) == 0 && len(t.root.children) > 0 {
  513. oldroot := t.root
  514. t.root = t.root.children[0]
  515. t.freeNode(oldroot)
  516. }
  517. if out != nil {
  518. t.length--
  519. }
  520. return out
  521. }
  522. // AscendRange calls the iterator for every value in the tree within the range
  523. // [greaterOrEqual, lessThan), until iterator returns false.
  524. func (t *BTree) AscendRange(greaterOrEqual, lessThan Item, iterator ItemIterator) {
  525. if t.root == nil {
  526. return
  527. }
  528. t.root.iterate(
  529. func(a Item) bool { return !a.Less(greaterOrEqual) },
  530. func(a Item) bool { return a.Less(lessThan) },
  531. iterator)
  532. }
  533. // AscendLessThan calls the iterator for every value in the tree within the range
  534. // [first, pivot), until iterator returns false.
  535. func (t *BTree) AscendLessThan(pivot Item, iterator ItemIterator) {
  536. if t.root == nil {
  537. return
  538. }
  539. t.root.iterate(
  540. func(a Item) bool { return true },
  541. func(a Item) bool { return a.Less(pivot) },
  542. iterator)
  543. }
  544. // AscendGreaterOrEqual calls the iterator for every value in the tree within
  545. // the range [pivot, last], until iterator returns false.
  546. func (t *BTree) AscendGreaterOrEqual(pivot Item, iterator ItemIterator) {
  547. if t.root == nil {
  548. return
  549. }
  550. t.root.iterate(
  551. func(a Item) bool { return !a.Less(pivot) },
  552. func(a Item) bool { return true },
  553. iterator)
  554. }
  555. // Ascend calls the iterator for every value in the tree within the range
  556. // [first, last], until iterator returns false.
  557. func (t *BTree) Ascend(iterator ItemIterator) {
  558. if t.root == nil {
  559. return
  560. }
  561. t.root.iterate(
  562. func(a Item) bool { return true },
  563. func(a Item) bool { return true },
  564. iterator)
  565. }
// Get looks for the key item in the tree, returning it. It returns nil if
// unable to find that item.
//
// The returned value is the item stored in the tree, which compares equal to
// key under Item.Less but need not be the same value as key.
func (t *BTree) Get(key Item) Item {
	if t.root == nil {
		return nil
	}
	return t.root.get(key)
}
// Min returns the smallest item in the tree, or nil if the tree is empty.
// The tree is not modified.
func (t *BTree) Min() Item {
	return min(t.root)
}
// Max returns the largest item in the tree, or nil if the tree is empty.
// The tree is not modified.
func (t *BTree) Max() Item {
	return max(t.root)
}
// Has returns true if the given key is in the tree.
// It is implemented as a Get whose result is compared against nil.
func (t *BTree) Has(key Item) bool {
	return t.Get(key) != nil
}
// Len returns the number of items currently in the tree.
// The count is maintained on insert and delete, so no traversal is needed.
func (t *BTree) Len() int {
	return t.length
}
  590. // Int implements the Item interface for integers.
  591. type Int int
  592. // Less returns true if int(a) < int(b).
  593. func (a Int) Less(b Item) bool {
  594. return a < b.(Int)
  595. }