btree.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571
  1. // Copyright 2014 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Package btree implements in-memory B-Trees of arbitrary degree.
  15. //
  16. // btree implements an in-memory B-Tree for use as an ordered data structure.
  17. // It is not meant for persistent storage solutions.
  18. //
  19. // It has a flatter structure than an equivalent red-black or other binary tree,
  20. // which in some cases yields better memory usage and/or performance.
  21. // See some discussion on the matter here:
  22. // http://google-opensource.blogspot.com/2013/01/c-containers-that-save-memory-and-time.html
// Note, though, that this project is in no way related to the C++ B-Tree
// implementation written about there.
  25. //
  26. // Within this tree, each node contains a slice of items and a (possibly nil)
  27. // slice of children. For basic numeric values or raw structs, this can cause
  28. // efficiency differences when compared to equivalent C++ template code that
  29. // stores values in arrays within the node:
  30. // * Due to the overhead of storing values as interfaces (each
  31. // value needs to be stored as the value itself, then 2 words for the
  32. // interface pointing to that value and its type), resulting in higher
  33. // memory use.
  34. // * Since interfaces can point to values anywhere in memory, values are
  35. // most likely not stored in contiguous blocks, resulting in a higher
  36. // number of cache misses.
  37. // These issues don't tend to matter, though, when working with strings or other
  38. // heap-allocated structures, since C++-equivalent structures also must store
  39. // pointers and also distribute their values across the heap.
  40. //
  41. // This implementation is designed to be a drop-in replacement to gollrb.LLRB
  42. // trees, (http://github.com/petar/gollrb), an excellent and probably the most
  43. // widely used ordered tree implementation in the Go ecosystem currently.
  44. // Its functions, therefore, exactly mirror those of
  45. // llrb.LLRB where possible. Unlike gollrb, though, we currently don't
  46. // support storing multiple equivalent values or backwards iteration.
  47. package btree
  48. import (
  49. "fmt"
  50. "io"
  51. "sort"
  52. "strings"
  53. )
// Item represents a single object in the tree.
//
// The tree orders and deduplicates its contents solely through Less, so any
// type stored in a BTree must implement it consistently.
type Item interface {
	// Less tests whether the current item is less than the given argument.
	//
	// This must provide a strict weak ordering.
	// If !a.Less(b) && !b.Less(a), we treat this to mean a == b (i.e. we can only
	// hold one of either a or b in the tree).
	Less(than Item) bool
}
// ItemIterator allows callers of Ascend* to iterate in-order over portions of
// the tree. It is invoked once per visited item; when this function returns
// false, iteration will stop and the associated Ascend* function will
// immediately return.
type ItemIterator func(i Item) bool
  67. // New creates a new B-Tree with the given degree.
  68. //
  69. // New(2), for example, will create a 2-3-4 tree (each node contains 1-3 items
  70. // and 2-4 children).
  71. func New(degree int) *BTree {
  72. if degree <= 1 {
  73. panic("bad degree")
  74. }
  75. return &BTree{
  76. degree: degree,
  77. freelist: make([]*node, 0, 32),
  78. }
  79. }
// items stores items in a node. The helper methods defined on it
// (insertAt, removeAt, pop, find) operate on the slice in place.
type items []Item
  82. // insertAt inserts a value into the given index, pushing all subsequent values
  83. // forward.
  84. func (s *items) insertAt(index int, item Item) {
  85. *s = append(*s, nil)
  86. if index < len(*s) {
  87. copy((*s)[index+1:], (*s)[index:])
  88. }
  89. (*s)[index] = item
  90. }
  91. // removeAt removes a value at a given index, pulling all subsequent values
  92. // back.
  93. func (s *items) removeAt(index int) Item {
  94. item := (*s)[index]
  95. copy((*s)[index:], (*s)[index+1:])
  96. *s = (*s)[:len(*s)-1]
  97. return item
  98. }
  99. // pop removes and returns the last element in the list.
  100. func (s *items) pop() (out Item) {
  101. index := len(*s) - 1
  102. out, *s = (*s)[index], (*s)[:index]
  103. return
  104. }
  105. // find returns the index where the given item should be inserted into this
  106. // list. 'found' is true if the item already exists in the list at the given
  107. // index.
  108. func (s items) find(item Item) (index int, found bool) {
  109. i := sort.Search(len(s), func(i int) bool {
  110. return item.Less(s[i])
  111. })
  112. if i > 0 && !s[i-1].Less(item) {
  113. return i - 1, true
  114. }
  115. return i, false
  116. }
// children stores child nodes in a node. The helper methods defined on it
// (insertAt, removeAt, pop) operate on the slice in place.
type children []*node
  119. // insertAt inserts a value into the given index, pushing all subsequent values
  120. // forward.
  121. func (s *children) insertAt(index int, n *node) {
  122. *s = append(*s, nil)
  123. if index < len(*s) {
  124. copy((*s)[index+1:], (*s)[index:])
  125. }
  126. (*s)[index] = n
  127. }
  128. // removeAt removes a value at a given index, pulling all subsequent values
  129. // back.
  130. func (s *children) removeAt(index int) *node {
  131. n := (*s)[index]
  132. copy((*s)[index:], (*s)[index+1:])
  133. *s = (*s)[:len(*s)-1]
  134. return n
  135. }
  136. // pop removes and returns the last element in the list.
  137. func (s *children) pop() (out *node) {
  138. index := len(*s) - 1
  139. out, *s = (*s)[index], (*s)[:index]
  140. return
  141. }
// node is an internal node in a tree.
//
// It must at all times maintain the invariant that either
//   * len(children) == 0, len(items) unconstrained
//   * len(children) == len(items) + 1
type node struct {
	// items holds this node's items in ascending order (find relies on it).
	items items
	// children is empty for a leaf; otherwise children[i] is the subtree
	// visited before items[i], and the last child follows the last item.
	children children
	// t is the owning tree; it is used to obtain and recycle nodes via
	// newNode and freeNode.
	t *BTree
}
  152. // split splits the given node at the given index. The current node shrinks,
  153. // and this function returns the item that existed at that index and a new node
  154. // containing all items/children after it.
  155. func (n *node) split(i int) (Item, *node) {
  156. item := n.items[i]
  157. next := n.t.newNode()
  158. next.items = append(next.items, n.items[i+1:]...)
  159. n.items = n.items[:i]
  160. if len(n.children) > 0 {
  161. next.children = append(next.children, n.children[i+1:]...)
  162. n.children = n.children[:i+1]
  163. }
  164. return item, next
  165. }
  166. // maybeSplitChild checks if a child should be split, and if so splits it.
  167. // Returns whether or not a split occurred.
  168. func (n *node) maybeSplitChild(i, maxItems int) bool {
  169. if len(n.children[i].items) < maxItems {
  170. return false
  171. }
  172. first := n.children[i]
  173. item, second := first.split(maxItems / 2)
  174. n.items.insertAt(i, item)
  175. n.children.insertAt(i+1, second)
  176. return true
  177. }
  178. // insert inserts an item into the subtree rooted at this node, making sure
  179. // no nodes in the subtree exceed maxItems items. Should an equivalent item be
  180. // be found/replaced by insert, it will be returned.
  181. func (n *node) insert(item Item, maxItems int) Item {
  182. i, found := n.items.find(item)
  183. if found {
  184. out := n.items[i]
  185. n.items[i] = item
  186. return out
  187. }
  188. if len(n.children) == 0 {
  189. n.items.insertAt(i, item)
  190. return nil
  191. }
  192. if n.maybeSplitChild(i, maxItems) {
  193. inTree := n.items[i]
  194. switch {
  195. case item.Less(inTree):
  196. // no change, we want first split node
  197. case inTree.Less(item):
  198. i++ // we want second split node
  199. default:
  200. out := n.items[i]
  201. n.items[i] = item
  202. return out
  203. }
  204. }
  205. return n.children[i].insert(item, maxItems)
  206. }
  207. // get finds the given key in the subtree and returns it.
  208. func (n *node) get(key Item) Item {
  209. i, found := n.items.find(key)
  210. if found {
  211. return n.items[i]
  212. } else if len(n.children) > 0 {
  213. return n.children[i].get(key)
  214. }
  215. return nil
  216. }
// toRemove details what item to remove in a node.remove call.
type toRemove int

// The modes understood by node.remove. removeMin and removeMax ignore the
// item argument passed to remove.
const (
	removeItem toRemove = iota // removes the given item
	removeMin                  // removes smallest item in the subtree
	removeMax                  // removes largest item in the subtree
)
// remove removes an item from the subtree rooted at this node and returns it.
//
// typ selects what is removed: the given item (removeItem), the smallest item
// (removeMin) or the largest item (removeMax). For removeItem, nil is returned
// when a leaf is reached without finding the item. Before descending into a
// child that holds minItems or fewer items, the child is first grown via
// growChildAndRemove. remove panics with "invalid type" for an unknown typ.
func (n *node) remove(item Item, minItems int, typ toRemove) Item {
	var i int
	var found bool
	switch typ {
	case removeMax:
		if len(n.children) == 0 {
			return n.items.pop()
		}
		// The largest item lives in the last child.
		i = len(n.items)
	case removeMin:
		if len(n.children) == 0 {
			return n.items.removeAt(0)
		}
		// The smallest item lives in the first child.
		i = 0
	case removeItem:
		i, found = n.items.find(item)
		if len(n.children) == 0 {
			if found {
				return n.items.removeAt(i)
			}
			return nil
		}
	default:
		panic("invalid type")
	}
	// If we get to here, we have children.
	child := n.children[i]
	if len(child.items) <= minItems {
		return n.growChildAndRemove(i, item, minItems, typ)
	}
	// Either we had enough items to begin with, or we've done some
	// merging/stealing, because we've got enough now and we're ready to return
	// stuff.
	if found {
		// The item exists at index 'i', and the child we've selected can give us a
		// predecessor, since if we've gotten here it's got > minItems items in it.
		out := n.items[i]
		// We use our special-case 'remove' call with typ=removeMax to pull the
		// predecessor of item i (the rightmost leaf of our immediate left child)
		// and set it into where we pulled the item from.
		n.items[i] = child.remove(nil, minItems, removeMax)
		return out
	}
	// Final recursive call. Once we're here, we know that the item isn't in this
	// node and that the child is big enough to remove from.
	return child.remove(item, minItems, typ)
}
// growChildAndRemove grows child 'i' to make sure it's possible to remove an
// item from it while keeping it at minItems, then calls remove to actually
// remove it.
//
// Most documentation says we have to do two sets of special casing:
//   1) item is in this node
//   2) item is in child
// In both cases, we need to handle the two subcases:
//   A) node has enough values that it can spare one
//   B) node doesn't have enough values
// For the latter, we have to check:
//   a) left sibling has node to spare
//   b) right sibling has node to spare
//   c) we must merge
// To simplify our code here, we handle cases #1 and #2 the same:
// If a node doesn't have enough items, we make sure it does (using a,b,c).
// We then simply redo our remove call, and the second time (regardless of
// whether we're in case 1 or 2), we'll have enough items and can guarantee
// that we hit case A.
func (n *node) growChildAndRemove(i int, item Item, minItems int, typ toRemove) Item {
	child := n.children[i]
	if i > 0 && len(n.children[i-1].items) > minItems {
		// Steal from left child: the separator n.items[i-1] moves down to the
		// front of child, and the left sibling's last item moves up to take
		// the separator's place.
		stealFrom := n.children[i-1]
		stolenItem := stealFrom.items.pop()
		child.items.insertAt(0, n.items[i-1])
		n.items[i-1] = stolenItem
		if len(stealFrom.children) > 0 {
			// Non-leaf: the sibling's last subtree follows its stolen item.
			child.children.insertAt(0, stealFrom.children.pop())
		}
	} else if i < len(n.items) && len(n.children[i+1].items) > minItems {
		// steal from right child: the separator n.items[i] moves down to the
		// end of child, and the right sibling's first item moves up.
		stealFrom := n.children[i+1]
		stolenItem := stealFrom.items.removeAt(0)
		child.items = append(child.items, n.items[i])
		n.items[i] = stolenItem
		if len(stealFrom.children) > 0 {
			// Non-leaf: the sibling's first subtree follows its stolen item.
			child.children = append(child.children, stealFrom.children.removeAt(0))
		}
	} else {
		if i >= len(n.items) {
			// child is the last child and has no right sibling, so merge the
			// pair (i-1, i) instead.
			i--
			child = n.children[i]
		}
		// merge with right child: pull separator i down between the two
		// halves and append the right sibling's contents to child.
		mergeItem := n.items.removeAt(i)
		mergeChild := n.children.removeAt(i + 1)
		child.items = append(child.items, mergeItem)
		child.items = append(child.items, mergeChild.items...)
		child.children = append(child.children, mergeChild.children...)
		// The emptied sibling is handed back to the tree for reuse.
		n.t.freeNode(mergeChild)
	}
	// Retry from this node now that the target child has been grown.
	return n.remove(item, minItems, typ)
}
  326. // iterate provides a simple method for iterating over elements in the tree.
  327. // It could probably use some work to be extra-efficient (it calls from() a
  328. // little more than it should), but it works pretty well for now.
  329. //
  330. // It requires that 'from' and 'to' both return true for values we should hit
  331. // with the iterator. It should also be the case that 'from' returns true for
  332. // values less than or equal to values 'to' returns true for, and 'to'
  333. // returns true for values greater than or equal to those that 'from'
  334. // does.
  335. func (n *node) iterate(from, to func(Item) bool, iter ItemIterator) bool {
  336. for i, item := range n.items {
  337. if !from(item) {
  338. continue
  339. }
  340. if len(n.children) > 0 && !n.children[i].iterate(from, to, iter) {
  341. return false
  342. }
  343. if !to(item) {
  344. return false
  345. }
  346. if !iter(item) {
  347. return false
  348. }
  349. }
  350. if len(n.children) > 0 {
  351. return n.children[len(n.children)-1].iterate(from, to, iter)
  352. }
  353. return true
  354. }
  355. // Used for testing/debugging purposes.
  356. func (n *node) print(w io.Writer, level int) {
  357. fmt.Fprintf(w, "%sNODE:%v\n", strings.Repeat(" ", level), n.items)
  358. for _, c := range n.children {
  359. c.print(w, level+1)
  360. }
  361. }
// BTree is an implementation of a B-Tree.
//
// BTree stores Item instances in an ordered structure, allowing easy insertion,
// removal, and iteration.
//
// Write operations are not safe for concurrent mutation by multiple
// goroutines, but Read operations are.
type BTree struct {
	// degree is the value passed to New; minItems and maxItems derive the
	// per-node item bounds from it.
	degree int
	// length is the number of items currently stored, as reported by Len.
	length int
	// root is the top node of the tree; nil until the first insertion.
	root *node
	// freelist pools discarded nodes for reuse by newNode; freeNode stops
	// adding to it once its capacity is reached.
	freelist []*node
}
  375. // maxItems returns the max number of items to allow per node.
  376. func (t *BTree) maxItems() int {
  377. return t.degree*2 - 1
  378. }
  379. // minItems returns the min number of items to allow per node (ignored for the
  380. // root node).
  381. func (t *BTree) minItems() int {
  382. return t.degree - 1
  383. }
  384. func (t *BTree) newNode() (n *node) {
  385. index := len(t.freelist) - 1
  386. if index < 0 {
  387. return &node{t: t}
  388. }
  389. t.freelist, n = t.freelist[:index], t.freelist[index]
  390. return
  391. }
  392. func (t *BTree) freeNode(n *node) {
  393. if len(t.freelist) < cap(t.freelist) {
  394. for i := range n.items {
  395. n.items[i] = nil // clear to allow GC
  396. }
  397. n.items = n.items[:0]
  398. for i := range n.children {
  399. n.children[i] = nil // clear to allow GC
  400. }
  401. n.children = n.children[:0]
  402. t.freelist = append(t.freelist, n)
  403. }
  404. }
  405. // ReplaceOrInsert adds the given item to the tree. If an item in the tree
  406. // already equals the given one, it is removed from the tree and returned.
  407. // Otherwise, nil is returned.
  408. //
  409. // nil cannot be added to the tree (will panic).
  410. func (t *BTree) ReplaceOrInsert(item Item) Item {
  411. if item == nil {
  412. panic("nil item being added to BTree")
  413. }
  414. if t.root == nil {
  415. t.root = t.newNode()
  416. t.root.items = append(t.root.items, item)
  417. t.length++
  418. return nil
  419. } else if len(t.root.items) >= t.maxItems() {
  420. item2, second := t.root.split(t.maxItems() / 2)
  421. oldroot := t.root
  422. t.root = t.newNode()
  423. t.root.items = append(t.root.items, item2)
  424. t.root.children = append(t.root.children, oldroot, second)
  425. }
  426. out := t.root.insert(item, t.maxItems())
  427. if out == nil {
  428. t.length++
  429. }
  430. return out
  431. }
  432. // Delete removes an item equal to the passed in item from the tree, returning
  433. // it. If no such item exists, returns nil.
  434. func (t *BTree) Delete(item Item) Item {
  435. return t.deleteItem(item, removeItem)
  436. }
  437. // DeleteMin removes the smallest item in the tree and returns it.
  438. // If no such item exists, returns nil.
  439. func (t *BTree) DeleteMin() Item {
  440. return t.deleteItem(nil, removeMin)
  441. }
  442. // DeleteMax removes the largest item in the tree and returns it.
  443. // If no such item exists, returns nil.
  444. func (t *BTree) DeleteMax() Item {
  445. return t.deleteItem(nil, removeMax)
  446. }
  447. func (t *BTree) deleteItem(item Item, typ toRemove) Item {
  448. if t.root == nil || len(t.root.items) == 0 {
  449. return nil
  450. }
  451. out := t.root.remove(item, t.minItems(), typ)
  452. if len(t.root.items) == 0 && len(t.root.children) > 0 {
  453. oldroot := t.root
  454. t.root = t.root.children[0]
  455. t.freeNode(oldroot)
  456. }
  457. if out != nil {
  458. t.length--
  459. }
  460. return out
  461. }
  462. // AscendRange calls the iterator for every value in the tree within the range
  463. // [greaterOrEqual, lessThan), until iterator returns false.
  464. func (t *BTree) AscendRange(greaterOrEqual, lessThan Item, iterator ItemIterator) {
  465. if t.root == nil {
  466. return
  467. }
  468. t.root.iterate(
  469. func(a Item) bool { return !a.Less(greaterOrEqual) },
  470. func(a Item) bool { return a.Less(lessThan) },
  471. iterator)
  472. }
  473. // AscendLessThan calls the iterator for every value in the tree within the range
  474. // [first, pivot), until iterator returns false.
  475. func (t *BTree) AscendLessThan(pivot Item, iterator ItemIterator) {
  476. if t.root == nil {
  477. return
  478. }
  479. t.root.iterate(
  480. func(a Item) bool { return true },
  481. func(a Item) bool { return a.Less(pivot) },
  482. iterator)
  483. }
  484. // AscendGreaterOrEqual calls the iterator for every value in the tree within
  485. // the range [pivot, last], until iterator returns false.
  486. func (t *BTree) AscendGreaterOrEqual(pivot Item, iterator ItemIterator) {
  487. if t.root == nil {
  488. return
  489. }
  490. t.root.iterate(
  491. func(a Item) bool { return !a.Less(pivot) },
  492. func(a Item) bool { return true },
  493. iterator)
  494. }
  495. // Ascend calls the iterator for every value in the tree within the range
  496. // [first, last], until iterator returns false.
  497. func (t *BTree) Ascend(iterator ItemIterator) {
  498. if t.root == nil {
  499. return
  500. }
  501. t.root.iterate(
  502. func(a Item) bool { return true },
  503. func(a Item) bool { return true },
  504. iterator)
  505. }
  506. // Get looks for the key item in the tree, returning it. It returns nil if
  507. // unable to find that item.
  508. func (t *BTree) Get(key Item) Item {
  509. if t.root == nil {
  510. return nil
  511. }
  512. return t.root.get(key)
  513. }
  514. // Has returns true if the given key is in the tree.
  515. func (t *BTree) Has(key Item) bool {
  516. return t.Get(key) != nil
  517. }
  518. // Len returns the number of items currently in the tree.
  519. func (t *BTree) Len() int {
  520. return t.length
  521. }
  522. // Int implements the Item interface for integers.
  523. type Int int
  524. // Less returns true if int(a) < int(b).
  525. func (a Int) Less(b Item) bool {
  526. return a < b.(Int)
  527. }