diff options
| author | Jonathan S <gereeter+code@gmail.com> | 2016-01-20 21:12:39 -0600 |
|---|---|---|
| committer | Jonathan S <gereeter+code@gmail.com> | 2016-02-05 19:49:13 -0600 |
| commit | 762d4ac734e096e8e4ea9fdeb23cf6ce9965d2c3 (patch) | |
| tree | dd19ba21be1aedb637135158c7303207350e07d5 /src | |
| parent | 34af2de4096b3b1c5d3a5b70171c6e27822aaefb (diff) | |
| download | rust-762d4ac734e096e8e4ea9fdeb23cf6ce9965d2c3.tar.gz rust-762d4ac734e096e8e4ea9fdeb23cf6ce9965d2c3.zip | |
Start documenting BTreeMap's node interface
Diffstat (limited to 'src')
| -rw-r--r-- | src/libcollections/btree/node.rs | 152 |
1 files changed, 149 insertions, 3 deletions
diff --git a/src/libcollections/btree/node.rs b/src/libcollections/btree/node.rs index f07962811fd..cfca717f992 100644 --- a/src/libcollections/btree/node.rs +++ b/src/libcollections/btree/node.rs @@ -31,6 +31,16 @@ // Since Rust doesn't acutally have dependent types and polymorphic recursion, // we make do with lots of unsafety. +// A major goal of this module is to avoid complexity by treating the tree as a generic (if +// weirdly shaped) container and avoiding dealing with most of the B-Tree invariants. As such, +// this module doesn't care whether the entries are sorted, which nodes can be underfull, or +// even what underfull means. However, we do rely on a few invariants: +// +// - Trees must have uniform depth/height. This means that every path down to a leaf from a +// given node has exactly the same length. +// - A node of length `n` has `n` keys, `n` values, and (in an internal node) `n + 1` edges. +// This implies that even an empty node has at least one edge. + use alloc::heap; use core::marker::PhantomData; use core::mem; @@ -43,17 +53,39 @@ use boxed::Box; const B: usize = 6; pub const CAPACITY: usize = 2 * B - 1; +/// The underlying representation of leaf nodes. Note that is often unsafe to actually store +/// these, since only the first `len` keys and values are assumed to initialized. See also +/// rust-lang/rfcs#197, which would make this structure significantly more safe by avoiding +/// accidentally dropping unused and uninitialized keys and values. struct LeafNode<K, V> { + // The arrays storing the actual data of the node. Only the first `len` elements of each + // array are initialized and valid. keys: [K; CAPACITY], vals: [V; CAPACITY], + + // We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`. + // This either points to an actual node or is null. parent: *const InternalNode<K, V>, + + // The index into the parent node's `edges` array. `*node.parent.edges[node.parent_idx]` + // should be the same thing as `node`. + // This is only guaranteed to be initialized when `parent` is nonnull. parent_idx: u16, + + // The number of keys and values this node stores. + // This is at the end of the node's representation and next to `parent_idx` to encourage + // the compiler to join `len` and `parent_idx` into the same 32-bit word, reducing space + // overhead. len: u16, } impl<K, V> LeafNode<K, V> { + /// Create a new `LeafNode`. Unsafe because all nodes should really be hidden behind + /// `BoxedNode`, preventing accidental dropping of uninitialized keys and values. unsafe fn new() -> Self { LeafNode { + // As a general policy, we leave fields uninitialized if they can be, as this should + // be both slightly faster and easier to track in Valgrind. keys: mem::uninitialized(), vals: mem::uninitialized(), parent: ptr::null(), @@ -63,15 +95,25 @@ impl<K, V> LeafNode<K, V> { } } -// We use repr(C) so that a pointer to an internal node can be -// directly used as a pointer to a leaf node +/// The underlying representation of internal nodes. As with `LeafNode`s, these should be hidden +/// behind `BoxedNode`s to prevent dropping uninitialized keys and values. Any pointer to an +/// `InternalNode` can be directly casted to a pointer to the underlying `LeafNode` portion of the +/// node, allowing code to act on leaf and internal nodes generically without having to even check +/// which of the two a pointer is pointing at. This property is enabled by the use of `repr(C)`. #[repr(C)] struct InternalNode<K, V> { data: LeafNode<K, V>, + + // `len + 1` of these are considered to be initialized and valid. edges: [BoxedNode<K, V>; 2 * B], } impl<K, V> InternalNode<K, V> { + /// Create a new `InternalNode`. This is unsafe for two reasons. First, it returns an + /// `InternalNode` by value, risking dropping of uninitialized fields. Second, an invariant of + /// internal nodes is that `len + 1` edges are initialized and valid, meaning that even when + /// the node is empty (having `len` 0), there must be one initialized and valid edge. This + /// function does not set up such an edge. unsafe fn new() -> Self { InternalNode { data: LeafNode::new(), @@ -80,8 +122,12 @@ impl<K, V> InternalNode<K, V> { } } +/// An owned pointer to a node. This basically is either `Box<LeafNode<K, V>>` or +/// `Box<InternalNode<K, V>>`. However, it contains no information as to which of the two types +/// of nodes is acutally behind the box, and, partially due to this lack of information, has no +/// destructor. struct BoxedNode<K, V> { - ptr: Unique<LeafNode<K, V>> // we don't know if this points to a leaf node or an internal node + ptr: Unique<LeafNode<K, V>> } impl<K, V> BoxedNode<K, V> { @@ -229,6 +275,7 @@ impl<K, V> Root<K, V> { pub struct NodeRef<BorrowType, K, V, Type> { height: usize, node: NonZero<*const LeafNode<K, V>>, + // This is null unless the borrow type is `Mut` root: *const Root<K, V>, _marker: PhantomData<(BorrowType, Type)> } @@ -268,10 +315,14 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Internal> { impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> { + /// Find the length of the node. This is the number of keys or values. In an + /// internal node, the number of edges is `len() + 1`. pub fn len(&self) -> usize { self.as_leaf().len as usize } + /// Remove any static information about whether this node is a `Leaf` or an + /// `Internal` node. pub fn forget_type(self) -> NodeRef<BorrowType, K, V, marker::LeafOrInternal> { NodeRef { height: self.height, @@ -281,6 +332,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> { } } + /// Temporarily take out another, immutable reference to the same node. fn reborrow<'a>(&'a self) -> NodeRef<marker::Immut<'a>, K, V, Type> { NodeRef { height: self.height, @@ -304,6 +356,13 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> { self.reborrow().into_slices().1 } + /// Find the parent of the current node. Returns `Ok(handle)` if the current + /// node actually has a parent, where `handle` points to the edge of the parent + /// that points to the current node. Returns `Err(self)` if the current node has + /// no parent, giving back the original `NodeRef`. + /// + /// `edge.descend().ascend().unwrap()` and `node.ascend().unwrap().descend()` should + /// both, upon success, do nothing. pub fn ascend(self) -> Result< Handle< NodeRef< @@ -344,6 +403,9 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> { } impl<K, V> NodeRef<marker::Owned, K, V, marker::Leaf> { + /// Similar to `ascend`, get a reference to a node's parent node, but also + /// deallocate the current node in the process. This is unsafe because the + /// current node will still be accessible despite being deallocated. pub unsafe fn deallocate_and_ascend(self) -> Option< Handle< NodeRef< @@ -362,6 +424,9 @@ impl<K, V> NodeRef<marker::Owned, K, V, marker::Leaf> { } impl<K, V> NodeRef<marker::Owned, K, V, marker::Internal> { + /// Similar to `ascend`, get a reference to a node's parent node, but also + /// deallocate the current node in the process. This is unsafe because the + /// current node will still be accessible despite being deallocated. pub unsafe fn deallocate_and_ascend(self) -> Option< Handle< NodeRef< @@ -384,6 +449,8 @@ impl<K, V> NodeRef<marker::Owned, K, V, marker::Internal> { } impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> { + /// Unsafely assert to the compiler some static information about whether this + /// node is a `Leaf`. unsafe fn cast_unchecked<NewType>(&mut self) -> NodeRef<marker::Mut, K, V, NewType> { @@ -395,6 +462,16 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> { } } + /// Temporarily take out another, mutable reference to the same node. Beware, as + /// this method is very dangerous, doubly so since it may not immediately appear + /// dangerous. + /// + /// Because mutable pointers can roam anywhere around the tree and can even (through + /// `into_root_mut`) mess with the root of the tree, the result of `reborrow_mut` + /// can easily be used to make the original mutable pointer dangling, or, in the case + /// of a reborrowed handle, out of bounds. + // FIXME(@gereeter) consider adding yet another type parameter to `NodeRef` that restricts + // the use of `ascend` and `into_root_mut` on reborrowed pointers, preventing this unsafety. unsafe fn reborrow_mut(&mut self) -> NodeRef<marker::Mut, K, V, Type> { NodeRef { height: self.height, @@ -437,6 +514,8 @@ impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Immut<'a>, K, V, Type> { } impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Mut<'a>, K, V, Type> { + /// Get a mutable reference to the root itself. This is useful primarily when the + /// height of the tree needs to be adjusted. Never call this on a reborrowed pointer. pub fn into_root_mut(self) -> &'a mut Root<K, V> { unsafe { &mut *(self.root as *mut Root<K, V>) @@ -460,6 +539,7 @@ impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Mut<'a>, K, V, Type> { } impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Leaf> { + /// Add a key/value pair the end of the node. pub fn push(&mut self, key: K, val: V) { // Necessary for correctness, but this is an internal module debug_assert!(self.len() < CAPACITY); @@ -474,6 +554,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Leaf> { self.as_leaf_mut().len += 1; } + /// Add a key/value pair to the beginning of the node. pub fn push_front(&mut self, key: K, val: V) { // Necessary for correctness, but this is an internal module debug_assert!(self.len() < CAPACITY); @@ -488,6 +569,8 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Leaf> { } impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Internal> { + /// Add a key/value pair and an edge to go to the right of that pair to + /// the end of the node. pub fn push(&mut self, key: K, val: V, edge: Root<K, V>) { // Necessary for correctness, but this is an internal module debug_assert!(edge.height == self.height - 1); @@ -506,6 +589,8 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Internal> { } } + /// Add a key/value pair and an edge to go to the left of that pair to + /// the beginning of the node. pub fn push_front(&mut self, key: K, val: V, edge: Root<K, V>) { // Necessary for correctness, but this is an internal module debug_assert!(edge.height == self.height - 1); @@ -534,6 +619,8 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Internal> { } impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal> { + /// Remove a key/value pair from the end of this node. If this is an internal node, + /// also remove the edge that was to the right of that pair. pub fn pop(&mut self) -> (K, V, Option<Root<K, V>>) { // Necessary for correctness, but this is an internal module debug_assert!(self.len() > 0); @@ -558,6 +645,8 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal> { } } + /// Remove a key/value pair from the beginning of this node. If this is an internal node, + /// also remove the edge that was to the left of that pair. pub fn pop_front(&mut self) -> (K, V, Option<Root<K, V>>) { // Necessary for correctness, but this is an internal module debug_assert!(self.len() > 0); @@ -597,6 +686,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal> { } impl<BorrowType, K, V> NodeRef<BorrowType, K, V, marker::LeafOrInternal> { + /// Check whether a node is an `Internal` node or a `Leaf` node. pub fn force(self) -> ForceResult< NodeRef<BorrowType, K, V, marker::Leaf>, NodeRef<BorrowType, K, V, marker::Internal> @@ -619,6 +709,9 @@ impl<BorrowType, K, V> NodeRef<BorrowType, K, V, marker::LeafOrInternal> { } } +/// A reference to a specific key/value pair or edge within a node. The `Node` parameter +/// must be a `NodeRef`, while the `Type` can either be `KV` (signifying a handle on a key/value +/// pair) or `Edge` (signifying a handle on an edge). pub struct Handle<Node, Type> { node: Node, idx: usize, @@ -626,6 +719,8 @@ pub struct Handle<Node, Type> { } impl<Node: Copy, Type> Copy for Handle<Node, Type> { } +// We don't need the full generality of `#[derive(Clone)]`, as the only time `Node` will be +// `Clone`able is when it is an immutable reference and therefore `Copy`. impl<Node: Copy, Type> Clone for Handle<Node, Type> { fn clone(&self) -> Self { *self @@ -633,12 +728,14 @@ impl<Node: Copy, Type> Clone for Handle<Node, Type> { } impl<Node, Type> Handle<Node, Type> { + /// Retrieve the node that contains the edge of key/value pair this handle pointes to. pub fn into_node(self) -> Node { self.node } } impl<BorrowType, K, V, NodeType> Handle<NodeRef<BorrowType, K, V, NodeType>, marker::KV> { + /// Create a new handle to a key/value pair in `node`. `idx` must be less than `node.len()`. pub fn new_kv(node: NodeRef<BorrowType, K, V, NodeType>, idx: usize) -> Self { // Necessary for correctness, but in a private module debug_assert!(idx < node.len()); @@ -670,6 +767,7 @@ impl<BorrowType, K, V, NodeType, HandleType> PartialEq impl<BorrowType, K, V, NodeType, HandleType> Handle<NodeRef<BorrowType, K, V, NodeType>, HandleType> { + /// Temporarily take out another, immutable handle on the same location. pub fn reborrow(&self) -> Handle<NodeRef<marker::Immut, K, V, NodeType>, HandleType> { @@ -685,6 +783,16 @@ impl<BorrowType, K, V, NodeType, HandleType> impl<'a, K, V, NodeType, HandleType> Handle<NodeRef<marker::Mut<'a>, K, V, NodeType>, HandleType> { + /// Temporarily take out another, mutable handle on the same location. Beware, as + /// this method is very dangerous, doubly so since it may not immediately appear + /// dangerous. + /// + /// Because mutable pointers can roam anywhere around the tree and can even (through + /// `into_root_mut`) mess with the root of the tree, the result of `reborrow_mut` + /// can easily be used to make the original mutable pointer dangling, or, in the case + /// of a reborrowed handle, out of bounds. + // FIXME(@gereeter) consider adding yet another type parameter to `NodeRef` that restricts + // the use of `ascend` and `into_root_mut` on reborrowed pointers, preventing this unsafety. pub unsafe fn reborrow_mut(&mut self) -> Handle<NodeRef<marker::Mut, K, V, NodeType>, HandleType> { @@ -700,6 +808,8 @@ impl<'a, K, V, NodeType, HandleType> impl<BorrowType, K, V, NodeType> Handle<NodeRef<BorrowType, K, V, NodeType>, marker::Edge> { + /// Create a new handle to an edge in `node`. `idx` must be less than or equal to + /// `node.len()`. pub fn new_edge(node: NodeRef<BorrowType, K, V, NodeType>, idx: usize) -> Self { // Necessary for correctness, but in a private module debug_assert!(idx <= node.len()); @@ -733,6 +843,10 @@ impl<BorrowType, K, V, NodeType> } impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::Edge> { + /// Insert a new key/value pair into the space defined by this edge, assuming that there + /// is enough space in the node for the new pair to fit. + /// + /// The returned pointer points to the inserted value. fn insert_fit(&mut self, key: K, val: V) -> *mut V { // Necessary for correctness, but in a private module debug_assert!(self.node.len() < CAPACITY); @@ -747,6 +861,10 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::Edge } } + /// Insert a new key/value pair into the space defined by this edge, possibly splitting + /// the node if there isn't enough room. + /// + /// The returned pointer points to the inserted value. pub fn insert(mut self, key: K, val: V) -> (InsertResult<'a, K, V, marker::Leaf>, *mut V) { @@ -774,6 +892,8 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::Edge } impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::Edge> { + /// Fix the parent pointer and index in the child node below this edge. This is useful + /// when the ordering of edges has been changed, such as in the various `insert` methods. fn correct_parent_link(mut self) { let idx = self.idx as u16; let ptr = self.node.as_internal_mut() as *mut _; @@ -782,18 +902,24 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker:: child.as_leaf_mut().parent_idx = idx; } + /// Unsafely assert to the compiler some static information about whether the underlying + /// node of this handle is a `Leaf`. unsafe fn cast_unchecked<NewType>(&mut self) -> Handle<NodeRef<marker::Mut, K, V, NewType>, marker::Edge> { Handle::new_edge(self.node.cast_unchecked(), self.idx) } + /// Insert a new key/value pair into the space defined by this edge, as well as a new edge + /// to go to the right of the new pair, assuming that there is enough space in the node for + /// the new pair to fit. fn insert_fit(&mut self, key: K, val: V, edge: Root<K, V>) { // Necessary for correctness, but in an internal module debug_assert!(self.node.len() < CAPACITY); debug_assert!(edge.height == self.node.height - 1); unsafe { + // This cast is a lie, but it allows us to reuse the key/value insertion logic. self.cast_unchecked::<marker::Leaf>().insert_fit(key, val); slice_insert( @@ -811,6 +937,9 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker:: } } + /// Insert a new key/value pair into the space defined by this edge, as well as a new edge + /// to go to the right of the new pair, possibly splitting the node if there isn't enough + /// room. pub fn insert(mut self, key: K, val: V, edge: Root<K, V>) -> InsertResult<'a, K, V, marker::Internal> { @@ -843,6 +972,10 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker:: impl<BorrowType, K, V> Handle<NodeRef<BorrowType, K, V, marker::Internal>, marker::Edge> { + /// Find the node pointed to by this edge. + /// + /// `edge.descend().ascend().unwrap()` and `node.ascend().unwrap().descend()` should + /// both, upon success, do nothing. pub fn descend(self) -> NodeRef<BorrowType, K, V, marker::LeafOrInternal> { NodeRef { height: self.node.height - 1, @@ -885,6 +1018,9 @@ impl<'a, K, V, NodeType> Handle<NodeRef<marker::Mut<'a>, K, V, NodeType>, marker } impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::KV> { + /// Split the underlying node into three parts. The node is truncated in place to what is + /// to the left of this handle, and the key and value pointed at by this handle and the + /// rest of the node to the right of this handle are returned separately. pub fn split(mut self) -> (NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, K, V, Root<K, V>) { unsafe { @@ -920,6 +1056,8 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::KV> } } + /// Remove the key/value pair pointed to by this handle, returning the edge representing + /// the empty space left behind. pub fn remove(mut self) -> (Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::Edge>, K, V) { unsafe { @@ -932,6 +1070,9 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::KV> } impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::KV> { + /// Split the underlying node into three parts. The node is truncated in place to what is + /// to the left of this handle, and the key and value pointed at by this handle and the + /// rest of the node to the right of this handle are returned separately. pub fn split(mut self) -> (NodeRef<marker::Mut<'a>, K, V, marker::Internal>, K, V, Root<K, V>) { unsafe { @@ -979,6 +1120,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker:: } } + /// Return whether it is valid to call `.merge()`. pub fn can_merge(&self) -> bool { ( self.reborrow() @@ -993,6 +1135,9 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker:: ) <= CAPACITY } + /// Combine the node immediately to the left of this handle, the key/value pair pointed + /// to by this handle, and the node immediately to the right of this handle into one new + /// child of the underlying node, returning an edge referencing that new child. pub fn merge(mut self) -> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::Edge> { let self1 = unsafe { ptr::read(&self) }; @@ -1068,6 +1213,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker:: impl<BorrowType, K, V, HandleType> Handle<NodeRef<BorrowType, K, V, marker::LeafOrInternal>, HandleType> { + /// Check whether the underlying node is an `Internal` node or a `Leaf` node. pub fn force(self) -> ForceResult< Handle<NodeRef<BorrowType, K, V, marker::Leaf>, HandleType>, Handle<NodeRef<BorrowType, K, V, marker::Internal>, HandleType> |
