diff options
| author | Ralf Jung <post@ralfj.de> | 2018-12-07 16:52:10 +0100 |
|---|---|---|
| committer | Ralf Jung <post@ralfj.de> | 2018-12-09 17:44:52 +0100 |
| commit | 0e70c269feece979427bcc8795afc43efc1e47c1 (patch) | |
| tree | 203c53e0ffefbfa4675f606df4b89fb2b45f9317 /src/liballoc | |
| parent | 1ccb5b219d50b1bc96dbb85e82a8473f16422582 (diff) | |
| download | rust-0e70c269feece979427bcc8795afc43efc1e47c1.tar.gz rust-0e70c269feece979427bcc8795afc43efc1e47c1.zip | |
fix BTree creating shared references that are not entirely in-bounds
Diffstat (limited to 'src/liballoc')
| -rw-r--r-- | src/liballoc/collections/btree/node.rs | 115 |
1 files changed, 86 insertions, 29 deletions
diff --git a/src/liballoc/collections/btree/node.rs b/src/liballoc/collections/btree/node.rs index 215689dfc48..13cbcee2f8e 100644 --- a/src/liballoc/collections/btree/node.rs +++ b/src/liballoc/collections/btree/node.rs @@ -58,9 +58,34 @@ pub const CAPACITY: usize = 2 * B - 1; /// these should always be put behind pointers, and specifically behind `BoxedNode` in the owned /// case. /// -/// We put the metadata first so that its position is the same for every `K` and `V`, in order -/// to statically allocate a single dummy node to avoid allocations. This struct is `repr(C)` to -/// prevent them from being reordered. +/// We have a separate type for the header and rely on it matching the prefix of `LeafNode`, in +/// order to statically allocate a single dummy node to avoid allocations. This struct is +/// `repr(C)` to prevent them from being reordered. `LeafNode` does not just contain a +/// `NodeHeader` because we do not want unnecessary padding between `len` and the keys. +/// Crucially, `NodeHeader` can be safely transmuted to different K and V. (This is exploited +/// by `as_header`.) +/// See `into_key_slice` for an explanation of K2. K2 cannot be safely transmuted around +/// because the size of `NodeHeader` depends on its alignment! +#[repr(C)] +struct NodeHeader<K, V, K2 = ()> { + /// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`. + /// This either points to an actual node or is null. + parent: *const InternalNode<K, V>, + + /// This node's index into the parent node's `edges` array. + /// `*node.parent.edges[node.parent_idx]` should be the same thing as `node`. + /// This is only guaranteed to be initialized when `parent` is non-null. + parent_idx: MaybeUninit<u16>, + + /// The number of keys and values this node stores. + /// + /// This next to `parent_idx` to encourage the compiler to join `len` and + /// `parent_idx` into the same 32-bit word, reducing space overhead. + len: u16, + + /// See `into_key_slice`. + keys_start: [K2; 0], +} #[repr(C)] struct LeafNode<K, V> { /// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`. @@ -98,24 +123,25 @@ impl<K, V> LeafNode<K, V> { len: 0 } } +} +impl<K, V> NodeHeader<K, V> { fn is_shared_root(&self) -> bool { ptr::eq(self, &EMPTY_ROOT_NODE as *const _ as *const _) } } // We need to implement Sync here in order to make a static instance. -unsafe impl Sync for LeafNode<(), ()> {} +unsafe impl Sync for NodeHeader<(), ()> {} // An empty node used as a placeholder for the root node, to avoid allocations. -// We use () in order to save space, since no operation on an empty tree will +// We use just a header in order to save space, since no operation on an empty tree will // ever take a pointer past the first key. -static EMPTY_ROOT_NODE: LeafNode<(), ()> = LeafNode { +static EMPTY_ROOT_NODE: NodeHeader<(), ()> = NodeHeader { parent: ptr::null(), parent_idx: MaybeUninit::uninitialized(), len: 0, - keys: MaybeUninit::uninitialized(), - vals: MaybeUninit::uninitialized(), + keys_start: [], }; /// The underlying representation of internal nodes. As with `LeafNode`s, these should be hidden @@ -306,6 +332,11 @@ impl<K, V> Root<K, V> { /// `Leaf`, the `NodeRef` points to a leaf node, when this is `Internal` the /// `NodeRef` points to an internal node, and when this is `LeafOrInternal` the /// `NodeRef` could be pointing to either type of node. +/// Note that in case of a leaf node, this might still be the shared root! Only turn +/// this into a `LeafNode` reference if you know it is not a root! Shared references +/// must be dereferencable *for the entire size of their pointee*, so `&InternalNode` +/// pointing to the shared root is UB. +/// Turning this into a `NodeHeader` is always safe. pub struct NodeRef<BorrowType, K, V, Type> { height: usize, node: NonNull<LeafNode<K, V>>, @@ -352,7 +383,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> { /// Finds the length of the node. This is the number of keys or values. In an /// internal node, the number of edges is `len() + 1`. pub fn len(&self) -> usize { - self.as_leaf().len as usize + self.as_header().len as usize } /// Returns the height of this node in the whole tree. Zero height denotes the @@ -382,14 +413,19 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> { } } - fn as_leaf(&self) -> &LeafNode<K, V> { + /// Assert that this is indeed a proper leaf node, and not the shared root. + unsafe fn as_leaf(&self) -> &LeafNode<K, V> { + self.node.as_ref() + } + + fn as_header(&self) -> &NodeHeader<K, V> { unsafe { - self.node.as_ref() + &*(self.node.as_ptr() as *const NodeHeader<K, V>) } } pub fn is_shared_root(&self) -> bool { - self.as_leaf().is_shared_root() + self.as_header().is_shared_root() } pub fn keys(&self) -> &[K] { @@ -418,7 +454,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> { >, Self > { - let parent_as_leaf = self.as_leaf().parent as *const LeafNode<K, V>; + let parent_as_leaf = self.as_header().parent as *const LeafNode<K, V>; if let Some(non_zero) = NonNull::new(parent_as_leaf as *mut _) { Ok(Handle { node: NodeRef { @@ -427,7 +463,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> { root: self.root, _marker: PhantomData }, - idx: unsafe { usize::from(*self.as_leaf().parent_idx.get_ref()) }, + idx: unsafe { usize::from(*self.as_header().parent_idx.get_ref()) }, _marker: PhantomData }) } else { @@ -535,9 +571,8 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> { } fn as_leaf_mut(&mut self) -> &mut LeafNode<K, V> { - unsafe { - self.node.as_mut() - } + // We are mutable, so we cannot be the root, so this is okay. + unsafe { self.node.as_mut() } } fn keys_mut(&mut self) -> &mut [K] { @@ -551,28 +586,50 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> { impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Immut<'a>, K, V, Type> { fn into_key_slice(self) -> &'a [K] { - // When taking a pointer to the keys, if our key has a stricter - // alignment requirement than the shared root does, then the pointer - // would be out of bounds, which LLVM assumes will not happen. If the - // alignment is more strict, we need to make an empty slice that doesn't - // use an out of bounds pointer. + // We have to be careful here because we might be pointing to the shared root. + // In that case, we must not create an `&LeafNode`. We could just return + // an empty slice whenever the lenght is 0 (this includes the shared root), + // but we want to avoid that run-time check. + // Instead, we create a slice pointing into the node whenever possible. + // We can sometimes do this even for the shared root, as the slice will be + // empty. We cannot *always* do this because if the type is too highly + // aligned, the offset of `keys` in a "full node" might be outside the bounds + // of the header! So we do an alignment check first, that will be + // evaluated at compile-time, and only do any run-time check in the rare case + // that the alignment is very big. if mem::align_of::<K>() > mem::align_of::<LeafNode<(), ()>>() && self.is_shared_root() { &[] } else { - // Here either it's not the root, or the alignment is less strict, - // in which case the keys pointer will point "one-past-the-end" of - // the node, which is allowed by LLVM. + // Thanks to the alignment check above, we know that `keys` will be + // in-bounds of some allocation even if this is the shared root! + // (We might be one-past-the-end, but that is allowed by LLVM.) + // Getting the pointer is tricky though. `NodeHeader` does not have a `keys` + // field because we want its size to not depend on the alignment of `K` + // (needed becuase `as_header` should be safe). We cannot call `as_leaf` + // because we might be the shared root. + // For this reason, `NodeHeader` has this `K2` parameter (that's usually `()` + // and hence just adds a size-0-align-1 field, not affecting layout). + // We know that we can transmute `NodeHeader<K, V, ()>` to `NodeHeader<K, V, K>` + // because we did the alignment check above, and hence `NodeHeader<K, V, K>` + // is not bigger than `NodeHeader<K, V, ()>`! Then we can use `NodeHeader<K, V, K>` + // to compute the pointer where the keys start. + // This entire hack will become unnecessary once + // <https://github.com/rust-lang/rfcs/pull/2582> lands, then we can just take a raw + // pointer to the `keys` field of `*const InternalNode<K, V>`. + + // This is a non-debug-assert because it can be completely compile-time evaluated. + assert!(mem::size_of::<NodeHeader<K, V>>() == mem::size_of::<NodeHeader<K, V, K>>()); + let header = self.as_header() as *const _ as *const NodeHeader<K, V, K>; + let keys = unsafe { &(*header).keys_start as *const _ as *const K }; unsafe { - slice::from_raw_parts( - self.as_leaf().keys.as_ptr() as *const K, - self.len() - ) + slice::from_raw_parts(keys, self.len()) } } } fn into_val_slice(self) -> &'a [V] { debug_assert!(!self.is_shared_root()); + // We cannot be the root, so `as_leaf` is okay unsafe { slice::from_raw_parts( self.as_leaf().vals.as_ptr() as *const V, |
