fix BTree creating shared references that are not entirely in-bounds

author: Ralf Jung <post@ralfj.de> 2018-12-07 16:52:10 +0100
committer: Ralf Jung <post@ralfj.de> 2018-12-09 17:44:52 +0100
commit: 0e70c269feece979427bcc8795afc43efc1e47c1 (patch)
tree: 203c53e0ffefbfa4675f606df4b89fb2b45f9317 /src/liballoc
parent: 1ccb5b219d50b1bc96dbb85e82a8473f16422582 (diff)
download: rust-0e70c269feece979427bcc8795afc43efc1e47c1.tar.gz
rust-0e70c269feece979427bcc8795afc43efc1e47c1.zip
1 files changed, 86 insertions, 29 deletions
diff --git a/src/liballoc/collections/btree/node.rs b/src/liballoc/collections/btree/node.rs
index 215689dfc48..13cbcee2f8e 100644
--- a/src/liballoc/collections/btree/node.rs
+++ b/src/liballoc/collections/btree/node.rs
@@ -58,9 +58,34 @@ pub const CAPACITY: usize = 2 * B - 1;
 /// these should always be put behind pointers, and specifically behind `BoxedNode` in the owned
 /// case.
 ///
-/// We put the metadata first so that its position is the same for every `K` and `V`, in order
-/// to statically allocate a single dummy node to avoid allocations. This struct is `repr(C)` to
-/// prevent them from being reordered.
+/// We have a separate type for the header and rely on it matching the prefix of `LeafNode`, in
+/// order to statically allocate a single dummy node to avoid allocations. This struct is
+/// `repr(C)` to prevent them from being reordered.  `LeafNode` does not just contain a
+/// `NodeHeader` because we do not want unnecessary padding between `len` and the keys.
+/// Crucially, `NodeHeader` can be safely transmuted to different K and V.  (This is exploited
+/// by `as_header`.)
+/// See `into_key_slice` for an explanation of K2.  K2 cannot be safely transmuted around
+/// because the size of `NodeHeader` depends on its alignment!
+#[repr(C)]
+struct NodeHeader<K, V, K2 = ()> {
+    /// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
+    /// This either points to an actual node or is null.
+    parent: *const InternalNode<K, V>,
+
+    /// This node's index into the parent node's `edges` array.
+    /// `*node.parent.edges[node.parent_idx]` should be the same thing as `node`.
+    /// This is only guaranteed to be initialized when `parent` is non-null.
+    parent_idx: MaybeUninit<u16>,
+
+    /// The number of keys and values this node stores.
+    ///
+    /// This next to `parent_idx` to encourage the compiler to join `len` and
+    /// `parent_idx` into the same 32-bit word, reducing space overhead.
+    len: u16,
+
+    /// See `into_key_slice`.
+    keys_start: [K2; 0],
+}
 #[repr(C)]
 struct LeafNode<K, V> {
     /// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
@@ -98,24 +123,25 @@ impl<K, V> LeafNode<K, V> {
             len: 0
         }
     }
+}
 
+impl<K, V> NodeHeader<K, V> {
     fn is_shared_root(&self) -> bool {
         ptr::eq(self, &EMPTY_ROOT_NODE as *const _ as *const _)
     }
 }
 
 // We need to implement Sync here in order to make a static instance.
-unsafe impl Sync for LeafNode<(), ()> {}
+unsafe impl Sync for NodeHeader<(), ()> {}
 
 // An empty node used as a placeholder for the root node, to avoid allocations.
-// We use () in order to save space, since no operation on an empty tree will
+// We use just a header in order to save space, since no operation on an empty tree will
 // ever take a pointer past the first key.
-static EMPTY_ROOT_NODE: LeafNode<(), ()> = LeafNode {
+static EMPTY_ROOT_NODE: NodeHeader<(), ()> = NodeHeader {
     parent: ptr::null(),
     parent_idx: MaybeUninit::uninitialized(),
     len: 0,
-    keys: MaybeUninit::uninitialized(),
-    vals: MaybeUninit::uninitialized(),
+    keys_start: [],
 };
 
 /// The underlying representation of internal nodes. As with `LeafNode`s, these should be hidden
@@ -306,6 +332,11 @@ impl<K, V> Root<K, V> {
 ///   `Leaf`, the `NodeRef` points to a leaf node, when this is `Internal` the
 ///   `NodeRef` points to an internal node, and when this is `LeafOrInternal` the
 ///   `NodeRef` could be pointing to either type of node.
+///   Note that in case of a leaf node, this might still be the shared root!  Only turn
+///   this into a `LeafNode` reference if you know it is not a root!  Shared references
+///   must be dereferencable *for the entire size of their pointee*, so `&InternalNode`
+///   pointing to the shared root is UB.
+///   Turning this into a `NodeHeader` is always safe.
 pub struct NodeRef<BorrowType, K, V, Type> {
     height: usize,
     node: NonNull<LeafNode<K, V>>,
@@ -352,7 +383,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
     /// Finds the length of the node. This is the number of keys or values. In an
     /// internal node, the number of edges is `len() + 1`.
     pub fn len(&self) -> usize {
-        self.as_leaf().len as usize
+        self.as_header().len as usize
     }
 
     /// Returns the height of this node in the whole tree. Zero height denotes the
@@ -382,14 +413,19 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
         }
     }
 
-    fn as_leaf(&self) -> &LeafNode<K, V> {
+    /// Assert that this is indeed a proper leaf node, and not the shared root.
+    unsafe fn as_leaf(&self) -> &LeafNode<K, V> {
+        self.node.as_ref()
+    }
+
+    fn as_header(&self) -> &NodeHeader<K, V> {
         unsafe {
-            self.node.as_ref()
+            &*(self.node.as_ptr() as *const NodeHeader<K, V>)
         }
     }
 
     pub fn is_shared_root(&self) -> bool {
-        self.as_leaf().is_shared_root()
+        self.as_header().is_shared_root()
     }
 
     pub fn keys(&self) -> &[K] {
@@ -418,7 +454,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
         >,
         Self
     > {
-        let parent_as_leaf = self.as_leaf().parent as *const LeafNode<K, V>;
+        let parent_as_leaf = self.as_header().parent as *const LeafNode<K, V>;
         if let Some(non_zero) = NonNull::new(parent_as_leaf as *mut _) {
             Ok(Handle {
                 node: NodeRef {
@@ -427,7 +463,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
                     root: self.root,
                     _marker: PhantomData
                 },
-                idx: unsafe { usize::from(*self.as_leaf().parent_idx.get_ref()) },
+                idx: unsafe { usize::from(*self.as_header().parent_idx.get_ref()) },
                 _marker: PhantomData
             })
         } else {
@@ -535,9 +571,8 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
     }
 
     fn as_leaf_mut(&mut self) -> &mut LeafNode<K, V> {
-        unsafe {
-            self.node.as_mut()
-        }
+        // We are mutable, so we cannot be the root, so this is okay.
+        unsafe { self.node.as_mut() }
     }
 
     fn keys_mut(&mut self) -> &mut [K] {
@@ -551,28 +586,50 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
 
 impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Immut<'a>, K, V, Type> {
     fn into_key_slice(self) -> &'a [K] {
-        // When taking a pointer to the keys, if our key has a stricter
-        // alignment requirement than the shared root does, then the pointer
-        // would be out of bounds, which LLVM assumes will not happen. If the
-        // alignment is more strict, we need to make an empty slice that doesn't
-        // use an out of bounds pointer.
+        // We have to be careful here because we might be pointing to the shared root.
+        // In that case, we must not create an `&LeafNode`.  We could just return
+        // an empty slice whenever the lenght is 0 (this includes the shared root),
+        // but we want to avoid that run-time check.
+        // Instead, we create a slice pointing into the node whenever possible.
+        // We can sometimes do this even for the shared root, as the slice will be
+        // empty.  We cannot *always* do this because if the type is too highly
+        // aligned, the offset of `keys` in a "full node" might be outside the bounds
+        // of the header!  So we do an alignment check first, that will be
+        // evaluated at compile-time, and only do any run-time check in the rare case
+        // that the alignment is very big.
         if mem::align_of::<K>() > mem::align_of::<LeafNode<(), ()>>() && self.is_shared_root() {
             &[]
         } else {
-            // Here either it's not the root, or the alignment is less strict,
-            // in which case the keys pointer will point "one-past-the-end" of
-            // the node, which is allowed by LLVM.
+            // Thanks to the alignment check above, we know that `keys` will be
+            // in-bounds of some allocation even if this is the shared root!
+            // (We might be one-past-the-end, but that is allowed by LLVM.)
+            // Getting the pointer is tricky though.  `NodeHeader` does not have a `keys`
+            // field because we want its size to not depend on the alignment of `K`
+            // (needed becuase `as_header` should be safe).  We cannot call `as_leaf`
+            // because we might be the shared root.
+            // For this reason, `NodeHeader` has this `K2` parameter (that's usually `()`
+            // and hence just adds a size-0-align-1 field, not affecting layout).
+            // We know that we can transmute `NodeHeader<K, V, ()>` to `NodeHeader<K, V, K>`
+            // because we did the alignment check above, and hence `NodeHeader<K, V, K>`
+            // is not bigger than `NodeHeader<K, V, ()>`!  Then we can use `NodeHeader<K, V, K>`
+            // to compute the pointer where the keys start.
+            // This entire hack will become unnecessary once
+            // <https://github.com/rust-lang/rfcs/pull/2582> lands, then we can just take a raw
+            // pointer to the `keys` field of `*const InternalNode<K, V>`.
+
+            // This is a non-debug-assert because it can be completely compile-time evaluated.
+            assert!(mem::size_of::<NodeHeader<K, V>>() == mem::size_of::<NodeHeader<K, V, K>>());
+            let header = self.as_header() as *const _ as *const NodeHeader<K, V, K>;
+            let keys = unsafe { &(*header).keys_start as *const _ as *const K };
             unsafe {
-                slice::from_raw_parts(
-                    self.as_leaf().keys.as_ptr() as *const K,
-                    self.len()
-                )
+                slice::from_raw_parts(keys, self.len())
             }
         }
     }
 
     fn into_val_slice(self) -> &'a [V] {
         debug_assert!(!self.is_shared_root());
+        // We cannot be the root, so `as_leaf` is okay
         unsafe {
             slice::from_raw_parts(
                 self.as_leaf().vals.as_ptr() as *const V,
author	Ralf Jung <post@ralfj.de>	2018-12-07 16:52:10 +0100
committer	Ralf Jung <post@ralfj.de>	2018-12-09 17:44:52 +0100
commit	0e70c269feece979427bcc8795afc43efc1e47c1 (patch)
tree	203c53e0ffefbfa4675f606df4b89fb2b45f9317 /src/liballoc
parent	1ccb5b219d50b1bc96dbb85e82a8473f16422582 (diff)
download	rust-0e70c269feece979427bcc8795afc43efc1e47c1.tar.gz rust-0e70c269feece979427bcc8795afc43efc1e47c1.zip