about summary refs log tree commit diff
diff options
context:
space:
mode:
authorDylan DPC <99973273+Dylan-DPC@users.noreply.github.com>2022-04-05 01:53:31 +0200
committerGitHub <noreply@github.com>2022-04-05 01:53:31 +0200
commit3bf33b9060065b69f22c50d0ec924eaf51da830a (patch)
treeb65be5cb64ea858cc7fc1a100cc024660b1d11cc
parenta5c81695a95e2a1a8e2ea4310bf670a9c1734387 (diff)
parent0252fc9619805e59a32e8cf9a13591df69a56a5b (diff)
downloadrust-3bf33b9060065b69f22c50d0ec924eaf51da830a.tar.gz
rust-3bf33b9060065b69f22c50d0ec924eaf51da830a.zip
Rollup merge of #95588 - RalfJung:strict-provenance, r=scottmcm
explicitly distinguish pointer::addr and pointer::expose_addr

``@bgeron`` pointed out that the current docs promise that `ptr.addr()` and `ptr as usize` are equivalent. I don't think that is a promise we want to make. (Conceptually, `ptr as usize` might 'escape' the provenance to enable future `usize as ptr` casts, but `ptr.addr()` dertainly does not do that.)

So I propose we word the docs a bit more carefully here. ``@Gankra`` what do you think?
-rw-r--r--library/core/src/ptr/const_ptr.rs54
-rw-r--r--library/core/src/ptr/mod.rs128
-rw-r--r--library/core/src/ptr/mut_ptr.rs54
3 files changed, 217 insertions, 19 deletions
diff --git a/library/core/src/ptr/const_ptr.rs b/library/core/src/ptr/const_ptr.rs
index 209ea5cb043..f862912432e 100644
--- a/library/core/src/ptr/const_ptr.rs
+++ b/library/core/src/ptr/const_ptr.rs
@@ -152,9 +152,17 @@ impl<T: ?Sized> *const T {
 
     /// Gets the "address" portion of the pointer.
     ///
-    /// This is equivalent to `self as usize`, which semantically discards
-    /// *provenance* and *address-space* information. To properly restore that information,
-    /// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
+    /// This is similar to `self as usize`, which semantically discards *provenance* and
+    /// *address-space* information. However, unlike `self as usize`, casting the returned address
+    /// back to a pointer yields [`invalid`][], which is undefined behavior to dereference. To
+    /// properly restore the lost information and obtain a dereferencable pointer, use
+    /// [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
+    ///
+    /// If using those APIs is not possible because there is no way to preserve a pointer with the
+    /// required provenance, use [`expose_addr`][pointer::expose_addr] and
+    /// [`from_exposed_addr`][from_exposed_addr] instead. However, note that this makes
+    /// your code less portable and less amenable to tools that check for compliance with the Rust
+    /// memory model.
     ///
     /// On most platforms this will produce a value with the same bytes as the original
     /// pointer, because all the bytes are dedicated to describing the address.
@@ -162,8 +170,9 @@ impl<T: ?Sized> *const T {
     /// perform a change of representation to produce a value containing only the address
     /// portion of the pointer. What that means is up to the platform to define.
     ///
-    /// This API and its claimed semantics are part of the Strict Provenance experiment,
-    /// see the [module documentation][crate::ptr] for details.
+    /// This API and its claimed semantics are part of the Strict Provenance experiment, and as such
+    /// might change in the future (including possibly weakening this so it becomes wholly
+    /// equivalent to `self as usize`). See the [module documentation][crate::ptr] for details.
     #[must_use]
     #[inline]
     #[unstable(feature = "strict_provenance", issue = "95228")]
@@ -175,6 +184,41 @@ impl<T: ?Sized> *const T {
         self as usize
     }
 
+    /// Gets the "address" portion of the pointer, and 'exposes' the "provenance" part for future
+    /// use in [`from_exposed_addr`][].
+    ///
+    /// This is equivalent to `self as usize`, which semantically discards *provenance* and
+    /// *address-space* information. Furthermore, this (like the `as` cast) has the implicit
+    /// side-effect of marking the provenance as 'exposed', so on platforms that support it you can
+    /// later call [`from_exposed_addr`][] to reconstitute the original pointer including its
+    /// provenance. (Reconstructing address space information, if required, is your responsibility.)
+    ///
+    /// Using this method means that code is *not* following Strict Provenance rules. Supporting
+    /// [`from_exposed_addr`][] complicates specification and reasoning and may not be supported by
+    /// tools that help you to stay conformant with the Rust memory model, so it is recommended to
+    /// use [`addr`][pointer::addr] wherever possible.
+    ///
+    /// On most platforms this will produce a value with the same bytes as the original pointer,
+    /// because all the bytes are dedicated to describing the address. Platforms which need to store
+    /// additional information in the pointer may not support this operation, since the 'expose'
+    /// side-effect which is required for [`from_exposed_addr`][] to work is typically not
+    /// available.
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance experiment, see the
+    /// [module documentation][crate::ptr] for details.
+    ///
+    /// [`from_exposed_addr`]: from_exposed_addr
+    #[must_use]
+    #[inline]
+    #[unstable(feature = "strict_provenance", issue = "95228")]
+    pub fn expose_addr(self) -> usize
+    where
+        T: Sized,
+    {
+        // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+        self as usize
+    }
+
     /// Creates a new pointer with the given address.
     ///
     /// This performs the same operation as an `addr as ptr` cast, but copies
diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs
index b93c2ea9615..ba8b0670147 100644
--- a/library/core/src/ptr/mod.rs
+++ b/library/core/src/ptr/mod.rs
@@ -70,7 +70,7 @@
 //! interpretation of provenance. It's ok if your code doesn't strictly conform to it.**
 //!
 //! [Strict Provenance][] is an experimental set of APIs that help tools that try
-//! to validate the memory-safety of your program's execution. Notably this includes [miri][]
+//! to validate the memory-safety of your program's execution. Notably this includes [Miri][]
 //! and [CHERI][], which can detect when you access out of bounds memory or otherwise violate
 //! Rust's memory model.
 //!
@@ -136,7 +136,7 @@
 //!
 //! The strict provenance experiment is mostly only interested in exploring stricter *spatial*
 //! provenance. In this sense it can be thought of as a subset of the more ambitious and
-//! formal [Stacked Borrows][] research project, which is what tools like [miri][] are based on.
+//! formal [Stacked Borrows][] research project, which is what tools like [Miri][] are based on.
 //! In particular, Stacked Borrows is necessary to properly describe what borrows are allowed
 //! to do and when they become invalidated. This necessarily involves much more complex
 //! *temporal* reasoning than simply identifying allocations. Adjusting APIs and code
@@ -170,7 +170,8 @@
 //! Under Strict Provenance, a usize *cannot* accurately represent a pointer, and converting from
 //! a pointer to a usize is generally an operation which *only* extracts the address. It is
 //! therefore *impossible* to construct a valid pointer from a usize because there is no way
-//! to restore the address-space and provenance.
+//! to restore the address-space and provenance. In other words, pointer-integer-pointer
+//! roundtrips are not possible (in the sense that the resulting pointer is not dereferencable).
 //!
 //! The key insight to making this model *at all* viable is the [`with_addr`][] method:
 //!
@@ -194,10 +195,10 @@
 //! and then immediately converting back to a pointer. To make this use case more ergonomic,
 //! we provide the [`map_addr`][] method.
 //!
-//! To help make it clear that code is "following" Strict Provenance semantics, we also
-//! provide an [`addr`][] method which is currently equivalent to `ptr as usize`. In the
-//! future we may provide a lint for pointer<->integer casts to help you audit if your
-//! code conforms to strict provenance.
+//! To help make it clear that code is "following" Strict Provenance semantics, we also provide an
+//! [`addr`][] method which promises that the returned address is not part of a
+//! pointer-usize-pointer roundtrip. In the future we may provide a lint for pointer<->integer
+//! casts to help you audit if your code conforms to strict provenance.
 //!
 //!
 //! ## Using Strict Provenance
@@ -310,6 +311,41 @@
 //!   For instance, ARM explicitly supports high-bit tagging, and so CHERI on ARM inherits
 //!   that and should support it.
 //!
+//! ## Pointer-usize-pointer roundtrips and 'exposed' provenance
+//!
+//! **This section is *non-normative* and is part of the [Strict Provenance] experiment.**
+//!
+//! As discussed above, pointer-usize-pointer roundtrips are not possible under [Strict Provenance].
+//! However, there exists legacy Rust code that is full of such roundtrips, and legacy platform APIs
+//! regularly assume that `usize` can capture all the information that makes up a pointer. There
+//! also might be code that cannot be ported to Strict Provenance (which is something we would [like
+//! to hear about][Strict Provenance]).
+//!
+//! For situations like this, there is a fallback plan, a way to 'opt out' of Strict Provenance.
+//! However, note that this makes your code a lot harder to specify, and the code will not work
+//! (well) with tools like [Miri] and [CHERI].
+//!
+//! This fallback plan is provided by the [`expose_addr`] and [`from_exposed_addr`] methods (which
+//! are equivalent to `as` casts between pointers and integers). [`expose_addr`] is a lot like
+//! [`addr`], but additionally adds the provenance of the pointer to a global list of 'exposed'
+//! provenances. (This list is purely conceptual, it exists for the purpose of specifying Rust but
+//! is not materialized in actual executions, except in tools like [Miri].) [`from_exposed_addr`]
+//! can be used to construct a pointer with one of these previously 'exposed' provenances.
+//! [`from_exposed_addr`] takes only `addr: usize` as arguments, so unlike in [`with_addr`] there is
+//! no indication of what the correct provenance for the returned pointer is -- and that is exactly
+//! what makes pointer-usize-pointer roundtrips so tricky to rigorously specify! There is no
+//! algorithm that decides which provenance will be used. You can think of this as "guessing" the
+//! right provenance, and the guess will be "maximally in your favor", in the sense that if there is
+//! any way to avoid undefined behavior, then that is the guess that will be taken. However, if
+//! there is *no* previously 'exposed' provenance that justifies the way the returned pointer will
+//! be used, the program has undefined behavior.
+//!
+//! Using [`expose_addr`] or [`from_exposed_addr`] (or the equivalent `as` casts) means that code is
+//! *not* following Strict Provenance rules. The goal of the Strict Provenance experiment is to
+//! determine whether it is possible to use Rust without [`expose_addr`] and [`from_exposed_addr`].
+//! If this is successful, it would be a major win for avoiding specification complexity and to
+//! facilitate adoption of tools like [CHERI] and [Miri] that can be a big help in increasing the
+//! confidence in (unsafe) Rust code.
 //!
 //! [aliasing]: ../../nomicon/aliasing.html
 //! [book]: ../../book/ch19-01-unsafe-rust.html#dereferencing-a-raw-pointer
@@ -322,7 +358,9 @@
 //! [`map_addr`]: pointer::map_addr
 //! [`addr`]: pointer::addr
 //! [`ptr::invalid`]: core::ptr::invalid
-//! [miri]: https://github.com/rust-lang/miri
+//! [`expose_addr`]: pointer::expose_addr
+//! [`from_exposed_addr`]: from_exposed_addr
+//! [Miri]: https://github.com/rust-lang/miri
 //! [CHERI]: https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/
 //! [Strict Provenance]: https://github.com/rust-lang/rust/issues/95228
 //! [Stacked Borrows]: https://plv.mpi-sws.org/rustbelt/stacked-borrows/
@@ -547,6 +585,78 @@ pub const fn invalid_mut<T>(addr: usize) -> *mut T {
     addr as *mut T
 }
 
+/// Convert an address back to a pointer, picking up a previously 'exposed' provenance.
+///
+/// This is equivalent to `addr as *const T`. The provenance of the returned pointer is that of *any*
+/// pointer that was previously passed to [`expose_addr`][pointer::expose_addr] or a `ptr as usize`
+/// cast. If there is no previously 'exposed' provenance that justifies the way this pointer will be
+/// used, the program has undefined behavior. Note that there is no algorithm that decides which
+/// provenance will be used. You can think of this as "guessing" the right provenance, and the guess
+/// will be "maximally in your favor", in the sense that if there is any way to avoid undefined
+/// behavior, then that is the guess that will be taken.
+///
+/// On platforms with multiple address spaces, it is your responsibility to ensure that the
+/// address makes sense in the address space that this pointer will be used with.
+///
+/// Using this method means that code is *not* following strict provenance rules. "Guessing" a
+/// suitable provenance complicates specification and reasoning and may not be supported by
+/// tools that help you to stay conformant with the Rust memory model, so it is recommended to
+/// use [`with_addr`][pointer::with_addr] wherever possible.
+///
+/// On most platforms this will produce a value with the same bytes as the address. Platforms
+/// which need to store additional information in a pointer may not support this operation,
+/// since it is generally not possible to actually *compute* which provenance the returned
+/// pointer has to pick up.
+///
+/// This API and its claimed semantics are part of the Strict Provenance experiment, see the
+/// [module documentation][crate::ptr] for details.
+#[must_use]
+#[inline]
+#[unstable(feature = "strict_provenance", issue = "95228")]
+pub fn from_exposed_addr<T>(addr: usize) -> *const T
+where
+    T: Sized,
+{
+    // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+    addr as *const T
+}
+
+/// Convert an address back to a mutable pointer, picking up a previously 'exposed' provenance.
+///
+/// This is equivalent to `addr as *mut T`. The provenance of the returned pointer is that of *any*
+/// pointer that was previously passed to [`expose_addr`][pointer::expose_addr] or a `ptr as usize`
+/// cast. If there is no previously 'exposed' provenance that justifies the way this pointer will be
+/// used, the program has undefined behavior. Note that there is no algorithm that decides which
+/// provenance will be used. You can think of this as "guessing" the right provenance, and the guess
+/// will be "maximally in your favor", in the sense that if there is any way to avoid undefined
+/// behavior, then that is the guess that will be taken.
+///
+/// On platforms with multiple address spaces, it is your responsibility to ensure that the
+/// address makes sense in the address space that this pointer will be used with.
+///
+/// Using this method means that code is *not* following strict provenance rules. "Guessing" a
+/// suitable provenance complicates specification and reasoning and may not be supported by
+/// tools that help you to stay conformant with the Rust memory model, so it is recommended to
+/// use [`with_addr`][pointer::with_addr] wherever possible.
+///
+/// On most platforms this will produce a value with the same bytes as the address. Platforms
+/// which need to store additional information in a pointer may not support this operation,
+/// since it is generally not possible to actually *compute* which provenance the returned
+/// pointer has to pick up.
+///
+/// This API and its claimed semantics are part of the Strict Provenance experiment, see the
+/// [module documentation][crate::ptr] for details.
+#[must_use]
+#[inline]
+#[unstable(feature = "strict_provenance", issue = "95228")]
+pub fn from_exposed_addr_mut<T>(addr: usize) -> *mut T
+where
+    T: Sized,
+{
+    // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+    addr as *mut T
+}
+
 /// Forms a raw slice from a pointer and a length.
 ///
 /// The `len` argument is the number of **elements**, not the number of bytes.
@@ -763,7 +873,7 @@ pub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
         );
     }
 
-    // NOTE(scottmcm) MIRI is disabled here as reading in smaller units is a
+    // NOTE(scottmcm) Miri is disabled here as reading in smaller units is a
     // pessimization for it.  Also, if the type contains any unaligned pointers,
     // copying those over multiple reads is difficult to support.
     #[cfg(not(miri))]
diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs
index bfc89625935..5db9c3e941e 100644
--- a/library/core/src/ptr/mut_ptr.rs
+++ b/library/core/src/ptr/mut_ptr.rs
@@ -156,9 +156,17 @@ impl<T: ?Sized> *mut T {
 
     /// Gets the "address" portion of the pointer.
     ///
-    /// This is equivalent to `self as usize`, which semantically discards
-    /// *provenance* and *address-space* information. To properly restore that information,
-    /// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
+    /// This is similar to `self as usize`, which semantically discards *provenance* and
+    /// *address-space* information. However, unlike `self as usize`, casting the returned address
+    /// back to a pointer yields [`invalid`][], which is undefined behavior to dereference. To
+    /// properly restore the lost information and obtain a dereferencable pointer, use
+    /// [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
+    ///
+    /// If using those APIs is not possible because there is no way to preserve a pointer with the
+    /// required provenance, use [`expose_addr`][pointer::expose_addr] and
+    /// [`from_exposed_addr_mut`][from_exposed_addr_mut] instead. However, note that this makes
+    /// your code less portable and less amenable to tools that check for compliance with the Rust
+    /// memory model.
     ///
     /// On most platforms this will produce a value with the same bytes as the original
     /// pointer, because all the bytes are dedicated to describing the address.
@@ -166,8 +174,9 @@ impl<T: ?Sized> *mut T {
     /// perform a change of representation to produce a value containing only the address
     /// portion of the pointer. What that means is up to the platform to define.
     ///
-    /// This API and its claimed semantics are part of the Strict Provenance experiment,
-    /// see the [module documentation][crate::ptr] for details.
+    /// This API and its claimed semantics are part of the Strict Provenance experiment, and as such
+    /// might change in the future (including possibly weakening this so it becomes wholly
+    /// equivalent to `self as usize`). See the [module documentation][crate::ptr] for details.
     #[must_use]
     #[inline]
     #[unstable(feature = "strict_provenance", issue = "95228")]
@@ -179,6 +188,41 @@ impl<T: ?Sized> *mut T {
         self as usize
     }
 
+    /// Gets the "address" portion of the pointer, and 'exposes' the "provenance" part for future
+    /// use in [`from_exposed_addr`][].
+    ///
+    /// This is equivalent to `self as usize`, which semantically discards *provenance* and
+    /// *address-space* information. Furthermore, this (like the `as` cast) has the implicit
+    /// side-effect of marking the provenance as 'exposed', so on platforms that support it you can
+    /// later call [`from_exposed_addr_mut`][] to reconstitute the original pointer including its
+    /// provenance. (Reconstructing address space information, if required, is your responsibility.)
+    ///
+    /// Using this method means that code is *not* following Strict Provenance rules. Supporting
+    /// [`from_exposed_addr_mut`][] complicates specification and reasoning and may not be supported
+    /// by tools that help you to stay conformant with the Rust memory model, so it is recommended
+    /// to use [`addr`][pointer::addr] wherever possible.
+    ///
+    /// On most platforms this will produce a value with the same bytes as the original pointer,
+    /// because all the bytes are dedicated to describing the address. Platforms which need to store
+    /// additional information in the pointer may not support this operation, since the 'expose'
+    /// side-effect which is required for [`from_exposed_addr_mut`][] to work is typically not
+    /// available.
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance experiment, see the
+    /// [module documentation][crate::ptr] for details.
+    ///
+    /// [`from_exposed_addr_mut`]: from_exposed_addr_mut
+    #[must_use]
+    #[inline]
+    #[unstable(feature = "strict_provenance", issue = "95228")]
+    pub fn expose_addr(self) -> usize
+    where
+        T: Sized,
+    {
+        // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+        self as usize
+    }
+
     /// Creates a new pointer with the given address.
     ///
     /// This performs the same operation as an `addr as ptr` cast, but copies