about summary refs log tree commit diff
diff options
context:
space:
mode:
authorScott McMurray <scottmcm@users.noreply.github.com>2023-03-12 15:52:34 -0700
committerScott McMurray <scottmcm@users.noreply.github.com>2023-03-12 15:52:34 -0700
commit87696fd5a1a1b7cd75cd9a66896deae0ab56cfb5 (patch)
treeddaeb15423dcf90b4432e0cca80d2cc4c6c59c4a
parent1f70bb8c43054d3948ffb579ba3809822a889c62 (diff)
downloadrust-87696fd5a1a1b7cd75cd9a66896deae0ab56cfb5.tar.gz
rust-87696fd5a1a1b7cd75cd9a66896deae0ab56cfb5.zip
Add a better approach comment in `ptr::read` to justify the intrinsic
-rw-r--r--library/core/src/ptr/mod.rs43
1 files changed, 32 insertions, 11 deletions
diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs
index 04b67a56db5..86929e2c488 100644
--- a/library/core/src/ptr/mod.rs
+++ b/library/core/src/ptr/mod.rs
@@ -1135,17 +1135,31 @@ pub const unsafe fn replace<T>(dst: *mut T, mut src: T) -> T {
 #[rustc_const_unstable(feature = "const_ptr_read", issue = "80377")]
 #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
 pub const unsafe fn read<T>(src: *const T) -> T {
-    // We are calling the intrinsics directly to avoid function calls in the generated code
-    // as `intrinsics::copy_nonoverlapping` is a wrapper function.
-    #[cfg(bootstrap)]
-    extern "rust-intrinsic" {
-        #[rustc_const_stable(feature = "const_intrinsic_copy", since = "1.63.0")]
-        fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize);
-    }
+    // It would be semantically correct to implement this via `copy_nonoverlapping`
+    // and `MaybeUninit`, as was done before PR #109035.
+
+    // However, it was switched to an intrinsic that lowers to `_0 = *src` in MIR in
+    // order to address a few implementation issues:
+    //
+    // - Using `MaybeUninit::assume_init` after a `copy_nonoverlapping` was not
+    //   turning the untyped copy into a typed load. As such, the generated
+    //   `load` in LLVM didn't get various metadata, such as `!range` (#73258),
+    //   `!nonnull`, and `!noundef`, resulting in poorer optimization.
+    // - Going through the extra local resulted in multiple extra copies, even
+    //   in optimized MIR.  (Ignoring StorageLive/Dead, the intrinsic is one
+    //   MIR statement, while the previous implementation was eight.)  LLVM
+    //   could sometimes optimize them away, but because `read` is at the core
+    //   of so many things, not having them in the first place improves what we
+    //   hand off to the backend.  For example, `mem::replace::<Big>` previously
+    //   emitted 4 `alloca` and 6 `memcpy`s, but is now 1 `alloca` and 3 `memcpy`s.
+    // - In general, this approach keeps us from getting any more bugs (like
+    //   #106369) that boil down to "`read(p)` is worse than `*p`", as this
+    //   makes them look identical to the backend (or other MIR consumers).
+    //
+    // Future enhancements to MIR optimizations might well allow this to return
+    // to the previous implementation, rather than using an intrinsic.
 
     // SAFETY: the caller must guarantee that `src` is valid for reads.
-    // `src` cannot overlap `tmp` because `tmp` was just allocated on
-    // the stack as a separate allocated object.
     unsafe {
         assert_unsafe_precondition!(
             "ptr::read requires that the pointer argument is aligned and non-null",
@@ -1154,14 +1168,21 @@ pub const unsafe fn read<T>(src: *const T) -> T {
 
         #[cfg(bootstrap)]
         {
+            // We are calling the intrinsics directly to avoid function calls in the
+            // generated code as `intrinsics::copy_nonoverlapping` is a wrapper function.
+            extern "rust-intrinsic" {
+                #[rustc_const_stable(feature = "const_intrinsic_copy", since = "1.63.0")]
+                fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize);
+            }
+
+            // `src` cannot overlap `tmp` because `tmp` was just allocated on
+            // the stack as a separate allocated object.
             let mut tmp = MaybeUninit::<T>::uninit();
             copy_nonoverlapping(src, tmp.as_mut_ptr(), 1);
             tmp.assume_init()
         }
         #[cfg(not(bootstrap))]
         {
-            // This uses a dedicated intrinsic, not `copy_nonoverlapping`,
-            // so that it gets a *typed* copy, not an *untyped* one.
             crate::intrinsics::read_via_copy(src)
         }
     }