about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ralf Jung <post@ralfj.de> 2022-08-27 14:54:02 -0400
committer Ralf Jung <post@ralfj.de> 2022-08-27 18:37:44 -0400
commit 2e172473daefd24631faf3906bd411798d7d8a17 (patch)
tree 4bbfce7ca26338ca0db6ac3821acafd58b479d5c
parent e63a6257118effd270223ae38306013dfd477516 (diff)
download rust-2e172473daefd24631faf3906bd411798d7d8a17.tar.gz
rust-2e172473daefd24631faf3906bd411798d7d8a17.zip
interpret: make read-pointer-as-bytes *always* work in Miri
and show some extra information when it happens in CTFE
-rw-r--r-- compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs 5
-rw-r--r-- compiler/rustc_const_eval/src/const_eval/error.rs 13
-rw-r--r-- compiler/rustc_const_eval/src/const_eval/eval_queries.rs 8
-rw-r--r-- compiler/rustc_const_eval/src/interpret/intrinsics.rs 21
-rw-r--r-- compiler/rustc_const_eval/src/interpret/memory.rs 24
-rw-r--r-- compiler/rustc_const_eval/src/interpret/operand.rs 2
-rw-r--r-- compiler/rustc_const_eval/src/interpret/place.rs 4
-rw-r--r-- compiler/rustc_const_eval/src/interpret/projection.rs 4
-rw-r--r-- compiler/rustc_const_eval/src/interpret/validity.rs 35
-rw-r--r-- compiler/rustc_middle/src/mir/interpret/allocation.rs 156
-rw-r--r-- compiler/rustc_middle/src/mir/interpret/error.rs 13
-rw-r--r-- compiler/rustc_middle/src/mir/interpret/pointer.rs 7
-rw-r--r-- compiler/rustc_middle/src/mir/interpret/value.rs 2
-rw-r--r-- compiler/rustc_middle/src/mir/mod.rs 2
-rw-r--r-- compiler/rustc_middle/src/ty/print/pretty.rs 2
15 files changed, 153 insertions, 145 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index a32b413d45f..1f358b1bbb9 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -186,7 +186,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                         let size = Size::from_bytes(
                             4 * ret_lane_count, /* size_of([u32; ret_lane_count]) */
                         );
-                        alloc.inner().get_bytes(fx, alloc_range(offset, size)).unwrap()
+                        alloc
+                            .inner()
+                            .get_bytes_strip_provenance(fx, alloc_range(offset, size))
+                            .unwrap()
                     }
                     _ => unreachable!("{:?}", idx_const),
                 };
diff --git a/compiler/rustc_const_eval/src/const_eval/error.rs b/compiler/rustc_const_eval/src/const_eval/error.rs
index eb81f43c3fe..63a3fef2e62 100644
--- a/compiler/rustc_const_eval/src/const_eval/error.rs
+++ b/compiler/rustc_const_eval/src/const_eval/error.rs
@@ -10,6 +10,7 @@ use rustc_span::{Span, Symbol};
 use super::InterpCx;
 use crate::interpret::{
     struct_error, ErrorHandled, FrameInfo, InterpError, InterpErrorInfo, Machine, MachineStopType,
+    UnsupportedOpInfo,
 };
 
 /// The CTFE machine has some custom error kinds.
@@ -153,6 +154,18 @@ impl<'tcx> ConstEvalErr<'tcx> {
             if let Some(span_msg) = span_msg {
                 err.span_label(self.span, span_msg);
             }
+            // Add some more context for select error types.
+            match self.error {
+                InterpError::Unsupported(
+                    UnsupportedOpInfo::ReadPointerAsBytes
+                    | UnsupportedOpInfo::PartialPointerOverwrite(_)
+                    | UnsupportedOpInfo::PartialPointerCopy(_),
+                ) => {
+                    err.help("this code performed an operation that depends on the underlying bytes representing a pointer");
+                    err.help("the absolute address of a pointer is not known at compile-time, so such operations are not supported");
+                }
+                _ => {}
+            }
             // Add spans for the stacktrace. Don't print a single-line backtrace though.
             if self.stacktrace.len() > 1 {
                 // Helper closure to print duplicated lines.
diff --git a/compiler/rustc_const_eval/src/const_eval/eval_queries.rs b/compiler/rustc_const_eval/src/const_eval/eval_queries.rs
index e13ad1c95bd..6a737d120e2 100644
--- a/compiler/rustc_const_eval/src/const_eval/eval_queries.rs
+++ b/compiler/rustc_const_eval/src/const_eval/eval_queries.rs
@@ -2,8 +2,8 @@ use super::{CompileTimeEvalContext, CompileTimeInterpreter, ConstEvalErr};
 use crate::interpret::eval_nullary_intrinsic;
 use crate::interpret::{
     intern_const_alloc_recursive, Allocation, ConstAlloc, ConstValue, CtfeValidationMode, GlobalId,
-    Immediate, InternKind, InterpCx, InterpResult, MPlaceTy, MemoryKind, OpTy, RefTracking,
-    StackPopCleanup,
+    Immediate, InternKind, InterpCx, InterpError, InterpResult, MPlaceTy, MemoryKind, OpTy,
+    RefTracking, StackPopCleanup,
 };
 
 use rustc_hir::def::DefKind;
@@ -385,7 +385,9 @@ pub fn eval_to_allocation_raw_provider<'tcx>(
                     ecx.tcx,
                     "it is undefined behavior to use this value",
                     |diag| {
-                        diag.note(NOTE_ON_UNDEFINED_BEHAVIOR_ERROR);
+                        if matches!(err.error, InterpError::UndefinedBehavior(_)) {
+                            diag.note(NOTE_ON_UNDEFINED_BEHAVIOR_ERROR);
+                        }
                         diag.note(&format!(
                             "the raw bytes of the constant ({}",
                             display_allocation(
diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs
index 6f3bd3bf4c5..a8ec8447f64 100644
--- a/compiler/rustc_const_eval/src/interpret/intrinsics.rs
+++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs
@@ -687,10 +687,23 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         let layout = self.layout_of(lhs.layout.ty.builtin_deref(true).unwrap().ty)?;
         assert!(!layout.is_unsized());
 
-        let lhs = self.read_pointer(lhs)?;
-        let rhs = self.read_pointer(rhs)?;
-        let lhs_bytes = self.read_bytes_ptr(lhs, layout.size)?;
-        let rhs_bytes = self.read_bytes_ptr(rhs, layout.size)?;
+        let get_bytes = |this: &InterpCx<'mir, 'tcx, M>,
+                         op: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
+                         size|
+         -> InterpResult<'tcx, &[u8]> {
+            let ptr = this.read_pointer(op)?;
+            let Some(alloc_ref) = self.get_ptr_alloc(ptr, size, Align::ONE)? else {
+                // zero-sized access
+                return Ok(&[]);
+            };
+            if alloc_ref.has_provenance() {
+                throw_ub_format!("`raw_eq` on bytes with provenance");
+            }
+            alloc_ref.get_bytes_strip_provenance()
+        };
+
+        let lhs_bytes = get_bytes(self, lhs, layout.size)?;
+        let rhs_bytes = get_bytes(self, rhs, layout.size)?;
         Ok(Scalar::from_bool(lhs_bytes == rhs_bytes))
     }
 }
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index f84c6017dbf..69dbc9592fa 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -953,10 +953,10 @@ impl<'tcx, 'a, Prov: Provenance, Extra> AllocRef<'a, 'tcx, Prov, Extra> {
     }
 
     /// `range` is relative to this allocation reference, not the base of the allocation.
-    pub fn check_bytes(&self, range: AllocRange) -> InterpResult<'tcx> {
+    pub fn get_bytes_strip_provenance<'b>(&'b self) -> InterpResult<'tcx, &'a [u8]> {
         Ok(self
             .alloc
-            .check_bytes(&self.tcx, self.range.subrange(range))
+            .get_bytes_strip_provenance(&self.tcx, self.range)
             .map_err(|e| e.to_interp_error(self.alloc_id))?)
     }
 
@@ -967,10 +967,11 @@ impl<'tcx, 'a, Prov: Provenance, Extra> AllocRef<'a, 'tcx, Prov, Extra> {
 }
 
 impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
-    /// Reads the given number of bytes from memory. Returns them as a slice.
+    /// Reads the given number of bytes from memory, and strips their provenance if possible.
+    /// Returns them as a slice.
     ///
     /// Performs appropriate bounds checks.
-    pub fn read_bytes_ptr(
+    pub fn read_bytes_ptr_strip_provenance(
         &self,
         ptr: Pointer<Option<M::Provenance>>,
         size: Size,
@@ -983,7 +984,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         // (We are staying inside the bounds here so all is good.)
         Ok(alloc_ref
             .alloc
-            .get_bytes(&alloc_ref.tcx, alloc_ref.range)
+            .get_bytes_strip_provenance(&alloc_ref.tcx, alloc_ref.range)
             .map_err(|e| e.to_interp_error(alloc_ref.alloc_id))?)
     }
 
@@ -1078,12 +1079,15 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             return Ok(());
         };
 
-        // This checks provenance edges on the src, which needs to happen before
+        // Checks provenance edges on the src, which needs to happen before
         // `prepare_provenance_copy`.
-        let src_bytes = src_alloc
-            .get_bytes_with_uninit_and_ptr(&tcx, src_range)
-            .map_err(|e| e.to_interp_error(src_alloc_id))?
-            .as_ptr(); // raw ptr, so we can also get a ptr to the destination allocation
+        if src_alloc.range_has_provenance(&tcx, alloc_range(src_range.start, Size::ZERO)) {
+            throw_unsup!(PartialPointerCopy(Pointer::new(src_alloc_id, src_range.start)));
+        }
+        if src_alloc.range_has_provenance(&tcx, alloc_range(src_range.end(), Size::ZERO)) {
+            throw_unsup!(PartialPointerCopy(Pointer::new(src_alloc_id, src_range.end())));
+        }
+        let src_bytes = src_alloc.get_bytes_unchecked(src_range).as_ptr(); // raw ptr, so we can also get a ptr to the destination allocation
         // first copy the provenance to a temporary buffer, because
         // `get_bytes_mut` will clear the provenance, which is correct,
         // since we don't want to keep any provenance at the target.
diff --git a/compiler/rustc_const_eval/src/interpret/operand.rs b/compiler/rustc_const_eval/src/interpret/operand.rs
index e80a82acd58..6ec9b174f82 100644
--- a/compiler/rustc_const_eval/src/interpret/operand.rs
+++ b/compiler/rustc_const_eval/src/interpret/operand.rs
@@ -415,7 +415,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
     /// Turn the wide MPlace into a string (must already be dereferenced!)
     pub fn read_str(&self, mplace: &MPlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx, &str> {
         let len = mplace.len(self)?;
-        let bytes = self.read_bytes_ptr(mplace.ptr, Size::from_bytes(len))?;
+        let bytes = self.read_bytes_ptr_strip_provenance(mplace.ptr, Size::from_bytes(len))?;
         let str = std::str::from_utf8(bytes).map_err(|err| err_ub!(InvalidStr(err)))?;
         Ok(str)
     }
diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs
index 7aa76fe1dae..d7841ba6670 100644
--- a/compiler/rustc_const_eval/src/interpret/place.rs
+++ b/compiler/rustc_const_eval/src/interpret/place.rs
@@ -2,8 +2,6 @@
 //! into a place.
 //! All high-level functions to write to memory work on places as destinations.
 
-use std::hash::Hash;
-
 use rustc_ast::Mutability;
 use rustc_middle::mir;
 use rustc_middle::ty;
@@ -290,7 +288,7 @@ impl<'tcx, Prov: Provenance> PlaceTy<'tcx, Prov> {
 // FIXME: Working around https://github.com/rust-lang/rust/issues/54385
 impl<'mir, 'tcx: 'mir, Prov, M> InterpCx<'mir, 'tcx, M>
 where
-    Prov: Provenance + Eq + Hash + 'static,
+    Prov: Provenance + 'static,
     M: Machine<'mir, 'tcx, Provenance = Prov>,
 {
     /// Take a value, which represents a (thin or wide) reference, and make it a place.
diff --git a/compiler/rustc_const_eval/src/interpret/projection.rs b/compiler/rustc_const_eval/src/interpret/projection.rs
index 742339f2b0a..a800553f34e 100644
--- a/compiler/rustc_const_eval/src/interpret/projection.rs
+++ b/compiler/rustc_const_eval/src/interpret/projection.rs
@@ -7,8 +7,6 @@
 //! but we still need to do bounds checking and adjust the layout. To not duplicate that with MPlaceTy, we actually
 //! implement the logic on OpTy, and MPlaceTy calls that.
 
-use std::hash::Hash;
-
 use rustc_middle::mir;
 use rustc_middle::ty;
 use rustc_middle::ty::layout::LayoutOf;
@@ -22,7 +20,7 @@ use super::{
 // FIXME: Working around https://github.com/rust-lang/rust/issues/54385
 impl<'mir, 'tcx: 'mir, Prov, M> InterpCx<'mir, 'tcx, M>
 where
-    Prov: Provenance + Eq + Hash + 'static,
+    Prov: Provenance + 'static,
     M: Machine<'mir, 'tcx, Provenance = Prov>,
 {
     //# Field access
diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs
index e1555f68737..ee55b4a0750 100644
--- a/compiler/rustc_const_eval/src/interpret/validity.rs
+++ b/compiler/rustc_const_eval/src/interpret/validity.rs
@@ -20,8 +20,8 @@ use rustc_target::abi::{Abi, Scalar as ScalarAbi, Size, VariantIdx, Variants, Wr
 use std::hash::Hash;
 
 use super::{
-    alloc_range, CheckInAllocMsg, GlobalAlloc, ImmTy, Immediate, InterpCx, InterpResult, MPlaceTy,
-    Machine, MemPlaceMeta, OpTy, Scalar, ValueVisitor,
+    CheckInAllocMsg, GlobalAlloc, ImmTy, Immediate, InterpCx, InterpResult, MPlaceTy, Machine,
+    MemPlaceMeta, OpTy, Scalar, ValueVisitor,
 };
 
 macro_rules! throw_validation_failure {
@@ -312,7 +312,6 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
         Ok(try_validation!(
             self.ecx.read_immediate(op),
             self.path,
-            err_unsup!(ReadPointerAsBytes) => { "(potentially part of) a pointer" } expected { "{expected}" },
             err_ub!(InvalidUninitBytes(None)) => { "uninitialized memory" } expected { "{expected}" }
         ))
     }
@@ -345,11 +344,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
                 // FIXME: check if the type/trait match what ty::Dynamic says?
             }
             ty::Slice(..) | ty::Str => {
-                let _len = try_validation!(
-                    meta.unwrap_meta().to_machine_usize(self.ecx),
-                    self.path,
-                    err_unsup!(ReadPointerAsBytes) => { "non-integer slice length in wide pointer" },
-                );
+                let _len = meta.unwrap_meta().to_machine_usize(self.ecx)?;
                 // We do not check that `len * elem_size <= isize::MAX`:
                 // that is only required for references, and there it falls out of the
                 // "dereferenceable" check performed by Stacked Borrows.
@@ -669,8 +664,6 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M>
                     { "{:x}", val } expected { "a valid enum tag" },
                 err_ub!(InvalidUninitBytes(None)) =>
                     { "uninitialized bytes" } expected { "a valid enum tag" },
-                err_unsup!(ReadPointerAsBytes) =>
-                    { "a pointer" } expected { "a valid enum tag" },
             )
             .1)
         })
@@ -810,10 +803,9 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M>
                 let mplace = op.assert_mem_place(); // strings are unsized and hence never immediate
                 let len = mplace.len(self.ecx)?;
                 try_validation!(
-                    self.ecx.read_bytes_ptr(mplace.ptr, Size::from_bytes(len)),
+                    self.ecx.read_bytes_ptr_strip_provenance(mplace.ptr, Size::from_bytes(len)),
                     self.path,
                     err_ub!(InvalidUninitBytes(..)) => { "uninitialized data in `str`" },
-                    err_unsup!(ReadPointerAsBytes) => { "a pointer in `str`" },
                 );
             }
             ty::Array(tys, ..) | ty::Slice(tys)
@@ -861,9 +853,9 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M>
                 // We also accept uninit, for consistency with the slow path.
                 let alloc = self.ecx.get_ptr_alloc(mplace.ptr, size, mplace.align)?.expect("we already excluded size 0");
 
-                match alloc.check_bytes(alloc_range(Size::ZERO, size)) {
+                match alloc.get_bytes_strip_provenance() {
                     // In the happy case, we needn't check anything else.
-                    Ok(()) => {}
+                    Ok(_) => {}
                     // Some error happened, try to provide a more detailed description.
                     Err(err) => {
                         // For some errors we might be able to provide extra information.
@@ -881,9 +873,6 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M>
 
                                 throw_validation_failure!(self.path, { "uninitialized bytes" })
                             }
-                            err_unsup!(ReadPointerAsBytes) => {
-                                throw_validation_failure!(self.path, { "a pointer" } expected { "plain (non-pointer) bytes" })
-                            }
 
                             // Propagate upwards (that will also check for unexpected errors).
                             _ => return Err(err),
@@ -924,14 +913,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             Ok(()) => Ok(()),
             // Pass through validation failures.
             Err(err) if matches!(err.kind(), err_ub!(ValidationFailure { .. })) => Err(err),
-            // Also pass through InvalidProgram, those just indicate that we could not
-            // validate and each caller will know best what to do with them.
-            Err(err) if matches!(err.kind(), InterpError::InvalidProgram(_)) => Err(err),
-            // Avoid other errors as those do not show *where* in the value the issue lies.
-            Err(err) => {
+            // Complain about any other kind of UB error -- those are bad because we'd like to
+            // report them in a way that shows *where* in the value the issue lies.
+            Err(err) if matches!(err.kind(), InterpError::UndefinedBehavior(_)) => {
                 err.print_backtrace();
-                bug!("Unexpected error during validation: {}", err);
+                bug!("Unexpected Undefined Behavior error during validation: {}", err);
             }
+            // Pass through everything else.
+            Err(err) => Err(err),
         }
     }
 
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index cc39e434225..37ec04b07f8 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -130,6 +130,8 @@ pub enum AllocError {
     ReadPointerAsBytes,
     /// Partially overwriting a pointer.
     PartialPointerOverwrite(Size),
+    /// Partially copying a pointer.
+    PartialPointerCopy(Size),
     /// Using uninitialized data where it is not allowed.
     InvalidUninitBytes(Option<UninitBytesAccess>),
 }
@@ -152,6 +154,9 @@ impl AllocError {
             PartialPointerOverwrite(offset) => InterpError::Unsupported(
                 UnsupportedOpInfo::PartialPointerOverwrite(Pointer::new(alloc_id, offset)),
             ),
+            PartialPointerCopy(offset) => InterpError::Unsupported(
+                UnsupportedOpInfo::PartialPointerCopy(Pointer::new(alloc_id, offset)),
+            ),
             InvalidUninitBytes(info) => InterpError::UndefinedBehavior(
                 UndefinedBehaviorInfo::InvalidUninitBytes(info.map(|b| (alloc_id, b))),
             ),
@@ -322,62 +327,35 @@ impl<Prov, Extra> Allocation<Prov, Extra> {
 /// Byte accessors.
 impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
     /// This is the entirely abstraction-violating way to just grab the raw bytes without
-    /// caring about provenance. It just deduplicates some code between `read_scalar`
-    /// and `get_bytes_internal`.
-    fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] {
-        &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
-    }
-
-    /// The last argument controls whether we error out when there are uninitialized or pointer
-    /// bytes. However, we *always* error when there is provenance overlapping the edges of the
-    /// range.
-    ///
-    /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead,
+    /// caring about provenance or initialization.
     ///
     /// This function also guarantees that the resulting pointer will remain stable
     /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
     /// on that.
-    ///
-    /// It is the caller's responsibility to check bounds and alignment beforehand.
-    fn get_bytes_internal(
-        &self,
-        cx: &impl HasDataLayout,
-        range: AllocRange,
-        check_init_and_ptr: bool,
-    ) -> AllocResult<&[u8]> {
-        if check_init_and_ptr {
-            self.check_init(range)?;
-            self.check_provenance(cx, range)?;
-        } else {
-            // We still don't want provenance on the *edges*.
-            self.check_provenance_edges(cx, range)?;
-        }
-
-        Ok(self.get_bytes_even_more_internal(range))
+    #[inline]
+    pub fn get_bytes_unchecked(&self, range: AllocRange) -> &[u8] {
+        &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
     }
 
-    /// Checks that these bytes are initialized and not pointer bytes, and then return them
-    /// as a slice.
+    /// Checks that these bytes are initialized, and then strips provenance (if possible) and
+    /// returns them.
     ///
     /// It is the caller's responsibility to check bounds and alignment beforehand.
     /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods
     /// on `InterpCx` instead.
     #[inline]
-    pub fn get_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult<&[u8]> {
-        self.get_bytes_internal(cx, range, true)
-    }
-
-    /// It is the caller's responsibility to handle uninitialized and pointer bytes.
-    /// However, this still checks that there is no provenance on the *edges*.
-    ///
-    /// It is the caller's responsibility to check bounds and alignment beforehand.
-    #[inline]
-    pub fn get_bytes_with_uninit_and_ptr(
+    pub fn get_bytes_strip_provenance(
         &self,
         cx: &impl HasDataLayout,
         range: AllocRange,
     ) -> AllocResult<&[u8]> {
-        self.get_bytes_internal(cx, range, false)
+        self.check_init(range)?;
+        if !Prov::OFFSET_IS_ADDR {
+            if self.range_has_provenance(cx, range) {
+                return Err(AllocError::ReadPointerAsBytes);
+            }
+        }
+        Ok(self.get_bytes_unchecked(range))
     }
 
     /// Just calling this already marks everything as defined and removes provenance,
@@ -415,13 +393,6 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
 
 /// Reading and writing.
 impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
-    /// Validates that this memory range is initiailized and contains no provenance.
-    pub fn check_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
-        // This implicitly does all the checking we are asking for.
-        self.get_bytes(cx, range)?;
-        Ok(())
-    }
-
     /// Reads a *non-ZST* scalar.
     ///
     /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
@@ -438,43 +409,53 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
         range: AllocRange,
         read_provenance: bool,
     ) -> AllocResult<Scalar<Prov>> {
-        if read_provenance {
-            assert_eq!(range.size, cx.data_layout().pointer_size);
-        }
-
         // First and foremost, if anything is uninit, bail.
         if self.is_init(range).is_err() {
             return Err(AllocError::InvalidUninitBytes(None));
         }
 
-        // If we are doing a pointer read, and there is provenance exactly where we
-        // are reading, then we can put data and provenance back together and return that.
-        if read_provenance && let Some(&prov) = self.provenance.get(&range.start) {
-            // We already checked init and provenance, so we can use this function.
-            let bytes = self.get_bytes_even_more_internal(range);
-            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
-            let ptr = Pointer::new(prov, Size::from_bytes(bits));
-            return Ok(Scalar::from_pointer(ptr, cx));
-        }
+        // Get the integer part of the result. We HAVE TO check provenance before returning this!
+        let bytes = self.get_bytes_unchecked(range);
+        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
 
-        // If we are *not* reading a pointer, and we can just ignore provenance,
-        // then do exactly that.
-        if !read_provenance && Prov::OFFSET_IS_ADDR {
-            // We just strip provenance.
-            let bytes = self.get_bytes_even_more_internal(range);
-            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
-            return Ok(Scalar::from_uint(bits, range.size));
+        if read_provenance {
+            assert_eq!(range.size, cx.data_layout().pointer_size);
+
+            // When reading data with provenance, the easy case is finding provenance exactly where we
+            // are reading, then we can put data and provenance back together and return that.
+            if let Some(&prov) = self.provenance.get(&range.start) {
+                // Now we can return the bits, with their appropriate provenance.
+                let ptr = Pointer::new(prov, Size::from_bytes(bits));
+                return Ok(Scalar::from_pointer(ptr, cx));
+            }
+
+            // If we can work on pointers byte-wise, join the byte-wise provenances.
+            if Prov::OFFSET_IS_ADDR {
+                let mut prov = self.offset_get_provenance(cx, range.start);
+                for offset in 1..range.size.bytes() {
+                    let this_prov =
+                        self.offset_get_provenance(cx, range.start + Size::from_bytes(offset));
+                    prov = Prov::join(prov, this_prov);
+                }
+                // Now use this provenance.
+                let ptr = Pointer::new(prov, Size::from_bytes(bits));
+                return Ok(Scalar::from_maybe_pointer(ptr, cx));
+            }
+        } else {
+            // We are *not* reading a pointer.
+            // If we can just ignore provenance, do exactly that.
+            if Prov::OFFSET_IS_ADDR {
+                // We just strip provenance.
+                return Ok(Scalar::from_uint(bits, range.size));
+            }
         }
 
-        // It's complicated. Better make sure there is no provenance anywhere.
-        // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then
-        // `read_pointer` is true and we ideally would distinguish the following two cases:
-        // - The entire `range` is covered by the same provenance, stored in two separate entries of
-        //   the provenance map. Then we should return a pointer with that provenance.
-        // - The range has inhomogeneous provenance. Then we should return just the
-        //   underlying bits.
-        let bytes = self.get_bytes(cx, range)?;
-        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+        // Fallback path for when we cannot treat provenance bytewise or ignore it.
+        assert!(!Prov::OFFSET_IS_ADDR);
+        if self.range_has_provenance(cx, range) {
+            return Err(AllocError::ReadPointerAsBytes);
+        }
+        // There is no provenance, we can just return the bits.
         Ok(Scalar::from_uint(bits, range.size))
     }
 
@@ -534,6 +515,13 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> {
         self.provenance.range(Size::from_bytes(start)..range.end())
     }
 
+    /// Get the provenance of a single byte.
+    fn offset_get_provenance(&self, cx: &impl HasDataLayout, offset: Size) -> Option<Prov> {
+        let prov = self.range_get_provenance(cx, alloc_range(offset, Size::from_bytes(1)));
+        assert!(prov.len() <= 1);
+        prov.first().map(|(_offset, prov)| *prov)
+    }
+
     /// Returns whether this allocation has provenance overlapping with the given range.
     ///
     /// Note: this function exists to allow `range_get_provenance` to be private, in order to somewhat
@@ -543,12 +531,6 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> {
         !self.range_get_provenance(cx, range).is_empty()
     }
 
-    /// Checks that there is no provenance overlapping with the given range.
-    #[inline(always)]
-    fn check_provenance(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
-        if self.range_has_provenance(cx, range) { Err(AllocError::ReadPointerAsBytes) } else { Ok(()) }
-    }
-
     /// Removes all provenance inside the given range.
     /// If there is provenance overlapping with the edges, it
     /// is removed as well *and* the bytes it covers are marked as
@@ -606,14 +588,6 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> {
 
         Ok(())
     }
-
-    /// Errors if there is provenance overlapping with the edges of the given memory range.
-    #[inline]
-    fn check_provenance_edges(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
-        self.check_provenance(cx, alloc_range(range.start, Size::ZERO))?;
-        self.check_provenance(cx, alloc_range(range.end(), Size::ZERO))?;
-        Ok(())
-    }
 }
 
 /// Stores the provenance information of pointers stored in memory.
diff --git a/compiler/rustc_middle/src/mir/interpret/error.rs b/compiler/rustc_middle/src/mir/interpret/error.rs
index cecb55578d3..e4039cc7c68 100644
--- a/compiler/rustc_middle/src/mir/interpret/error.rs
+++ b/compiler/rustc_middle/src/mir/interpret/error.rs
@@ -401,14 +401,18 @@ impl fmt::Display for UndefinedBehaviorInfo {
 pub enum UnsupportedOpInfo {
     /// Free-form case. Only for errors that are never caught!
     Unsupported(String),
-    /// Encountered a pointer where we needed raw bytes.
-    ReadPointerAsBytes,
     /// Overwriting parts of a pointer; the resulting state cannot be represented in our
     /// `Allocation` data structure. See <https://github.com/rust-lang/miri/issues/2181>.
     PartialPointerOverwrite(Pointer<AllocId>),
+    /// Attempting to `copy` parts of a pointer to somewhere else; the resulting state cannot be
+    /// represented in our `Allocation` data structure. See
+    /// <https://github.com/rust-lang/miri/issues/2181>.
+    PartialPointerCopy(Pointer<AllocId>),
     //
     // The variants below are only reachable from CTFE/const prop, miri will never emit them.
     //
+    /// Encountered a pointer where we needed raw bytes.
+    ReadPointerAsBytes,
     /// Accessing thread local statics
     ThreadLocalStatic(DefId),
     /// Accessing an unsupported extern static.
@@ -420,10 +424,13 @@ impl fmt::Display for UnsupportedOpInfo {
         use UnsupportedOpInfo::*;
         match self {
             Unsupported(ref msg) => write!(f, "{msg}"),
-            ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"),
             PartialPointerOverwrite(ptr) => {
                 write!(f, "unable to overwrite parts of a pointer in memory at {ptr:?}")
             }
+            PartialPointerCopy(ptr) => {
+                write!(f, "unable to copy parts of a pointer from memory at {ptr:?}")
+            }
+            ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"),
             ThreadLocalStatic(did) => write!(f, "cannot access thread local static ({did:?})"),
             ReadExternStatic(did) => write!(f, "cannot read from extern static ({did:?})"),
         }
diff --git a/compiler/rustc_middle/src/mir/interpret/pointer.rs b/compiler/rustc_middle/src/mir/interpret/pointer.rs
index 384954cbbd5..5fa802236ed 100644
--- a/compiler/rustc_middle/src/mir/interpret/pointer.rs
+++ b/compiler/rustc_middle/src/mir/interpret/pointer.rs
@@ -125,6 +125,9 @@ pub trait Provenance: Copy + fmt::Debug {
     /// Otherwise this function is best-effort (but must agree with `Machine::ptr_get_alloc`).
     /// (Identifying the offset in that allocation, however, is harder -- use `Memory::ptr_get_alloc` for that.)
     fn get_alloc_id(self) -> Option<AllocId>;
+
+    /// Defines the 'join' of provenance: what happens when doing a pointer load and different bytes have different provenance.
+    fn join(left: Option<Self>, right: Option<Self>) -> Option<Self>;
 }
 
 impl Provenance for AllocId {
@@ -152,6 +155,10 @@ impl Provenance for AllocId {
     fn get_alloc_id(self) -> Option<AllocId> {
         Some(self)
     }
+
+    fn join(_left: Option<Self>, _right: Option<Self>) -> Option<Self> {
+        panic!("merging provenance is not supported when `OFFSET_IS_ADDR` is false")
+    }
 }
 
 /// Represents a pointer in the Miri engine.
diff --git a/compiler/rustc_middle/src/mir/interpret/value.rs b/compiler/rustc_middle/src/mir/interpret/value.rs
index 1ba16025e32..d4fad7f1ecd 100644
--- a/compiler/rustc_middle/src/mir/interpret/value.rs
+++ b/compiler/rustc_middle/src/mir/interpret/value.rs
@@ -507,7 +507,7 @@ pub fn get_slice_bytes<'tcx>(cx: &impl HasDataLayout, val: ConstValue<'tcx>) ->
     if let ConstValue::Slice { data, start, end } = val {
         let len = end - start;
         data.inner()
-            .get_bytes(
+            .get_bytes_strip_provenance(
                 cx,
                 AllocRange { start: Size::from_bytes(start), size: Size::from_bytes(len) },
             )
diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs
index 4e886ff1592..75327cff368 100644
--- a/compiler/rustc_middle/src/mir/mod.rs
+++ b/compiler/rustc_middle/src/mir/mod.rs
@@ -2719,7 +2719,7 @@ fn pretty_print_const_value<'tcx>(
                 let n = n.kind().try_to_bits(tcx.data_layout.pointer_size).unwrap();
                 // cast is ok because we already checked for pointer size (32 or 64 bit) above
                 let range = AllocRange { start: offset, size: Size::from_bytes(n) };
-                let byte_str = alloc.inner().get_bytes(&tcx, range).unwrap();
+                let byte_str = alloc.inner().get_bytes_strip_provenance(&tcx, range).unwrap();
                 fmt.write_str("*")?;
                 pretty_print_byte_str(fmt, byte_str)?;
                 return Ok(());
diff --git a/compiler/rustc_middle/src/ty/print/pretty.rs b/compiler/rustc_middle/src/ty/print/pretty.rs
index cc55b7e8611..2dfbcb7fa31 100644
--- a/compiler/rustc_middle/src/ty/print/pretty.rs
+++ b/compiler/rustc_middle/src/ty/print/pretty.rs
@@ -1275,7 +1275,7 @@ pub trait PrettyPrinter<'tcx>:
                                     let range =
                                         AllocRange { start: offset, size: Size::from_bytes(len) };
                                     if let Ok(byte_str) =
-                                        alloc.inner().get_bytes(&self.tcx(), range)
+                                        alloc.inner().get_bytes_strip_provenance(&self.tcx(), range)
                                     {
                                         p!(pretty_print_byte_str(byte_str))
                                     } else {