about summary refs log tree commit diff
diff options
context:
space:
mode:
authorDylan DPC <99973273+Dylan-DPC@users.noreply.github.com>2022-04-19 22:57:43 +0200
committerGitHub <noreply@github.com>2022-04-19 22:57:43 +0200
commit9d9d5910afb7e94a995def430ce7b9e55c36897d (patch)
treebef355e33e3dd0b65b86a076458ae67b1aae6d47
parentf7d8f5b1e19f0f8cb45f8085ab3a3933359aa4d5 (diff)
parent55f0977a6baee4a3469826a45ee1308a65498129 (diff)
downloadrust-9d9d5910afb7e94a995def430ce7b9e55c36897d.tar.gz
rust-9d9d5910afb7e94a995def430ce7b9e55c36897d.zip
Rollup merge of #96165 - RalfJung:miri-provenance-cleanup, r=oli-obk
Miri provenance cleanup

Reviewing https://github.com/rust-lang/rust/pull/95826 by ``@carbotaniuman`` made me realize that we could clean things up a little here.

``@carbotaniuman`` please let me know if you're okay with landing this (it will create a lot of conflicts with your PR), or if you'd prefer incorporating the ideas from this PR into yours. I think we want to end up in a situation where the function you called `ptr_reify_alloc` returns just two things, a concrete tag and an offset. Getting an `AllocId` from a concrete tag should be infallible like now. However a concrete tag and `Tag` don't have to be the same type.

r? ``@oli-obk``
-rw-r--r--compiler/rustc_const_eval/src/const_eval/mod.rs2
-rw-r--r--compiler/rustc_const_eval/src/interpret/machine.rs28
-rw-r--r--compiler/rustc_const_eval/src/interpret/memory.rs73
-rw-r--r--compiler/rustc_const_eval/src/interpret/validity.rs2
-rw-r--r--compiler/rustc_middle/src/mir/interpret/pointer.rs17
5 files changed, 76 insertions, 46 deletions
diff --git a/compiler/rustc_const_eval/src/const_eval/mod.rs b/compiler/rustc_const_eval/src/const_eval/mod.rs
index 80270f82563..68f9bee593f 100644
--- a/compiler/rustc_const_eval/src/const_eval/mod.rs
+++ b/compiler/rustc_const_eval/src/const_eval/mod.rs
@@ -38,7 +38,7 @@ pub(crate) fn const_caller_location(
     if intern_const_alloc_recursive(&mut ecx, InternKind::Constant, &loc_place).is_err() {
         bug!("intern_const_alloc_recursive should not error in this case")
     }
-    ConstValue::Scalar(Scalar::from_pointer(loc_place.ptr.into_pointer_or_addr().unwrap(), &tcx))
+    ConstValue::Scalar(Scalar::from_maybe_pointer(loc_place.ptr, &tcx))
 }
 
 /// Convert an evaluated constant to a type level constant
diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs
index ddfbcbdd22e..7721485771b 100644
--- a/compiler/rustc_const_eval/src/interpret/machine.rs
+++ b/compiler/rustc_const_eval/src/interpret/machine.rs
@@ -88,6 +88,10 @@ pub trait Machine<'mir, 'tcx>: Sized {
     /// Pointers are "tagged" with provenance information; typically the `AllocId` they belong to.
     type PointerTag: Provenance + Eq + Hash + 'static;
 
+    /// When getting the AllocId of a pointer, some extra data is also obtained from the tag
+    /// that is passed to memory access hooks so they can do things with it.
+    type TagExtra: Copy + 'static;
+
     /// Machines can define extra (non-instance) things that represent values of function pointers.
     /// For example, Miri uses this to return a function pointer from `dlsym`
     /// that can later be called to execute the right thing.
@@ -122,6 +126,8 @@ pub trait Machine<'mir, 'tcx>: Sized {
 
     /// Whether, when checking alignment, we should `force_int` and thus support
     /// custom alignment logic based on whatever the integer address happens to be.
+    ///
+    /// Requires PointerTag::OFFSET_IS_ADDR to be true.
     fn force_int_for_alignment_check(ecx: &InterpCx<'mir, 'tcx, Self>) -> bool;
 
     /// Whether to enforce the validity invariant
@@ -285,11 +291,14 @@ pub trait Machine<'mir, 'tcx>: Sized {
         addr: u64,
     ) -> Pointer<Option<Self::PointerTag>>;
 
-    /// Convert a pointer with provenance into an allocation-offset pair.
+    /// Convert a pointer with provenance into an allocation-offset pair
+    /// and extra provenance info.
+    ///
+    /// The returned `AllocId` must be the same as `ptr.provenance.get_alloc_id()`.
     fn ptr_get_alloc(
         ecx: &InterpCx<'mir, 'tcx, Self>,
         ptr: Pointer<Self::PointerTag>,
-    ) -> (AllocId, Size);
+    ) -> (AllocId, Size, Self::TagExtra);
 
     /// Called to initialize the "extra" state of an allocation and make the pointers
     /// it contains (in relocations) tagged.  The way we construct allocations is
@@ -321,7 +330,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
         _tcx: TyCtxt<'tcx>,
         _machine: &Self,
         _alloc_extra: &Self::AllocExtra,
-        _tag: Self::PointerTag,
+        _tag: (AllocId, Self::TagExtra),
         _range: AllocRange,
     ) -> InterpResult<'tcx> {
         Ok(())
@@ -333,7 +342,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
         _tcx: TyCtxt<'tcx>,
         _machine: &mut Self,
         _alloc_extra: &mut Self::AllocExtra,
-        _tag: Self::PointerTag,
+        _tag: (AllocId, Self::TagExtra),
         _range: AllocRange,
     ) -> InterpResult<'tcx> {
         Ok(())
@@ -345,7 +354,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
         _tcx: TyCtxt<'tcx>,
         _machine: &mut Self,
         _alloc_extra: &mut Self::AllocExtra,
-        _tag: Self::PointerTag,
+        _tag: (AllocId, Self::TagExtra),
         _range: AllocRange,
     ) -> InterpResult<'tcx> {
         Ok(())
@@ -397,6 +406,8 @@ pub trait Machine<'mir, 'tcx>: Sized {
 // (CTFE and ConstProp) use the same instance.  Here, we share that code.
 pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
     type PointerTag = AllocId;
+    type TagExtra = ();
+
     type ExtraFnVal = !;
 
     type MemoryMap =
@@ -474,9 +485,12 @@ pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
     }
 
     #[inline(always)]
-    fn ptr_get_alloc(_ecx: &InterpCx<$mir, $tcx, Self>, ptr: Pointer<AllocId>) -> (AllocId, Size) {
+    fn ptr_get_alloc(
+        _ecx: &InterpCx<$mir, $tcx, Self>,
+        ptr: Pointer<AllocId>,
+    ) -> (AllocId, Size, Self::TagExtra) {
         // We know `offset` is relative to the allocation, so we can use `into_parts`.
         let (alloc_id, offset) = ptr.into_parts();
-        (alloc_id, offset)
+        (alloc_id, offset, ())
     }
 }
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index 9ae50d0df80..e8ee0fe6ea6 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -158,8 +158,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         &self,
         ptr: Pointer<AllocId>,
     ) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
-        // We know `offset` is relative to the allocation, so we can use `into_parts`.
-        let (alloc_id, offset) = ptr.into_parts();
+        let alloc_id = ptr.provenance;
         // We need to handle `extern static`.
         match self.tcx.get_global_alloc(alloc_id) {
             Some(GlobalAlloc::Static(def_id)) if self.tcx.is_thread_local_static(def_id) => {
@@ -171,7 +170,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             _ => {}
         }
         // And we need to get the tag.
-        Ok(M::tag_alloc_base_pointer(self, Pointer::new(alloc_id, offset)))
+        Ok(M::tag_alloc_base_pointer(self, ptr))
     }
 
     pub fn create_fn_alloc_ptr(
@@ -238,7 +237,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         new_align: Align,
         kind: MemoryKind<M::MemoryKind>,
     ) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
-        let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
+        let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
         if offset.bytes() != 0 {
             throw_ub_format!(
                 "reallocating {:?} which does not point to the beginning of an object",
@@ -255,14 +254,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         };
         // This will also call the access hooks.
         self.mem_copy(
-            ptr.into(),
+            ptr,
             Align::ONE,
             new_ptr.into(),
             Align::ONE,
             old_size.min(new_size),
             /*nonoverlapping*/ true,
         )?;
-        self.deallocate_ptr(ptr.into(), old_size_and_align, kind)?;
+        self.deallocate_ptr(ptr, old_size_and_align, kind)?;
 
         Ok(new_ptr)
     }
@@ -274,7 +273,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         old_size_and_align: Option<(Size, Align)>,
         kind: MemoryKind<M::MemoryKind>,
     ) -> InterpResult<'tcx> {
-        let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
+        let (alloc_id, offset, tag) = self.ptr_get_alloc_id(ptr)?;
         trace!("deallocating: {}", alloc_id);
 
         if offset.bytes() != 0 {
@@ -330,7 +329,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             *self.tcx,
             &mut self.machine,
             &mut alloc.extra,
-            ptr.provenance,
+            (alloc_id, tag),
             alloc_range(Size::ZERO, size),
         )?;
 
@@ -350,17 +349,17 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         ptr: Pointer<Option<M::PointerTag>>,
         size: Size,
         align: Align,
-    ) -> InterpResult<'tcx, Option<(AllocId, Size, Pointer<M::PointerTag>)>> {
+    ) -> InterpResult<'tcx, Option<(AllocId, Size, M::TagExtra)>> {
         let align = M::enforce_alignment(&self).then_some(align);
         self.check_and_deref_ptr(
             ptr,
             size,
             align,
             CheckInAllocMsg::MemoryAccessTest,
-            |alloc_id, offset, ptr| {
+            |alloc_id, offset, tag| {
                 let (size, align) =
                     self.get_alloc_size_and_align(alloc_id, AllocCheck::Dereferenceable)?;
-                Ok((size, align, (alloc_id, offset, ptr)))
+                Ok((size, align, (alloc_id, offset, tag)))
             },
         )
     }
@@ -401,11 +400,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         size: Size,
         align: Option<Align>,
         msg: CheckInAllocMsg,
-        alloc_size: impl FnOnce(
-            AllocId,
-            Size,
-            Pointer<M::PointerTag>,
-        ) -> InterpResult<'tcx, (Size, Align, T)>,
+        alloc_size: impl FnOnce(AllocId, Size, M::TagExtra) -> InterpResult<'tcx, (Size, Align, T)>,
     ) -> InterpResult<'tcx, Option<T>> {
         fn check_offset_align(offset: u64, align: Align) -> InterpResult<'static> {
             if offset % align.bytes() == 0 {
@@ -433,8 +428,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                 }
                 None
             }
-            Ok((alloc_id, offset, ptr)) => {
-                let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, ptr)?;
+            Ok((alloc_id, offset, tag)) => {
+                let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, tag)?;
                 // Test bounds. This also ensures non-null.
                 // It is sufficient to check this for the end pointer. Also check for overflow!
                 if offset.checked_add(size, &self.tcx).map_or(true, |end| end > alloc_size) {
@@ -450,10 +445,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                 // we want the error to be about the bounds.
                 if let Some(align) = align {
                     if M::force_int_for_alignment_check(self) {
-                        let addr = Scalar::from_pointer(ptr, &self.tcx)
-                            .to_machine_usize(&self.tcx)
-                            .expect("ptr-to-int cast for align check should never fail");
-                        check_offset_align(addr, align)?;
+                        // `force_int_for_alignment_check` can only be true if `OFFSET_IS_ADDR` is true.
+                        check_offset_align(ptr.addr().bytes(), align)?;
                     } else {
                         // Check allocation alignment and offset alignment.
                         if alloc_align.bytes() < align.bytes() {
@@ -569,14 +562,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             size,
             align,
             CheckInAllocMsg::MemoryAccessTest,
-            |alloc_id, offset, ptr| {
+            |alloc_id, offset, tag| {
                 let alloc = self.get_alloc_raw(alloc_id)?;
-                Ok((alloc.size(), alloc.align, (alloc_id, offset, ptr, alloc)))
+                Ok((alloc.size(), alloc.align, (alloc_id, offset, tag, alloc)))
             },
         )?;
-        if let Some((alloc_id, offset, ptr, alloc)) = ptr_and_alloc {
+        if let Some((alloc_id, offset, tag, alloc)) = ptr_and_alloc {
             let range = alloc_range(offset, size);
-            M::memory_read(*self.tcx, &self.machine, &alloc.extra, ptr.provenance, range)?;
+            M::memory_read(*self.tcx, &self.machine, &alloc.extra, (alloc_id, tag), range)?;
             Ok(Some(AllocRef { alloc, range, tcx: *self.tcx, alloc_id }))
         } else {
             // Even in this branch we have to be sure that we actually access the allocation, in
@@ -631,13 +624,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         align: Align,
     ) -> InterpResult<'tcx, Option<AllocRefMut<'a, 'tcx, M::PointerTag, M::AllocExtra>>> {
         let parts = self.get_ptr_access(ptr, size, align)?;
-        if let Some((alloc_id, offset, ptr)) = parts {
+        if let Some((alloc_id, offset, tag)) = parts {
             let tcx = *self.tcx;
             // FIXME: can we somehow avoid looking up the allocation twice here?
             // We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`.
             let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?;
             let range = alloc_range(offset, size);
-            M::memory_written(tcx, machine, &mut alloc.extra, ptr.provenance, range)?;
+            M::memory_written(tcx, machine, &mut alloc.extra, (alloc_id, tag), range)?;
             Ok(Some(AllocRefMut { alloc, range, tcx, alloc_id }))
         } else {
             Ok(None)
@@ -732,7 +725,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         ptr: Pointer<Option<M::PointerTag>>,
     ) -> InterpResult<'tcx, FnVal<'tcx, M::ExtraFnVal>> {
         trace!("get_fn({:?})", ptr);
-        let (alloc_id, offset, _ptr) = self.ptr_get_alloc_id(ptr)?;
+        let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
         if offset.bytes() != 0 {
             throw_ub!(InvalidFunctionPointer(Pointer::new(alloc_id, offset)))
         }
@@ -1012,16 +1005,16 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         // and once below to get the underlying `&[mut] Allocation`.
 
         // Source alloc preparations and access hooks.
-        let Some((src_alloc_id, src_offset, src)) = src_parts else {
+        let Some((src_alloc_id, src_offset, src_tag)) = src_parts else {
             // Zero-sized *source*, that means dst is also zero-sized and we have nothing to do.
             return Ok(());
         };
         let src_alloc = self.get_alloc_raw(src_alloc_id)?;
         let src_range = alloc_range(src_offset, size);
-        M::memory_read(*tcx, &self.machine, &src_alloc.extra, src.provenance, src_range)?;
+        M::memory_read(*tcx, &self.machine, &src_alloc.extra, (src_alloc_id, src_tag), src_range)?;
         // We need the `dest` ptr for the next operation, so we get it now.
         // We already did the source checks and called the hooks so we are good to return early.
-        let Some((dest_alloc_id, dest_offset, dest)) = dest_parts else {
+        let Some((dest_alloc_id, dest_offset, dest_tag)) = dest_parts else {
             // Zero-sized *destination*.
             return Ok(());
         };
@@ -1043,7 +1036,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         // Destination alloc preparations and access hooks.
         let (dest_alloc, extra) = self.get_alloc_raw_mut(dest_alloc_id)?;
         let dest_range = alloc_range(dest_offset, size * num_copies);
-        M::memory_written(*tcx, extra, &mut dest_alloc.extra, dest.provenance, dest_range)?;
+        M::memory_written(
+            *tcx,
+            extra,
+            &mut dest_alloc.extra,
+            (dest_alloc_id, dest_tag),
+            dest_range,
+        )?;
         let dest_bytes = dest_alloc
             .get_bytes_mut_ptr(&tcx, dest_range)
             .map_err(|e| e.to_interp_error(dest_alloc_id))?
@@ -1164,11 +1163,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
     pub fn ptr_try_get_alloc_id(
         &self,
         ptr: Pointer<Option<M::PointerTag>>,
-    ) -> Result<(AllocId, Size, Pointer<M::PointerTag>), u64> {
+    ) -> Result<(AllocId, Size, M::TagExtra), u64> {
         match ptr.into_pointer_or_addr() {
             Ok(ptr) => {
-                let (alloc_id, offset) = M::ptr_get_alloc(self, ptr);
-                Ok((alloc_id, offset, ptr))
+                let (alloc_id, offset, extra) = M::ptr_get_alloc(self, ptr);
+                Ok((alloc_id, offset, extra))
             }
             Err(addr) => Err(addr.bytes()),
         }
@@ -1179,7 +1178,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
     pub fn ptr_get_alloc_id(
         &self,
         ptr: Pointer<Option<M::PointerTag>>,
-    ) -> InterpResult<'tcx, (AllocId, Size, Pointer<M::PointerTag>)> {
+    ) -> InterpResult<'tcx, (AllocId, Size, M::TagExtra)> {
         self.ptr_try_get_alloc_id(ptr).map_err(|offset| {
             err_ub!(DanglingIntPointer(offset, CheckInAllocMsg::InboundsTest)).into()
         })
diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs
index 4a0aa41de73..71d29be97d5 100644
--- a/compiler/rustc_const_eval/src/interpret/validity.rs
+++ b/compiler/rustc_const_eval/src/interpret/validity.rs
@@ -432,7 +432,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
         if let Some(ref mut ref_tracking) = self.ref_tracking {
             // Proceed recursively even for ZST, no reason to skip them!
             // `!` is a ZST and we want to validate it.
-            if let Ok((alloc_id, _offset, _ptr)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
+            if let Ok((alloc_id, _offset, _tag)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
                 // Special handling for pointers to statics (irrespective of their type).
                 let alloc_kind = self.ecx.tcx.get_global_alloc(alloc_id);
                 if let Some(GlobalAlloc::Static(did)) = alloc_kind {
diff --git a/compiler/rustc_middle/src/mir/interpret/pointer.rs b/compiler/rustc_middle/src/mir/interpret/pointer.rs
index 813c0912f53..c71aea417ec 100644
--- a/compiler/rustc_middle/src/mir/interpret/pointer.rs
+++ b/compiler/rustc_middle/src/mir/interpret/pointer.rs
@@ -163,6 +163,9 @@ pub struct Pointer<Tag = AllocId> {
 }
 
 static_assert_size!(Pointer, 16);
+// `Option<Tag>` pointers are also passed around quite a bit
+// (but not stored in permanent machine state).
+static_assert_size!(Pointer<Option<AllocId>>, 16);
 
 // We want the `Debug` output to be readable as it is used by `derive(Debug)` for
 // all the Miri types.
@@ -198,12 +201,26 @@ impl<Tag> From<Pointer<Tag>> for Pointer<Option<Tag>> {
 }
 
 impl<Tag> Pointer<Option<Tag>> {
+    /// Convert this pointer that *might* have a tag into a pointer that *definitely* has a tag, or
+    /// an absolute address.
+    ///
+    /// This is rarely what you want; call `ptr_try_get_alloc_id` instead.
     pub fn into_pointer_or_addr(self) -> Result<Pointer<Tag>, Size> {
         match self.provenance {
             Some(tag) => Ok(Pointer::new(tag, self.offset)),
             None => Err(self.offset),
         }
     }
+
+    /// Returns the absolute address the pointer points to.
+    /// Only works if Tag::OFFSET_IS_ADDR is true!
+    pub fn addr(self) -> Size
+    where
+        Tag: Provenance,
+    {
+        assert!(Tag::OFFSET_IS_ADDR);
+        self.offset
+    }
 }
 
 impl<Tag> Pointer<Option<Tag>> {