about summary refs log tree commit diff
path: root/compiler/rustc_middle/src/mir
diff options
context:
space:
mode:
authorRalf Jung <post@ralfj.de>2022-08-27 14:54:02 -0400
committerRalf Jung <post@ralfj.de>2022-08-27 18:37:44 -0400
commit2e172473daefd24631faf3906bd411798d7d8a17 (patch)
tree4bbfce7ca26338ca0db6ac3821acafd58b479d5c /compiler/rustc_middle/src/mir
parente63a6257118effd270223ae38306013dfd477516 (diff)
downloadrust-2e172473daefd24631faf3906bd411798d7d8a17.tar.gz
rust-2e172473daefd24631faf3906bd411798d7d8a17.zip
interpret: make read-pointer-as-bytes *always* work in Miri
and show some extra information when it happens in CTFE
Diffstat (limited to 'compiler/rustc_middle/src/mir')
-rw-r--r--compiler/rustc_middle/src/mir/interpret/allocation.rs156
-rw-r--r--compiler/rustc_middle/src/mir/interpret/error.rs13
-rw-r--r--compiler/rustc_middle/src/mir/interpret/pointer.rs7
-rw-r--r--compiler/rustc_middle/src/mir/interpret/value.rs2
-rw-r--r--compiler/rustc_middle/src/mir/mod.rs2
5 files changed, 84 insertions, 96 deletions
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index cc39e434225..37ec04b07f8 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -130,6 +130,8 @@ pub enum AllocError {
     ReadPointerAsBytes,
     /// Partially overwriting a pointer.
     PartialPointerOverwrite(Size),
+    /// Partially copying a pointer.
+    PartialPointerCopy(Size),
     /// Using uninitialized data where it is not allowed.
     InvalidUninitBytes(Option<UninitBytesAccess>),
 }
@@ -152,6 +154,9 @@ impl AllocError {
             PartialPointerOverwrite(offset) => InterpError::Unsupported(
                 UnsupportedOpInfo::PartialPointerOverwrite(Pointer::new(alloc_id, offset)),
             ),
+            PartialPointerCopy(offset) => InterpError::Unsupported(
+                UnsupportedOpInfo::PartialPointerCopy(Pointer::new(alloc_id, offset)),
+            ),
             InvalidUninitBytes(info) => InterpError::UndefinedBehavior(
                 UndefinedBehaviorInfo::InvalidUninitBytes(info.map(|b| (alloc_id, b))),
             ),
@@ -322,62 +327,35 @@ impl<Prov, Extra> Allocation<Prov, Extra> {
 /// Byte accessors.
 impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
     /// This is the entirely abstraction-violating way to just grab the raw bytes without
-    /// caring about provenance. It just deduplicates some code between `read_scalar`
-    /// and `get_bytes_internal`.
-    fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] {
-        &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
-    }
-
-    /// The last argument controls whether we error out when there are uninitialized or pointer
-    /// bytes. However, we *always* error when there is provenance overlapping the edges of the
-    /// range.
-    ///
-    /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead,
+    /// caring about provenance or initialization.
     ///
     /// This function also guarantees that the resulting pointer will remain stable
     /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
     /// on that.
-    ///
-    /// It is the caller's responsibility to check bounds and alignment beforehand.
-    fn get_bytes_internal(
-        &self,
-        cx: &impl HasDataLayout,
-        range: AllocRange,
-        check_init_and_ptr: bool,
-    ) -> AllocResult<&[u8]> {
-        if check_init_and_ptr {
-            self.check_init(range)?;
-            self.check_provenance(cx, range)?;
-        } else {
-            // We still don't want provenance on the *edges*.
-            self.check_provenance_edges(cx, range)?;
-        }
-
-        Ok(self.get_bytes_even_more_internal(range))
+    #[inline]
+    pub fn get_bytes_unchecked(&self, range: AllocRange) -> &[u8] {
+        &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
     }
 
-    /// Checks that these bytes are initialized and not pointer bytes, and then return them
-    /// as a slice.
+    /// Checks that these bytes are initialized, and then strip provenance (if possible) and return
+    /// them.
     ///
     /// It is the caller's responsibility to check bounds and alignment beforehand.
     /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods
     /// on `InterpCx` instead.
     #[inline]
-    pub fn get_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult<&[u8]> {
-        self.get_bytes_internal(cx, range, true)
-    }
-
-    /// It is the caller's responsibility to handle uninitialized and pointer bytes.
-    /// However, this still checks that there is no provenance on the *edges*.
-    ///
-    /// It is the caller's responsibility to check bounds and alignment beforehand.
-    #[inline]
-    pub fn get_bytes_with_uninit_and_ptr(
+    pub fn get_bytes_strip_provenance(
         &self,
         cx: &impl HasDataLayout,
         range: AllocRange,
     ) -> AllocResult<&[u8]> {
-        self.get_bytes_internal(cx, range, false)
+        self.check_init(range)?;
+        if !Prov::OFFSET_IS_ADDR {
+            if self.range_has_provenance(cx, range) {
+                return Err(AllocError::ReadPointerAsBytes);
+            }
+        }
+        Ok(self.get_bytes_unchecked(range))
     }
 
     /// Just calling this already marks everything as defined and removes provenance,
@@ -415,13 +393,6 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
 
 /// Reading and writing.
 impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
-    /// Validates that this memory range is initiailized and contains no provenance.
-    pub fn check_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
-        // This implicitly does all the checking we are asking for.
-        self.get_bytes(cx, range)?;
-        Ok(())
-    }
-
     /// Reads a *non-ZST* scalar.
     ///
     /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
@@ -438,43 +409,53 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
         range: AllocRange,
         read_provenance: bool,
     ) -> AllocResult<Scalar<Prov>> {
-        if read_provenance {
-            assert_eq!(range.size, cx.data_layout().pointer_size);
-        }
-
         // First and foremost, if anything is uninit, bail.
         if self.is_init(range).is_err() {
             return Err(AllocError::InvalidUninitBytes(None));
         }
 
-        // If we are doing a pointer read, and there is provenance exactly where we
-        // are reading, then we can put data and provenance back together and return that.
-        if read_provenance && let Some(&prov) = self.provenance.get(&range.start) {
-            // We already checked init and provenance, so we can use this function.
-            let bytes = self.get_bytes_even_more_internal(range);
-            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
-            let ptr = Pointer::new(prov, Size::from_bytes(bits));
-            return Ok(Scalar::from_pointer(ptr, cx));
-        }
+        // Get the integer part of the result. We HAVE TO check provenance before returning this!
+        let bytes = self.get_bytes_unchecked(range);
+        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
 
-        // If we are *not* reading a pointer, and we can just ignore provenance,
-        // then do exactly that.
-        if !read_provenance && Prov::OFFSET_IS_ADDR {
-            // We just strip provenance.
-            let bytes = self.get_bytes_even_more_internal(range);
-            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
-            return Ok(Scalar::from_uint(bits, range.size));
+        if read_provenance {
+            assert_eq!(range.size, cx.data_layout().pointer_size);
+
+            // When reading data with provenance, the easy case is finding provenance exactly where we
+            // are reading, then we can put data and provenance back together and return that.
+            if let Some(&prov) = self.provenance.get(&range.start) {
+                // Now we can return the bits, with their appropriate provenance.
+                let ptr = Pointer::new(prov, Size::from_bytes(bits));
+                return Ok(Scalar::from_pointer(ptr, cx));
+            }
+
+            // If we can work on pointers byte-wise, join the byte-wise provenances.
+            if Prov::OFFSET_IS_ADDR {
+                let mut prov = self.offset_get_provenance(cx, range.start);
+                for offset in 1..range.size.bytes() {
+                    let this_prov =
+                        self.offset_get_provenance(cx, range.start + Size::from_bytes(offset));
+                    prov = Prov::join(prov, this_prov);
+                }
+                // Now use this provenance.
+                let ptr = Pointer::new(prov, Size::from_bytes(bits));
+                return Ok(Scalar::from_maybe_pointer(ptr, cx));
+            }
+        } else {
+            // We are *not* reading a pointer.
+            // If we can just ignore provenance, do exactly that.
+            if Prov::OFFSET_IS_ADDR {
+                // We just strip provenance.
+                return Ok(Scalar::from_uint(bits, range.size));
+            }
         }
 
-        // It's complicated. Better make sure there is no provenance anywhere.
-        // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then
-        // `read_pointer` is true and we ideally would distinguish the following two cases:
-        // - The entire `range` is covered by the same provenance, stored in two separate entries of
-        //   the provenance map. Then we should return a pointer with that provenance.
-        // - The range has inhomogeneous provenance. Then we should return just the
-        //   underlying bits.
-        let bytes = self.get_bytes(cx, range)?;
-        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+        // Fallback path for when we cannot treat provenance bytewise or ignore it.
+        assert!(!Prov::OFFSET_IS_ADDR);
+        if self.range_has_provenance(cx, range) {
+            return Err(AllocError::ReadPointerAsBytes);
+        }
+        // There is no provenance, we can just return the bits.
         Ok(Scalar::from_uint(bits, range.size))
     }
 
@@ -534,6 +515,13 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> {
         self.provenance.range(Size::from_bytes(start)..range.end())
     }
 
+    /// Get the provenance of a single byte.
+    fn offset_get_provenance(&self, cx: &impl HasDataLayout, offset: Size) -> Option<Prov> {
+        let prov = self.range_get_provenance(cx, alloc_range(offset, Size::from_bytes(1)));
+        assert!(prov.len() <= 1);
+        prov.first().map(|(_offset, prov)| *prov)
+    }
+
     /// Returns whether this allocation has progrnance overlapping with the given range.
     ///
     /// Note: this function exists to allow `range_get_provenance` to be private, in order to somewhat
@@ -543,12 +531,6 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> {
         !self.range_get_provenance(cx, range).is_empty()
     }
 
-    /// Checks that there is no provenance overlapping with the given range.
-    #[inline(always)]
-    fn check_provenance(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
-        if self.range_has_provenance(cx, range) { Err(AllocError::ReadPointerAsBytes) } else { Ok(()) }
-    }
-
     /// Removes all provenance inside the given range.
     /// If there is provenance overlapping with the edges, it
     /// are removed as well *and* the bytes they cover are marked as
@@ -606,14 +588,6 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> {
 
         Ok(())
     }
-
-    /// Errors if there is provenance overlapping with the edges of the given memory range.
-    #[inline]
-    fn check_provenance_edges(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
-        self.check_provenance(cx, alloc_range(range.start, Size::ZERO))?;
-        self.check_provenance(cx, alloc_range(range.end(), Size::ZERO))?;
-        Ok(())
-    }
 }
 
 /// Stores the provenance information of pointers stored in memory.
diff --git a/compiler/rustc_middle/src/mir/interpret/error.rs b/compiler/rustc_middle/src/mir/interpret/error.rs
index cecb55578d3..e4039cc7c68 100644
--- a/compiler/rustc_middle/src/mir/interpret/error.rs
+++ b/compiler/rustc_middle/src/mir/interpret/error.rs
@@ -401,14 +401,18 @@ impl fmt::Display for UndefinedBehaviorInfo {
 pub enum UnsupportedOpInfo {
     /// Free-form case. Only for errors that are never caught!
     Unsupported(String),
-    /// Encountered a pointer where we needed raw bytes.
-    ReadPointerAsBytes,
     /// Overwriting parts of a pointer; the resulting state cannot be represented in our
     /// `Allocation` data structure. See <https://github.com/rust-lang/miri/issues/2181>.
     PartialPointerOverwrite(Pointer<AllocId>),
+    /// Attempting to `copy` parts of a pointer to somewhere else; the resulting state cannot be
+    /// represented in our `Allocation` data structure. See
+    /// <https://github.com/rust-lang/miri/issues/2181>.
+    PartialPointerCopy(Pointer<AllocId>),
     //
     // The variants below are only reachable from CTFE/const prop, miri will never emit them.
     //
+    /// Encountered a pointer where we needed raw bytes.
+    ReadPointerAsBytes,
     /// Accessing thread local statics
     ThreadLocalStatic(DefId),
     /// Accessing an unsupported extern static.
@@ -420,10 +424,13 @@ impl fmt::Display for UnsupportedOpInfo {
         use UnsupportedOpInfo::*;
         match self {
             Unsupported(ref msg) => write!(f, "{msg}"),
-            ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"),
             PartialPointerOverwrite(ptr) => {
                 write!(f, "unable to overwrite parts of a pointer in memory at {ptr:?}")
             }
+            PartialPointerCopy(ptr) => {
+                write!(f, "unable to copy parts of a pointer from memory at {ptr:?}")
+            }
+            ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"),
             ThreadLocalStatic(did) => write!(f, "cannot access thread local static ({did:?})"),
             ReadExternStatic(did) => write!(f, "cannot read from extern static ({did:?})"),
         }
diff --git a/compiler/rustc_middle/src/mir/interpret/pointer.rs b/compiler/rustc_middle/src/mir/interpret/pointer.rs
index 384954cbbd5..5fa802236ed 100644
--- a/compiler/rustc_middle/src/mir/interpret/pointer.rs
+++ b/compiler/rustc_middle/src/mir/interpret/pointer.rs
@@ -125,6 +125,9 @@ pub trait Provenance: Copy + fmt::Debug {
     /// Otherwise this function is best-effort (but must agree with `Machine::ptr_get_alloc`).
     /// (Identifying the offset in that allocation, however, is harder -- use `Memory::ptr_get_alloc` for that.)
     fn get_alloc_id(self) -> Option<AllocId>;
+
+    /// Defines the 'join' of provenance: what happens when doing a pointer load and different bytes have different provenance.
+    fn join(left: Option<Self>, right: Option<Self>) -> Option<Self>;
 }
 
 impl Provenance for AllocId {
@@ -152,6 +155,10 @@ impl Provenance for AllocId {
     fn get_alloc_id(self) -> Option<AllocId> {
         Some(self)
     }
+
+    fn join(_left: Option<Self>, _right: Option<Self>) -> Option<Self> {
+        panic!("merging provenance is not supported when `OFFSET_IS_ADDR` is false")
+    }
 }
 
 /// Represents a pointer in the Miri engine.
diff --git a/compiler/rustc_middle/src/mir/interpret/value.rs b/compiler/rustc_middle/src/mir/interpret/value.rs
index 1ba16025e32..d4fad7f1ecd 100644
--- a/compiler/rustc_middle/src/mir/interpret/value.rs
+++ b/compiler/rustc_middle/src/mir/interpret/value.rs
@@ -507,7 +507,7 @@ pub fn get_slice_bytes<'tcx>(cx: &impl HasDataLayout, val: ConstValue<'tcx>) ->
     if let ConstValue::Slice { data, start, end } = val {
         let len = end - start;
         data.inner()
-            .get_bytes(
+            .get_bytes_strip_provenance(
                 cx,
                 AllocRange { start: Size::from_bytes(start), size: Size::from_bytes(len) },
             )
diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs
index 4e886ff1592..75327cff368 100644
--- a/compiler/rustc_middle/src/mir/mod.rs
+++ b/compiler/rustc_middle/src/mir/mod.rs
@@ -2719,7 +2719,7 @@ fn pretty_print_const_value<'tcx>(
                 let n = n.kind().try_to_bits(tcx.data_layout.pointer_size).unwrap();
                 // cast is ok because we already checked for pointer size (32 or 64 bit) above
                 let range = AllocRange { start: offset, size: Size::from_bytes(n) };
-                let byte_str = alloc.inner().get_bytes(&tcx, range).unwrap();
+                let byte_str = alloc.inner().get_bytes_strip_provenance(&tcx, range).unwrap();
                 fmt.write_str("*")?;
                 pretty_print_byte_str(fmt, byte_str)?;
                 return Ok(());