diff options
| author | Ralf Jung <post@ralfj.de> | 2022-08-27 14:54:02 -0400 |
|---|---|---|
| committer | Ralf Jung <post@ralfj.de> | 2022-08-27 18:37:44 -0400 |
| commit | 2e172473daefd24631faf3906bd411798d7d8a17 (patch) | |
| tree | 4bbfce7ca26338ca0db6ac3821acafd58b479d5c /compiler/rustc_middle/src/mir | |
| parent | e63a6257118effd270223ae38306013dfd477516 (diff) | |
| download | rust-2e172473daefd24631faf3906bd411798d7d8a17.tar.gz rust-2e172473daefd24631faf3906bd411798d7d8a17.zip | |
interpret: make read-pointer-as-bytes *always* work in Miri
and show some extra information when it happens in CTFE
Diffstat (limited to 'compiler/rustc_middle/src/mir')
| -rw-r--r-- | compiler/rustc_middle/src/mir/interpret/allocation.rs | 156 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/interpret/error.rs | 13 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/interpret/pointer.rs | 7 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/interpret/value.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/mod.rs | 2 |
5 files changed, 84 insertions, 96 deletions
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs index cc39e434225..37ec04b07f8 100644 --- a/compiler/rustc_middle/src/mir/interpret/allocation.rs +++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs @@ -130,6 +130,8 @@ pub enum AllocError { ReadPointerAsBytes, /// Partially overwriting a pointer. PartialPointerOverwrite(Size), + /// Partially copying a pointer. + PartialPointerCopy(Size), /// Using uninitialized data where it is not allowed. InvalidUninitBytes(Option<UninitBytesAccess>), } @@ -152,6 +154,9 @@ impl AllocError { PartialPointerOverwrite(offset) => InterpError::Unsupported( UnsupportedOpInfo::PartialPointerOverwrite(Pointer::new(alloc_id, offset)), ), + PartialPointerCopy(offset) => InterpError::Unsupported( + UnsupportedOpInfo::PartialPointerCopy(Pointer::new(alloc_id, offset)), + ), InvalidUninitBytes(info) => InterpError::UndefinedBehavior( UndefinedBehaviorInfo::InvalidUninitBytes(info.map(|b| (alloc_id, b))), ), @@ -322,62 +327,35 @@ impl<Prov, Extra> Allocation<Prov, Extra> { /// Byte accessors. impl<Prov: Provenance, Extra> Allocation<Prov, Extra> { /// This is the entirely abstraction-violating way to just grab the raw bytes without - /// caring about provenance. It just deduplicates some code between `read_scalar` - /// and `get_bytes_internal`. - fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] { - &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()] - } - - /// The last argument controls whether we error out when there are uninitialized or pointer - /// bytes. However, we *always* error when there is provenance overlapping the edges of the - /// range. - /// - /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead, + /// caring about provenance or initialization. /// /// This function also guarantees that the resulting pointer will remain stable /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies /// on that. - /// - /// It is the caller's responsibility to check bounds and alignment beforehand. - fn get_bytes_internal( - &self, - cx: &impl HasDataLayout, - range: AllocRange, - check_init_and_ptr: bool, - ) -> AllocResult<&[u8]> { - if check_init_and_ptr { - self.check_init(range)?; - self.check_provenance(cx, range)?; - } else { - // We still don't want provenance on the *edges*. - self.check_provenance_edges(cx, range)?; - } - - Ok(self.get_bytes_even_more_internal(range)) + #[inline] + pub fn get_bytes_unchecked(&self, range: AllocRange) -> &[u8] { + &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()] } - /// Checks that these bytes are initialized and not pointer bytes, and then return them - /// as a slice. + /// Checks that these bytes are initialized, and then strip provenance (if possible) and return + /// them. /// /// It is the caller's responsibility to check bounds and alignment beforehand. /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods /// on `InterpCx` instead. #[inline] - pub fn get_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult<&[u8]> { - self.get_bytes_internal(cx, range, true) - } - - /// It is the caller's responsibility to handle uninitialized and pointer bytes. - /// However, this still checks that there is no provenance on the *edges*. - /// - /// It is the caller's responsibility to check bounds and alignment beforehand. - #[inline] - pub fn get_bytes_with_uninit_and_ptr( + pub fn get_bytes_strip_provenance( &self, cx: &impl HasDataLayout, range: AllocRange, ) -> AllocResult<&[u8]> { - self.get_bytes_internal(cx, range, false) + self.check_init(range)?; + if !Prov::OFFSET_IS_ADDR { + if self.range_has_provenance(cx, range) { + return Err(AllocError::ReadPointerAsBytes); + } + } + Ok(self.get_bytes_unchecked(range)) } /// Just calling this already marks everything as defined and removes provenance, @@ -415,13 +393,6 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> { /// Reading and writing. impl<Prov: Provenance, Extra> Allocation<Prov, Extra> { - /// Validates that this memory range is initiailized and contains no provenance. - pub fn check_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { - // This implicitly does all the checking we are asking for. - self.get_bytes(cx, range)?; - Ok(()) - } - /// Reads a *non-ZST* scalar. /// /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine @@ -438,43 +409,53 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> { range: AllocRange, read_provenance: bool, ) -> AllocResult<Scalar<Prov>> { - if read_provenance { - assert_eq!(range.size, cx.data_layout().pointer_size); - } - // First and foremost, if anything is uninit, bail. if self.is_init(range).is_err() { return Err(AllocError::InvalidUninitBytes(None)); } - // If we are doing a pointer read, and there is provenance exactly where we - // are reading, then we can put data and provenance back together and return that. - if read_provenance && let Some(&prov) = self.provenance.get(&range.start) { - // We already checked init and provenance, so we can use this function. - let bytes = self.get_bytes_even_more_internal(range); - let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); - let ptr = Pointer::new(prov, Size::from_bytes(bits)); - return Ok(Scalar::from_pointer(ptr, cx)); - } + // Get the integer part of the result. We HAVE TO check provenance before returning this! + let bytes = self.get_bytes_unchecked(range); + let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); - // If we are *not* reading a pointer, and we can just ignore provenance, - // then do exactly that. - if !read_provenance && Prov::OFFSET_IS_ADDR { - // We just strip provenance. - let bytes = self.get_bytes_even_more_internal(range); - let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); - return Ok(Scalar::from_uint(bits, range.size)); + if read_provenance { + assert_eq!(range.size, cx.data_layout().pointer_size); + + // When reading data with provenance, the easy case is finding provenance exactly where we + // are reading, then we can put data and provenance back together and return that. + if let Some(&prov) = self.provenance.get(&range.start) { + // Now we can return the bits, with their appropriate provenance. + let ptr = Pointer::new(prov, Size::from_bytes(bits)); + return Ok(Scalar::from_pointer(ptr, cx)); + } + + // If we can work on pointers byte-wise, join the byte-wise provenances. + if Prov::OFFSET_IS_ADDR { + let mut prov = self.offset_get_provenance(cx, range.start); + for offset in 1..range.size.bytes() { + let this_prov = + self.offset_get_provenance(cx, range.start + Size::from_bytes(offset)); + prov = Prov::join(prov, this_prov); + } + // Now use this provenance. + let ptr = Pointer::new(prov, Size::from_bytes(bits)); + return Ok(Scalar::from_maybe_pointer(ptr, cx)); + } + } else { + // We are *not* reading a pointer. + // If we can just ignore provenance, do exactly that. + if Prov::OFFSET_IS_ADDR { + // We just strip provenance. + return Ok(Scalar::from_uint(bits, range.size)); + } } - // It's complicated. Better make sure there is no provenance anywhere. - // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then - // `read_pointer` is true and we ideally would distinguish the following two cases: - // - The entire `range` is covered by the same provenance, stored in two separate entries of - // the provenance map. Then we should return a pointer with that provenance. - // - The range has inhomogeneous provenance. Then we should return just the - // underlying bits. - let bytes = self.get_bytes(cx, range)?; - let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); + // Fallback path for when we cannot treat provenance bytewise or ignore it. + assert!(!Prov::OFFSET_IS_ADDR); + if self.range_has_provenance(cx, range) { + return Err(AllocError::ReadPointerAsBytes); + } + // There is no provenance, we can just return the bits. Ok(Scalar::from_uint(bits, range.size)) } @@ -534,6 +515,13 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> { self.provenance.range(Size::from_bytes(start)..range.end()) } + /// Get the provenance of a single byte. + fn offset_get_provenance(&self, cx: &impl HasDataLayout, offset: Size) -> Option<Prov> { + let prov = self.range_get_provenance(cx, alloc_range(offset, Size::from_bytes(1))); + assert!(prov.len() <= 1); + prov.first().map(|(_offset, prov)| *prov) + } + /// Returns whether this allocation has progrnance overlapping with the given range. /// /// Note: this function exists to allow `range_get_provenance` to be private, in order to somewhat @@ -543,12 +531,6 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> { !self.range_get_provenance(cx, range).is_empty() } - /// Checks that there is no provenance overlapping with the given range. - #[inline(always)] - fn check_provenance(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { - if self.range_has_provenance(cx, range) { Err(AllocError::ReadPointerAsBytes) } else { Ok(()) } - } - /// Removes all provenance inside the given range. /// If there is provenance overlapping with the edges, it /// are removed as well *and* the bytes they cover are marked as @@ -606,14 +588,6 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> { Ok(()) } - - /// Errors if there is provenance overlapping with the edges of the given memory range. - #[inline] - fn check_provenance_edges(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { - self.check_provenance(cx, alloc_range(range.start, Size::ZERO))?; - self.check_provenance(cx, alloc_range(range.end(), Size::ZERO))?; - Ok(()) - } } /// Stores the provenance information of pointers stored in memory. diff --git a/compiler/rustc_middle/src/mir/interpret/error.rs b/compiler/rustc_middle/src/mir/interpret/error.rs index cecb55578d3..e4039cc7c68 100644 --- a/compiler/rustc_middle/src/mir/interpret/error.rs +++ b/compiler/rustc_middle/src/mir/interpret/error.rs @@ -401,14 +401,18 @@ impl fmt::Display for UndefinedBehaviorInfo { pub enum UnsupportedOpInfo { /// Free-form case. Only for errors that are never caught! Unsupported(String), - /// Encountered a pointer where we needed raw bytes. - ReadPointerAsBytes, /// Overwriting parts of a pointer; the resulting state cannot be represented in our /// `Allocation` data structure. See <https://github.com/rust-lang/miri/issues/2181>. PartialPointerOverwrite(Pointer<AllocId>), + /// Attempting to `copy` parts of a pointer to somewhere else; the resulting state cannot be + /// represented in our `Allocation` data structure. See + /// <https://github.com/rust-lang/miri/issues/2181>. + PartialPointerCopy(Pointer<AllocId>), // // The variants below are only reachable from CTFE/const prop, miri will never emit them. // + /// Encountered a pointer where we needed raw bytes. + ReadPointerAsBytes, /// Accessing thread local statics ThreadLocalStatic(DefId), /// Accessing an unsupported extern static. @@ -420,10 +424,13 @@ impl fmt::Display for UnsupportedOpInfo { use UnsupportedOpInfo::*; match self { Unsupported(ref msg) => write!(f, "{msg}"), - ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"), PartialPointerOverwrite(ptr) => { write!(f, "unable to overwrite parts of a pointer in memory at {ptr:?}") } + PartialPointerCopy(ptr) => { + write!(f, "unable to copy parts of a pointer from memory at {ptr:?}") + } + ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"), ThreadLocalStatic(did) => write!(f, "cannot access thread local static ({did:?})"), ReadExternStatic(did) => write!(f, "cannot read from extern static ({did:?})"), } diff --git a/compiler/rustc_middle/src/mir/interpret/pointer.rs b/compiler/rustc_middle/src/mir/interpret/pointer.rs index 384954cbbd5..5fa802236ed 100644 --- a/compiler/rustc_middle/src/mir/interpret/pointer.rs +++ b/compiler/rustc_middle/src/mir/interpret/pointer.rs @@ -125,6 +125,9 @@ pub trait Provenance: Copy + fmt::Debug { /// Otherwise this function is best-effort (but must agree with `Machine::ptr_get_alloc`). /// (Identifying the offset in that allocation, however, is harder -- use `Memory::ptr_get_alloc` for that.) fn get_alloc_id(self) -> Option<AllocId>; + + /// Defines the 'join' of provenance: what happens when doing a pointer load and different bytes have different provenance. + fn join(left: Option<Self>, right: Option<Self>) -> Option<Self>; } impl Provenance for AllocId { @@ -152,6 +155,10 @@ impl Provenance for AllocId { fn get_alloc_id(self) -> Option<AllocId> { Some(self) } + + fn join(_left: Option<Self>, _right: Option<Self>) -> Option<Self> { + panic!("merging provenance is not supported when `OFFSET_IS_ADDR` is false") + } } /// Represents a pointer in the Miri engine. diff --git a/compiler/rustc_middle/src/mir/interpret/value.rs b/compiler/rustc_middle/src/mir/interpret/value.rs index 1ba16025e32..d4fad7f1ecd 100644 --- a/compiler/rustc_middle/src/mir/interpret/value.rs +++ b/compiler/rustc_middle/src/mir/interpret/value.rs @@ -507,7 +507,7 @@ pub fn get_slice_bytes<'tcx>(cx: &impl HasDataLayout, val: ConstValue<'tcx>) -> if let ConstValue::Slice { data, start, end } = val { let len = end - start; data.inner() - .get_bytes( + .get_bytes_strip_provenance( cx, AllocRange { start: Size::from_bytes(start), size: Size::from_bytes(len) }, ) diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 4e886ff1592..75327cff368 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -2719,7 +2719,7 @@ fn pretty_print_const_value<'tcx>( let n = n.kind().try_to_bits(tcx.data_layout.pointer_size).unwrap(); // cast is ok because we already checked for pointer size (32 or 64 bit) above let range = AllocRange { start: offset, size: Size::from_bytes(n) }; - let byte_str = alloc.inner().get_bytes(&tcx, range).unwrap(); + let byte_str = alloc.inner().get_bytes_strip_provenance(&tcx, range).unwrap(); fmt.write_str("*")?; pretty_print_byte_str(fmt, byte_str)?; return Ok(()); |
