diff options
| author | Ralf Jung <post@ralfj.de> | 2022-08-27 14:54:02 -0400 |
|---|---|---|
| committer | Ralf Jung <post@ralfj.de> | 2022-08-27 18:37:44 -0400 |
| commit | 2e172473daefd24631faf3906bd411798d7d8a17 (patch) | |
| tree | 4bbfce7ca26338ca0db6ac3821acafd58b479d5c | |
| parent | e63a6257118effd270223ae38306013dfd477516 (diff) | |
| download | rust-2e172473daefd24631faf3906bd411798d7d8a17.tar.gz rust-2e172473daefd24631faf3906bd411798d7d8a17.zip | |
interpret: make read-pointer-as-bytes *always* work in Miri
and show some extra information when it happens in CTFE
| -rw-r--r-- | compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs | 5 | ||||
| -rw-r--r-- | compiler/rustc_const_eval/src/const_eval/error.rs | 13 | ||||
| -rw-r--r-- | compiler/rustc_const_eval/src/const_eval/eval_queries.rs | 8 | ||||
| -rw-r--r-- | compiler/rustc_const_eval/src/interpret/intrinsics.rs | 21 | ||||
| -rw-r--r-- | compiler/rustc_const_eval/src/interpret/memory.rs | 24 | ||||
| -rw-r--r-- | compiler/rustc_const_eval/src/interpret/operand.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_const_eval/src/interpret/place.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_const_eval/src/interpret/projection.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_const_eval/src/interpret/validity.rs | 35 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/interpret/allocation.rs | 156 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/interpret/error.rs | 13 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/interpret/pointer.rs | 7 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/interpret/value.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/mod.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/ty/print/pretty.rs | 2 |
15 files changed, 153 insertions, 145 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index a32b413d45f..1f358b1bbb9 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -186,7 +186,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( let size = Size::from_bytes( 4 * ret_lane_count, /* size_of([u32; ret_lane_count]) */ ); - alloc.inner().get_bytes(fx, alloc_range(offset, size)).unwrap() + alloc + .inner() + .get_bytes_strip_provenance(fx, alloc_range(offset, size)) + .unwrap() } _ => unreachable!("{:?}", idx_const), }; diff --git a/compiler/rustc_const_eval/src/const_eval/error.rs b/compiler/rustc_const_eval/src/const_eval/error.rs index eb81f43c3fe..63a3fef2e62 100644 --- a/compiler/rustc_const_eval/src/const_eval/error.rs +++ b/compiler/rustc_const_eval/src/const_eval/error.rs @@ -10,6 +10,7 @@ use rustc_span::{Span, Symbol}; use super::InterpCx; use crate::interpret::{ struct_error, ErrorHandled, FrameInfo, InterpError, InterpErrorInfo, Machine, MachineStopType, + UnsupportedOpInfo, }; /// The CTFE machine has some custom error kinds. @@ -153,6 +154,18 @@ impl<'tcx> ConstEvalErr<'tcx> { if let Some(span_msg) = span_msg { err.span_label(self.span, span_msg); } + // Add some more context for select error types. + match self.error { + InterpError::Unsupported( + UnsupportedOpInfo::ReadPointerAsBytes + | UnsupportedOpInfo::PartialPointerOverwrite(_) + | UnsupportedOpInfo::PartialPointerCopy(_), + ) => { + err.help("this code performed an operation that depends on the underlying bytes representing a pointer"); + err.help("the absolute address of a pointer is not known at compile-time, so such operations are not supported"); + } + _ => {} + } // Add spans for the stacktrace. Don't print a single-line backtrace though. if self.stacktrace.len() > 1 { // Helper closure to print duplicated lines. diff --git a/compiler/rustc_const_eval/src/const_eval/eval_queries.rs b/compiler/rustc_const_eval/src/const_eval/eval_queries.rs index e13ad1c95bd..6a737d120e2 100644 --- a/compiler/rustc_const_eval/src/const_eval/eval_queries.rs +++ b/compiler/rustc_const_eval/src/const_eval/eval_queries.rs @@ -2,8 +2,8 @@ use super::{CompileTimeEvalContext, CompileTimeInterpreter, ConstEvalErr}; use crate::interpret::eval_nullary_intrinsic; use crate::interpret::{ intern_const_alloc_recursive, Allocation, ConstAlloc, ConstValue, CtfeValidationMode, GlobalId, - Immediate, InternKind, InterpCx, InterpResult, MPlaceTy, MemoryKind, OpTy, RefTracking, - StackPopCleanup, + Immediate, InternKind, InterpCx, InterpError, InterpResult, MPlaceTy, MemoryKind, OpTy, + RefTracking, StackPopCleanup, }; use rustc_hir::def::DefKind; @@ -385,7 +385,9 @@ pub fn eval_to_allocation_raw_provider<'tcx>( ecx.tcx, "it is undefined behavior to use this value", |diag| { - diag.note(NOTE_ON_UNDEFINED_BEHAVIOR_ERROR); + if matches!(err.error, InterpError::UndefinedBehavior(_)) { + diag.note(NOTE_ON_UNDEFINED_BEHAVIOR_ERROR); + } diag.note(&format!( "the raw bytes of the constant ({}", display_allocation( diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs index 6f3bd3bf4c5..a8ec8447f64 100644 --- a/compiler/rustc_const_eval/src/interpret/intrinsics.rs +++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs @@ -687,10 +687,23 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { let layout = self.layout_of(lhs.layout.ty.builtin_deref(true).unwrap().ty)?; assert!(!layout.is_unsized()); - let lhs = self.read_pointer(lhs)?; - let rhs = self.read_pointer(rhs)?; - let lhs_bytes = self.read_bytes_ptr(lhs, layout.size)?; - let rhs_bytes = self.read_bytes_ptr(rhs, layout.size)?; + let get_bytes = |this: &InterpCx<'mir, 'tcx, M>, + op: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>, + size| + -> InterpResult<'tcx, &[u8]> { + let ptr = this.read_pointer(op)?; + let Some(alloc_ref) = self.get_ptr_alloc(ptr, size, Align::ONE)? else { + // zero-sized access + return Ok(&[]); + }; + if alloc_ref.has_provenance() { + throw_ub_format!("`raw_eq` on bytes with provenance"); + } + alloc_ref.get_bytes_strip_provenance() + }; + + let lhs_bytes = get_bytes(self, lhs, layout.size)?; + let rhs_bytes = get_bytes(self, rhs, layout.size)?; Ok(Scalar::from_bool(lhs_bytes == rhs_bytes)) } } diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs index f84c6017dbf..69dbc9592fa 100644 --- a/compiler/rustc_const_eval/src/interpret/memory.rs +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -953,10 +953,10 @@ impl<'tcx, 'a, Prov: Provenance, Extra> AllocRef<'a, 'tcx, Prov, Extra> { } /// `range` is relative to this allocation reference, not the base of the allocation. - pub fn check_bytes(&self, range: AllocRange) -> InterpResult<'tcx> { + pub fn get_bytes_strip_provenance<'b>(&'b self) -> InterpResult<'tcx, &'a [u8]> { Ok(self .alloc - .check_bytes(&self.tcx, self.range.subrange(range)) + .get_bytes_strip_provenance(&self.tcx, self.range) .map_err(|e| e.to_interp_error(self.alloc_id))?) } @@ -967,10 +967,11 @@ impl<'tcx, 'a, Prov: Provenance, Extra> AllocRef<'a, 'tcx, Prov, Extra> { } impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { - /// Reads the given number of bytes from memory. Returns them as a slice. + /// Reads the given number of bytes from memory, and strips their provenance if possible. + /// Returns them as a slice. /// /// Performs appropriate bounds checks. - pub fn read_bytes_ptr( + pub fn read_bytes_ptr_strip_provenance( &self, ptr: Pointer<Option<M::Provenance>>, size: Size, @@ -983,7 +984,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { // (We are staying inside the bounds here so all is good.) Ok(alloc_ref .alloc - .get_bytes(&alloc_ref.tcx, alloc_ref.range) + .get_bytes_strip_provenance(&alloc_ref.tcx, alloc_ref.range) .map_err(|e| e.to_interp_error(alloc_ref.alloc_id))?) } @@ -1078,12 +1079,15 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { return Ok(()); }; - // This checks provenance edges on the src, which needs to happen before + // Checks provenance edges on the src, which needs to happen before // `prepare_provenance_copy`. - let src_bytes = src_alloc - .get_bytes_with_uninit_and_ptr(&tcx, src_range) - .map_err(|e| e.to_interp_error(src_alloc_id))? - .as_ptr(); // raw ptr, so we can also get a ptr to the destination allocation + if src_alloc.range_has_provenance(&tcx, alloc_range(src_range.start, Size::ZERO)) { + throw_unsup!(PartialPointerCopy(Pointer::new(src_alloc_id, src_range.start))); + } + if src_alloc.range_has_provenance(&tcx, alloc_range(src_range.end(), Size::ZERO)) { + throw_unsup!(PartialPointerCopy(Pointer::new(src_alloc_id, src_range.end()))); + } + let src_bytes = src_alloc.get_bytes_unchecked(src_range).as_ptr(); // raw ptr, so we can also get a ptr to the destination allocation // first copy the provenance to a temporary buffer, because // `get_bytes_mut` will clear the provenance, which is correct, // since we don't want to keep any provenance at the target. diff --git a/compiler/rustc_const_eval/src/interpret/operand.rs b/compiler/rustc_const_eval/src/interpret/operand.rs index e80a82acd58..6ec9b174f82 100644 --- a/compiler/rustc_const_eval/src/interpret/operand.rs +++ b/compiler/rustc_const_eval/src/interpret/operand.rs @@ -415,7 +415,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { /// Turn the wide MPlace into a string (must already be dereferenced!) pub fn read_str(&self, mplace: &MPlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx, &str> { let len = mplace.len(self)?; - let bytes = self.read_bytes_ptr(mplace.ptr, Size::from_bytes(len))?; + let bytes = self.read_bytes_ptr_strip_provenance(mplace.ptr, Size::from_bytes(len))?; let str = std::str::from_utf8(bytes).map_err(|err| err_ub!(InvalidStr(err)))?; Ok(str) } diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs index 7aa76fe1dae..d7841ba6670 100644 --- a/compiler/rustc_const_eval/src/interpret/place.rs +++ b/compiler/rustc_const_eval/src/interpret/place.rs @@ -2,8 +2,6 @@ //! into a place. //! All high-level functions to write to memory work on places as destinations. -use std::hash::Hash; - use rustc_ast::Mutability; use rustc_middle::mir; use rustc_middle::ty; @@ -290,7 +288,7 @@ impl<'tcx, Prov: Provenance> PlaceTy<'tcx, Prov> { // FIXME: Working around https://github.com/rust-lang/rust/issues/54385 impl<'mir, 'tcx: 'mir, Prov, M> InterpCx<'mir, 'tcx, M> where - Prov: Provenance + Eq + Hash + 'static, + Prov: Provenance + 'static, M: Machine<'mir, 'tcx, Provenance = Prov>, { /// Take a value, which represents a (thin or wide) reference, and make it a place. diff --git a/compiler/rustc_const_eval/src/interpret/projection.rs b/compiler/rustc_const_eval/src/interpret/projection.rs index 742339f2b0a..a800553f34e 100644 --- a/compiler/rustc_const_eval/src/interpret/projection.rs +++ b/compiler/rustc_const_eval/src/interpret/projection.rs @@ -7,8 +7,6 @@ //! but we still need to do bounds checking and adjust the layout. To not duplicate that with MPlaceTy, we actually //! implement the logic on OpTy, and MPlaceTy calls that. -use std::hash::Hash; - use rustc_middle::mir; use rustc_middle::ty; use rustc_middle::ty::layout::LayoutOf; @@ -22,7 +20,7 @@ use super::{ // FIXME: Working around https://github.com/rust-lang/rust/issues/54385 impl<'mir, 'tcx: 'mir, Prov, M> InterpCx<'mir, 'tcx, M> where - Prov: Provenance + Eq + Hash + 'static, + Prov: Provenance + 'static, M: Machine<'mir, 'tcx, Provenance = Prov>, { //# Field access diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs index e1555f68737..ee55b4a0750 100644 --- a/compiler/rustc_const_eval/src/interpret/validity.rs +++ b/compiler/rustc_const_eval/src/interpret/validity.rs @@ -20,8 +20,8 @@ use rustc_target::abi::{Abi, Scalar as ScalarAbi, Size, VariantIdx, Variants, Wr use std::hash::Hash; use super::{ - alloc_range, CheckInAllocMsg, GlobalAlloc, ImmTy, Immediate, InterpCx, InterpResult, MPlaceTy, - Machine, MemPlaceMeta, OpTy, Scalar, ValueVisitor, + CheckInAllocMsg, GlobalAlloc, ImmTy, Immediate, InterpCx, InterpResult, MPlaceTy, Machine, + MemPlaceMeta, OpTy, Scalar, ValueVisitor, }; macro_rules! throw_validation_failure { @@ -312,7 +312,6 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, ' Ok(try_validation!( self.ecx.read_immediate(op), self.path, - err_unsup!(ReadPointerAsBytes) => { "(potentially part of) a pointer" } expected { "{expected}" }, err_ub!(InvalidUninitBytes(None)) => { "uninitialized memory" } expected { "{expected}" } )) } @@ -345,11 +344,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, ' // FIXME: check if the type/trait match what ty::Dynamic says? } ty::Slice(..) | ty::Str => { - let _len = try_validation!( - meta.unwrap_meta().to_machine_usize(self.ecx), - self.path, - err_unsup!(ReadPointerAsBytes) => { "non-integer slice length in wide pointer" }, - ); + let _len = meta.unwrap_meta().to_machine_usize(self.ecx)?; // We do not check that `len * elem_size <= isize::MAX`: // that is only required for references, and there it falls out of the // "dereferenceable" check performed by Stacked Borrows. @@ -669,8 +664,6 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M> { "{:x}", val } expected { "a valid enum tag" }, err_ub!(InvalidUninitBytes(None)) => { "uninitialized bytes" } expected { "a valid enum tag" }, - err_unsup!(ReadPointerAsBytes) => - { "a pointer" } expected { "a valid enum tag" }, ) .1) }) @@ -810,10 +803,9 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M> let mplace = op.assert_mem_place(); // strings are unsized and hence never immediate let len = mplace.len(self.ecx)?; try_validation!( - self.ecx.read_bytes_ptr(mplace.ptr, Size::from_bytes(len)), + self.ecx.read_bytes_ptr_strip_provenance(mplace.ptr, Size::from_bytes(len)), self.path, err_ub!(InvalidUninitBytes(..)) => { "uninitialized data in `str`" }, - err_unsup!(ReadPointerAsBytes) => { "a pointer in `str`" }, ); } ty::Array(tys, ..) | ty::Slice(tys) @@ -861,9 +853,9 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M> // We also accept uninit, for consistency with the slow path. let alloc = self.ecx.get_ptr_alloc(mplace.ptr, size, mplace.align)?.expect("we already excluded size 0"); - match alloc.check_bytes(alloc_range(Size::ZERO, size)) { + match alloc.get_bytes_strip_provenance() { // In the happy case, we needn't check anything else. - Ok(()) => {} + Ok(_) => {} // Some error happened, try to provide a more detailed description. Err(err) => { // For some errors we might be able to provide extra information. @@ -881,9 +873,6 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M> throw_validation_failure!(self.path, { "uninitialized bytes" }) } - err_unsup!(ReadPointerAsBytes) => { - throw_validation_failure!(self.path, { "a pointer" } expected { "plain (non-pointer) bytes" }) - } // Propagate upwards (that will also check for unexpected errors). _ => return Err(err), @@ -924,14 +913,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { Ok(()) => Ok(()), // Pass through validation failures. Err(err) if matches!(err.kind(), err_ub!(ValidationFailure { .. })) => Err(err), - // Also pass through InvalidProgram, those just indicate that we could not - // validate and each caller will know best what to do with them. - Err(err) if matches!(err.kind(), InterpError::InvalidProgram(_)) => Err(err), - // Avoid other errors as those do not show *where* in the value the issue lies. - Err(err) => { + // Complain about any other kind of UB error -- those are bad because we'd like to + // report them in a way that shows *where* in the value the issue lies. + Err(err) if matches!(err.kind(), InterpError::UndefinedBehavior(_)) => { err.print_backtrace(); - bug!("Unexpected error during validation: {}", err); + bug!("Unexpected Undefined Behavior error during validation: {}", err); } + // Pass through everything else. + Err(err) => Err(err), } } diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs index cc39e434225..37ec04b07f8 100644 --- a/compiler/rustc_middle/src/mir/interpret/allocation.rs +++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs @@ -130,6 +130,8 @@ pub enum AllocError { ReadPointerAsBytes, /// Partially overwriting a pointer. PartialPointerOverwrite(Size), + /// Partially copying a pointer. + PartialPointerCopy(Size), /// Using uninitialized data where it is not allowed. InvalidUninitBytes(Option<UninitBytesAccess>), } @@ -152,6 +154,9 @@ impl AllocError { PartialPointerOverwrite(offset) => InterpError::Unsupported( UnsupportedOpInfo::PartialPointerOverwrite(Pointer::new(alloc_id, offset)), ), + PartialPointerCopy(offset) => InterpError::Unsupported( + UnsupportedOpInfo::PartialPointerCopy(Pointer::new(alloc_id, offset)), + ), InvalidUninitBytes(info) => InterpError::UndefinedBehavior( UndefinedBehaviorInfo::InvalidUninitBytes(info.map(|b| (alloc_id, b))), ), @@ -322,62 +327,35 @@ impl<Prov, Extra> Allocation<Prov, Extra> { /// Byte accessors. impl<Prov: Provenance, Extra> Allocation<Prov, Extra> { /// This is the entirely abstraction-violating way to just grab the raw bytes without - /// caring about provenance. It just deduplicates some code between `read_scalar` - /// and `get_bytes_internal`. - fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] { - &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()] - } - - /// The last argument controls whether we error out when there are uninitialized or pointer - /// bytes. However, we *always* error when there is provenance overlapping the edges of the - /// range. - /// - /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead, + /// caring about provenance or initialization. /// /// This function also guarantees that the resulting pointer will remain stable /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies /// on that. - /// - /// It is the caller's responsibility to check bounds and alignment beforehand. - fn get_bytes_internal( - &self, - cx: &impl HasDataLayout, - range: AllocRange, - check_init_and_ptr: bool, - ) -> AllocResult<&[u8]> { - if check_init_and_ptr { - self.check_init(range)?; - self.check_provenance(cx, range)?; - } else { - // We still don't want provenance on the *edges*. - self.check_provenance_edges(cx, range)?; - } - - Ok(self.get_bytes_even_more_internal(range)) + #[inline] + pub fn get_bytes_unchecked(&self, range: AllocRange) -> &[u8] { + &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()] } - /// Checks that these bytes are initialized and not pointer bytes, and then return them - /// as a slice. + /// Checks that these bytes are initialized, and then strip provenance (if possible) and return + /// them. /// /// It is the caller's responsibility to check bounds and alignment beforehand. /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods /// on `InterpCx` instead. #[inline] - pub fn get_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult<&[u8]> { - self.get_bytes_internal(cx, range, true) - } - - /// It is the caller's responsibility to handle uninitialized and pointer bytes. - /// However, this still checks that there is no provenance on the *edges*. - /// - /// It is the caller's responsibility to check bounds and alignment beforehand. - #[inline] - pub fn get_bytes_with_uninit_and_ptr( + pub fn get_bytes_strip_provenance( &self, cx: &impl HasDataLayout, range: AllocRange, ) -> AllocResult<&[u8]> { - self.get_bytes_internal(cx, range, false) + self.check_init(range)?; + if !Prov::OFFSET_IS_ADDR { + if self.range_has_provenance(cx, range) { + return Err(AllocError::ReadPointerAsBytes); + } + } + Ok(self.get_bytes_unchecked(range)) } /// Just calling this already marks everything as defined and removes provenance, @@ -415,13 +393,6 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> { /// Reading and writing. impl<Prov: Provenance, Extra> Allocation<Prov, Extra> { - /// Validates that this memory range is initiailized and contains no provenance. - pub fn check_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { - // This implicitly does all the checking we are asking for. - self.get_bytes(cx, range)?; - Ok(()) - } - /// Reads a *non-ZST* scalar. /// /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine @@ -438,43 +409,53 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> { range: AllocRange, read_provenance: bool, ) -> AllocResult<Scalar<Prov>> { - if read_provenance { - assert_eq!(range.size, cx.data_layout().pointer_size); - } - // First and foremost, if anything is uninit, bail. if self.is_init(range).is_err() { return Err(AllocError::InvalidUninitBytes(None)); } - // If we are doing a pointer read, and there is provenance exactly where we - // are reading, then we can put data and provenance back together and return that. - if read_provenance && let Some(&prov) = self.provenance.get(&range.start) { - // We already checked init and provenance, so we can use this function. - let bytes = self.get_bytes_even_more_internal(range); - let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); - let ptr = Pointer::new(prov, Size::from_bytes(bits)); - return Ok(Scalar::from_pointer(ptr, cx)); - } + // Get the integer part of the result. We HAVE TO check provenance before returning this! + let bytes = self.get_bytes_unchecked(range); + let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); - // If we are *not* reading a pointer, and we can just ignore provenance, - // then do exactly that. - if !read_provenance && Prov::OFFSET_IS_ADDR { - // We just strip provenance. - let bytes = self.get_bytes_even_more_internal(range); - let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); - return Ok(Scalar::from_uint(bits, range.size)); + if read_provenance { + assert_eq!(range.size, cx.data_layout().pointer_size); + + // When reading data with provenance, the easy case is finding provenance exactly where we + // are reading, then we can put data and provenance back together and return that. + if let Some(&prov) = self.provenance.get(&range.start) { + // Now we can return the bits, with their appropriate provenance. + let ptr = Pointer::new(prov, Size::from_bytes(bits)); + return Ok(Scalar::from_pointer(ptr, cx)); + } + + // If we can work on pointers byte-wise, join the byte-wise provenances. + if Prov::OFFSET_IS_ADDR { + let mut prov = self.offset_get_provenance(cx, range.start); + for offset in 1..range.size.bytes() { + let this_prov = + self.offset_get_provenance(cx, range.start + Size::from_bytes(offset)); + prov = Prov::join(prov, this_prov); + } + // Now use this provenance. + let ptr = Pointer::new(prov, Size::from_bytes(bits)); + return Ok(Scalar::from_maybe_pointer(ptr, cx)); + } + } else { + // We are *not* reading a pointer. + // If we can just ignore provenance, do exactly that. + if Prov::OFFSET_IS_ADDR { + // We just strip provenance. + return Ok(Scalar::from_uint(bits, range.size)); + } } - // It's complicated. Better make sure there is no provenance anywhere. - // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then - // `read_pointer` is true and we ideally would distinguish the following two cases: - // - The entire `range` is covered by the same provenance, stored in two separate entries of - // the provenance map. Then we should return a pointer with that provenance. - // - The range has inhomogeneous provenance. Then we should return just the - // underlying bits. - let bytes = self.get_bytes(cx, range)?; - let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); + // Fallback path for when we cannot treat provenance bytewise or ignore it. + assert!(!Prov::OFFSET_IS_ADDR); + if self.range_has_provenance(cx, range) { + return Err(AllocError::ReadPointerAsBytes); + } + // There is no provenance, we can just return the bits. Ok(Scalar::from_uint(bits, range.size)) } @@ -534,6 +515,13 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> { self.provenance.range(Size::from_bytes(start)..range.end()) } + /// Get the provenance of a single byte. + fn offset_get_provenance(&self, cx: &impl HasDataLayout, offset: Size) -> Option<Prov> { + let prov = self.range_get_provenance(cx, alloc_range(offset, Size::from_bytes(1))); + assert!(prov.len() <= 1); + prov.first().map(|(_offset, prov)| *prov) + } + /// Returns whether this allocation has progrnance overlapping with the given range. /// /// Note: this function exists to allow `range_get_provenance` to be private, in order to somewhat @@ -543,12 +531,6 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> { !self.range_get_provenance(cx, range).is_empty() } - /// Checks that there is no provenance overlapping with the given range. - #[inline(always)] - fn check_provenance(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { - if self.range_has_provenance(cx, range) { Err(AllocError::ReadPointerAsBytes) } else { Ok(()) } - } - /// Removes all provenance inside the given range. /// If there is provenance overlapping with the edges, it /// are removed as well *and* the bytes they cover are marked as @@ -606,14 +588,6 @@ impl<Prov: Copy, Extra> Allocation<Prov, Extra> { Ok(()) } - - /// Errors if there is provenance overlapping with the edges of the given memory range. - #[inline] - fn check_provenance_edges(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { - self.check_provenance(cx, alloc_range(range.start, Size::ZERO))?; - self.check_provenance(cx, alloc_range(range.end(), Size::ZERO))?; - Ok(()) - } } /// Stores the provenance information of pointers stored in memory. diff --git a/compiler/rustc_middle/src/mir/interpret/error.rs b/compiler/rustc_middle/src/mir/interpret/error.rs index cecb55578d3..e4039cc7c68 100644 --- a/compiler/rustc_middle/src/mir/interpret/error.rs +++ b/compiler/rustc_middle/src/mir/interpret/error.rs @@ -401,14 +401,18 @@ impl fmt::Display for UndefinedBehaviorInfo { pub enum UnsupportedOpInfo { /// Free-form case. Only for errors that are never caught! Unsupported(String), - /// Encountered a pointer where we needed raw bytes. - ReadPointerAsBytes, /// Overwriting parts of a pointer; the resulting state cannot be represented in our /// `Allocation` data structure. See <https://github.com/rust-lang/miri/issues/2181>. PartialPointerOverwrite(Pointer<AllocId>), + /// Attempting to `copy` parts of a pointer to somewhere else; the resulting state cannot be + /// represented in our `Allocation` data structure. See + /// <https://github.com/rust-lang/miri/issues/2181>. + PartialPointerCopy(Pointer<AllocId>), // // The variants below are only reachable from CTFE/const prop, miri will never emit them. // + /// Encountered a pointer where we needed raw bytes. + ReadPointerAsBytes, /// Accessing thread local statics ThreadLocalStatic(DefId), /// Accessing an unsupported extern static. @@ -420,10 +424,13 @@ impl fmt::Display for UnsupportedOpInfo { use UnsupportedOpInfo::*; match self { Unsupported(ref msg) => write!(f, "{msg}"), - ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"), PartialPointerOverwrite(ptr) => { write!(f, "unable to overwrite parts of a pointer in memory at {ptr:?}") } + PartialPointerCopy(ptr) => { + write!(f, "unable to copy parts of a pointer from memory at {ptr:?}") + } + ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"), ThreadLocalStatic(did) => write!(f, "cannot access thread local static ({did:?})"), ReadExternStatic(did) => write!(f, "cannot read from extern static ({did:?})"), } diff --git a/compiler/rustc_middle/src/mir/interpret/pointer.rs b/compiler/rustc_middle/src/mir/interpret/pointer.rs index 384954cbbd5..5fa802236ed 100644 --- a/compiler/rustc_middle/src/mir/interpret/pointer.rs +++ b/compiler/rustc_middle/src/mir/interpret/pointer.rs @@ -125,6 +125,9 @@ pub trait Provenance: Copy + fmt::Debug { /// Otherwise this function is best-effort (but must agree with `Machine::ptr_get_alloc`). /// (Identifying the offset in that allocation, however, is harder -- use `Memory::ptr_get_alloc` for that.) fn get_alloc_id(self) -> Option<AllocId>; + + /// Defines the 'join' of provenance: what happens when doing a pointer load and different bytes have different provenance. + fn join(left: Option<Self>, right: Option<Self>) -> Option<Self>; } impl Provenance for AllocId { @@ -152,6 +155,10 @@ impl Provenance for AllocId { fn get_alloc_id(self) -> Option<AllocId> { Some(self) } + + fn join(_left: Option<Self>, _right: Option<Self>) -> Option<Self> { + panic!("merging provenance is not supported when `OFFSET_IS_ADDR` is false") + } } /// Represents a pointer in the Miri engine. diff --git a/compiler/rustc_middle/src/mir/interpret/value.rs b/compiler/rustc_middle/src/mir/interpret/value.rs index 1ba16025e32..d4fad7f1ecd 100644 --- a/compiler/rustc_middle/src/mir/interpret/value.rs +++ b/compiler/rustc_middle/src/mir/interpret/value.rs @@ -507,7 +507,7 @@ pub fn get_slice_bytes<'tcx>(cx: &impl HasDataLayout, val: ConstValue<'tcx>) -> if let ConstValue::Slice { data, start, end } = val { let len = end - start; data.inner() - .get_bytes( + .get_bytes_strip_provenance( cx, AllocRange { start: Size::from_bytes(start), size: Size::from_bytes(len) }, ) diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 4e886ff1592..75327cff368 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -2719,7 +2719,7 @@ fn pretty_print_const_value<'tcx>( let n = n.kind().try_to_bits(tcx.data_layout.pointer_size).unwrap(); // cast is ok because we already checked for pointer size (32 or 64 bit) above let range = AllocRange { start: offset, size: Size::from_bytes(n) }; - let byte_str = alloc.inner().get_bytes(&tcx, range).unwrap(); + let byte_str = alloc.inner().get_bytes_strip_provenance(&tcx, range).unwrap(); fmt.write_str("*")?; pretty_print_byte_str(fmt, byte_str)?; return Ok(()); diff --git a/compiler/rustc_middle/src/ty/print/pretty.rs b/compiler/rustc_middle/src/ty/print/pretty.rs index cc55b7e8611..2dfbcb7fa31 100644 --- a/compiler/rustc_middle/src/ty/print/pretty.rs +++ b/compiler/rustc_middle/src/ty/print/pretty.rs @@ -1275,7 +1275,7 @@ pub trait PrettyPrinter<'tcx>: let range = AllocRange { start: offset, size: Size::from_bytes(len) }; if let Ok(byte_str) = - alloc.inner().get_bytes(&self.tcx(), range) + alloc.inner().get_bytes_strip_provenance(&self.tcx(), range) { p!(pretty_print_byte_str(byte_str)) } else { |
