about summary refs log tree commit diff
diff options
context:
space:
mode:
authorRalf Jung <post@ralfj.de>2020-05-22 11:32:18 +0200
committerGitHub <noreply@github.com>2020-05-22 11:32:18 +0200
commit2059112eb40619d39f89102f832c4450d6b8ffc5 (patch)
tree8645542cf01b7df8bf848de634845272ab815390
parentc60b675e280fedded8d8487acd051cd342e486f2 (diff)
parent0148a7f26ce636ac22ac7797dcd7d292c59e8576 (diff)
downloadrust-2059112eb40619d39f89102f832c4450d6b8ffc5.tar.gz
rust-2059112eb40619d39f89102f832c4450d6b8ffc5.zip
Rollup merge of #71610 - divergentdave:InvalidUndefBytes-range, r=RalfJung
InvalidUndefBytes: Track size of undef region used

This PR adds a size to `UndefinedBehaviorInfo::InvalidUndefBytes`, to keep track of how many undefined bytes in a row were accessed, and changes a few methods to pass this information through. This additional information will eventually be used in Miri to improve diagnostics for this UB error. See also rust-lang/miri#1354 for prior discussion.

I expect Miri will break the next time its submodule is updated, due to this change to the `InvalidUndefBytes`. (The current commit for src/tools/miri predates rust-lang/miri#1354, and thus does not try to destructure the `InvalidUndefBytes` variant) I have a corresponding change prepared for that repository.

r? @RalfJung
-rw-r--r--src/librustc_middle/mir/interpret/allocation.rs37
-rw-r--r--src/librustc_middle/mir/interpret/error.rs39
-rw-r--r--src/librustc_middle/mir/interpret/mod.rs2
-rw-r--r--src/librustc_mir/interpret/validity.rs14
4 files changed, 67 insertions, 25 deletions
diff --git a/src/librustc_middle/mir/interpret/allocation.rs b/src/librustc_middle/mir/interpret/allocation.rs
index 2b6cf224e2c..96195db0bac 100644
--- a/src/librustc_middle/mir/interpret/allocation.rs
+++ b/src/librustc_middle/mir/interpret/allocation.rs
@@ -11,6 +11,7 @@ use rustc_target::abi::{Align, HasDataLayout, Size};
 
 use super::{
     read_target_uint, write_target_uint, AllocId, InterpResult, Pointer, Scalar, ScalarMaybeUninit,
+    UninitBytesAccess,
 };
 
 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
@@ -545,17 +546,23 @@ impl<'tcx, Tag: Copy, Extra> Allocation<Tag, Extra> {
 impl<'tcx, Tag: Copy, Extra> Allocation<Tag, Extra> {
     /// Checks whether the given range  is entirely defined.
     ///
-    /// Returns `Ok(())` if it's defined. Otherwise returns the index of the byte
-    /// at which the first undefined access begins.
-    fn is_defined(&self, ptr: Pointer<Tag>, size: Size) -> Result<(), Size> {
+    /// Returns `Ok(())` if it's defined. Otherwise returns the range of byte
+    /// indexes of the first contiguous undefined access.
+    fn is_defined(&self, ptr: Pointer<Tag>, size: Size) -> Result<(), Range<Size>> {
         self.init_mask.is_range_initialized(ptr.offset, ptr.offset + size) // `Size` addition
     }
 
-    /// Checks that a range of bytes is defined. If not, returns the `ReadUndefBytes`
-    /// error which will report the first byte which is undefined.
+    /// Checks that a range of bytes is defined. If not, returns the `InvalidUndefBytes`
+    /// error which will report the first range of bytes which is undefined.
     fn check_defined(&self, ptr: Pointer<Tag>, size: Size) -> InterpResult<'tcx> {
-        self.is_defined(ptr, size)
-            .or_else(|idx| throw_ub!(InvalidUninitBytes(Some(Pointer::new(ptr.alloc_id, idx)))))
+        self.is_defined(ptr, size).or_else(|idx_range| {
+            throw_ub!(InvalidUninitBytes(Some(Box::new(UninitBytesAccess {
+                access_ptr: ptr.erase_tag(),
+                access_size: size,
+                uninit_ptr: Pointer::new(ptr.alloc_id, idx_range.start),
+                uninit_size: idx_range.end - idx_range.start, // `Size` subtraction
+            }))))
+        })
     }
 
     pub fn mark_definedness(&mut self, ptr: Pointer<Tag>, size: Size, new_state: bool) {
@@ -758,19 +765,25 @@ impl InitMask {
 
     /// Checks whether the range `start..end` (end-exclusive) is entirely initialized.
     ///
-    /// Returns `Ok(())` if it's initialized. Otherwise returns the index of the byte
-    /// at which the first uninitialized access begins.
+    /// Returns `Ok(())` if it's initialized. Otherwise returns a range of byte
+    /// indexes for the first contiguous span of the uninitialized access.
     #[inline]
-    pub fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), Size> {
+    pub fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), Range<Size>> {
         if end > self.len {
-            return Err(self.len);
+            return Err(self.len..end);
         }
 
         // FIXME(oli-obk): optimize this for allocations larger than a block.
         let idx = (start.bytes()..end.bytes()).map(Size::from_bytes).find(|&i| !self.get(i));
 
         match idx {
-            Some(idx) => Err(idx),
+            Some(idx) => {
+                let undef_end = (idx.bytes()..end.bytes())
+                    .map(Size::from_bytes)
+                    .find(|&i| self.get(i))
+                    .unwrap_or(end);
+                Err(idx..undef_end)
+            }
             None => Ok(()),
         }
     }
diff --git a/src/librustc_middle/mir/interpret/error.rs b/src/librustc_middle/mir/interpret/error.rs
index 06fe3793b23..d32a1473449 100644
--- a/src/librustc_middle/mir/interpret/error.rs
+++ b/src/librustc_middle/mir/interpret/error.rs
@@ -6,7 +6,7 @@ use crate::ty::query::TyCtxtAt;
 use crate::ty::{self, layout, tls, FnSig, Ty};
 
 use rustc_data_structures::sync::Lock;
-use rustc_errors::{struct_span_err, DiagnosticBuilder, ErrorReported};
+use rustc_errors::{pluralize, struct_span_err, DiagnosticBuilder, ErrorReported};
 use rustc_hir as hir;
 use rustc_hir::definitions::DefPathData;
 use rustc_macros::HashStable;
@@ -327,6 +327,19 @@ impl fmt::Display for CheckInAllocMsg {
     }
 }
 
+/// Details of an access to uninitialized bytes where it is not allowed.
+#[derive(Debug)]
+pub struct UninitBytesAccess {
+    /// Location of the original memory access.
+    pub access_ptr: Pointer,
+    /// Size of the original memory access.
+    pub access_size: Size,
+    /// Location of the first uninitialized byte that was accessed.
+    pub uninit_ptr: Pointer,
+    /// Number of consecutive uninitialized bytes that were accessed.
+    pub uninit_size: Size,
+}
+
 /// Error information for when the program caused Undefined Behavior.
 pub enum UndefinedBehaviorInfo<'tcx> {
     /// Free-form case. Only for errors that are never caught!
@@ -384,7 +397,7 @@ pub enum UndefinedBehaviorInfo<'tcx> {
     /// Using a string that is not valid UTF-8,
     InvalidStr(std::str::Utf8Error),
     /// Using uninitialized data where it is not allowed.
-    InvalidUninitBytes(Option<Pointer>),
+    InvalidUninitBytes(Option<Box<UninitBytesAccess>>),
     /// Working with a local that is not currently live.
     DeadLocal,
     /// Data size is not equal to target size.
@@ -455,10 +468,18 @@ impl fmt::Display for UndefinedBehaviorInfo<'_> {
                 write!(f, "using {} as function pointer but it does not point to a function", p)
             }
             InvalidStr(err) => write!(f, "this string is not valid UTF-8: {}", err),
-            InvalidUninitBytes(Some(p)) => write!(
+            InvalidUninitBytes(Some(access)) => write!(
                 f,
-                "reading uninitialized memory at {}, but this operation requires initialized memory",
-                p
+                "reading {} byte{} of memory starting at {}, \
+                 but {} byte{} {} uninitialized starting at {}, \
+                 and this operation requires initialized memory",
+                access.access_size.bytes(),
+                pluralize!(access.access_size.bytes()),
+                access.access_ptr,
+                access.uninit_size.bytes(),
+                pluralize!(access.uninit_size.bytes()),
+                if access.uninit_size.bytes() != 1 { "are" } else { "is" },
+                access.uninit_ptr,
             ),
             InvalidUninitBytes(None) => write!(
                 f,
@@ -556,6 +577,9 @@ impl dyn MachineStopType {
     }
 }
 
+#[cfg(target_arch = "x86_64")]
+static_assert_size!(InterpError<'_>, 40);
+
 pub enum InterpError<'tcx> {
     /// The program caused undefined behavior.
     UndefinedBehavior(UndefinedBehaviorInfo<'tcx>),
@@ -604,7 +628,10 @@ impl InterpError<'_> {
             InterpError::MachineStop(b) => mem::size_of_val::<dyn MachineStopType>(&**b) > 0,
             InterpError::Unsupported(UnsupportedOpInfo::Unsupported(_))
             | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::ValidationFailure(_))
-            | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::Ub(_)) => true,
+            | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::Ub(_))
+            | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::InvalidUninitBytes(Some(_))) => {
+                true
+            }
             _ => false,
         }
     }
diff --git a/src/librustc_middle/mir/interpret/mod.rs b/src/librustc_middle/mir/interpret/mod.rs
index 71adb2fa477..d9e52af8900 100644
--- a/src/librustc_middle/mir/interpret/mod.rs
+++ b/src/librustc_middle/mir/interpret/mod.rs
@@ -119,7 +119,7 @@ use crate::ty::{self, Instance, Ty, TyCtxt};
 pub use self::error::{
     struct_error, CheckInAllocMsg, ConstEvalErr, ConstEvalRawResult, ConstEvalResult, ErrorHandled,
     FrameInfo, InterpError, InterpErrorInfo, InterpResult, InvalidProgramInfo, MachineStopType,
-    ResourceExhaustionInfo, UndefinedBehaviorInfo, UnsupportedOpInfo,
+    ResourceExhaustionInfo, UndefinedBehaviorInfo, UninitBytesAccess, UnsupportedOpInfo,
 };
 
 pub use self::value::{get_slice_bytes, ConstValue, RawConst, Scalar, ScalarMaybeUninit};
diff --git a/src/librustc_mir/interpret/validity.rs b/src/librustc_mir/interpret/validity.rs
index f6020641d3e..c83555d65fa 100644
--- a/src/librustc_mir/interpret/validity.rs
+++ b/src/librustc_mir/interpret/validity.rs
@@ -366,7 +366,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
         let place = try_validation!(
             self.ecx.ref_to_mplace(value),
             self.path,
-            err_ub!(InvalidUninitBytes(..)) => { "uninitialized {}", kind },
+            err_ub!(InvalidUninitBytes { .. }) => { "uninitialized {}", kind },
         );
         if place.layout.is_unsized() {
             self.check_wide_ptr_meta(place.meta, place.layout)?;
@@ -514,7 +514,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
                 let place = try_validation!(
                     self.ecx.ref_to_mplace(self.ecx.read_immediate(value)?),
                     self.path,
-                    err_ub!(InvalidUninitBytes(..)) => { "uninitialized raw pointer" },
+                    err_ub!(InvalidUninitBytes { .. } ) => { "uninitialized raw pointer" },
                 );
                 if place.layout.is_unsized() {
                     self.check_wide_ptr_meta(place.meta, place.layout)?;
@@ -592,7 +592,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
         let value = try_validation!(
             value.not_undef(),
             self.path,
-            err_ub!(InvalidUninitBytes(..)) => { "{}", value }
+            err_ub!(InvalidUninitBytes { .. }) => { "{}", value }
                 expected { "something {}", wrapping_range_format(valid_range, max_hi) },
         );
         let bits = match value.to_bits_or_ptr(op.layout.size, self.ecx) {
@@ -803,12 +803,14 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M>
                         // For some errors we might be able to provide extra information.
                         // (This custom logic does not fit the `try_validation!` macro.)
                         match err.kind {
-                            err_ub!(InvalidUninitBytes(Some(ptr))) => {
+                            err_ub!(InvalidUninitBytes(Some(access))) => {
                                 // Some byte was uninitialized, determine which
                                 // element that byte belongs to so we can
                                 // provide an index.
-                                let i = usize::try_from(ptr.offset.bytes() / layout.size.bytes())
-                                    .unwrap();
+                                let i = usize::try_from(
+                                    access.uninit_ptr.offset.bytes() / layout.size.bytes(),
+                                )
+                                .unwrap();
                                 self.path.push(PathElem::ArrayElem(i));
 
                                 throw_validation_failure!(self.path, { "uninitialized bytes" })