about summary refs log tree commit diff
path: root/compiler/rustc_lint/src
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_lint/src')
-rw-r--r-- compiler/rustc_lint/src/invalid_from_utf8.rs 85
-rw-r--r-- compiler/rustc_lint/src/lib.rs 3
-rw-r--r-- compiler/rustc_lint/src/lints.rs 10
3 files changed, 98 insertions, 0 deletions
diff --git a/compiler/rustc_lint/src/invalid_from_utf8.rs b/compiler/rustc_lint/src/invalid_from_utf8.rs
new file mode 100644
index 00000000000..2118deba5c7
--- /dev/null
+++ b/compiler/rustc_lint/src/invalid_from_utf8.rs
@@ -0,0 +1,85 @@
+use std::str::Utf8Error;
+
+use rustc_ast::{BorrowKind, LitKind};
+use rustc_hir::{Expr, ExprKind};
+use rustc_span::source_map::Spanned;
+use rustc_span::sym;
+
+use crate::lints::InvalidFromUtf8UncheckedDiag;
+use crate::{LateContext, LateLintPass, LintContext};
+
+declare_lint! {
+    /// The `invalid_from_utf8_unchecked` lint checks for calls to
+    /// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut` with a
+    /// byte string literal or a borrowed array of byte literals that is not valid UTF-8.
+    ///
+    /// ### Example
+    ///
+    /// ```rust,compile_fail
+    /// # #[allow(unused)]
+    /// unsafe {
+    ///     std::str::from_utf8_unchecked(b"Ru\x82st");
+    /// }
+    /// ```
+    ///
+    /// {{produces}}
+    ///
+    /// ### Explanation
+    ///
+    /// Creating such a `str` results in undefined behavior, as stated in the documentation
+    /// of `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`.
+    pub INVALID_FROM_UTF8_UNCHECKED,
+    Deny, // default level of the lint
+    "using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
+}
+
+declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED]); // pass that emits the lint above
+
+impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
+    fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) { // lint one HIR expression
+        if let ExprKind::Call(path, [arg]) = expr.kind // only calls with exactly one argument
+            && let ExprKind::Path(ref qpath) = path.kind // callee must be written as a path
+            && let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id() // resolve the callee
+            && let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id) // its diagnostic name, if any
+            && [sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
+        { // callee is one of the two unchecked conversions
+            let lint = |utf8_error: Utf8Error| { // emits the diagnostic for a detected UTF-8 error
+                let method = diag_item.as_str().strip_prefix("str_").unwrap(); // both accepted names begin with `str_`
+                cx.emit_spanned_lint(INVALID_FROM_UTF8_UNCHECKED, expr.span, InvalidFromUtf8UncheckedDiag {
+                    method: format!("std::str::{method}"),
+                    valid_up_to: utf8_error.valid_up_to(), // index up to which the bytes were valid UTF-8
+                    label: arg.span, // the label points at the offending argument
+                })
+            };
+
+            match &arg.kind { // only two argument shapes are analyzed
+                ExprKind::Lit(Spanned { node: lit, .. }) => { // the argument is itself a literal
+                    if let LitKind::ByteStr(bytes, _) = &lit // only byte string literals are validated
+                        && let Err(utf8_error) = std::str::from_utf8(bytes)
+                    {
+                        lint(utf8_error);
+                    }
+                },
+                ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
+                    let elements = args.iter().map(|e|{ // borrowed array (any mutability): gather its bytes
+                        match &e.kind {
+                            ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
+                                LitKind::Byte(b) => Some(*b), // byte literal element
+                                LitKind::Int(b, _) => Some(*b as u8), // integer literal; `as u8` keeps the low 8 bits
+                                _ => None // any other literal kind: give up on this array
+                            }
+                            _ => None // non-literal element: give up on this array
+                        }
+                    }).collect::<Option<Vec<_>>>(); // `None` as soon as one element was rejected
+
+                    if let Some(elements) = elements
+                        && let Err(utf8_error) = std::str::from_utf8(&elements)
+                    {
+                        lint(utf8_error);
+                    }
+                }
+                _ => {} // every other argument shape is left unchecked
+            }
+        }
+    }
+}
diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs
index dfddfe09ab3..c62109b2986 100644
--- a/compiler/rustc_lint/src/lib.rs
+++ b/compiler/rustc_lint/src/lib.rs
@@ -60,6 +60,7 @@ mod expect;
 mod for_loops_over_fallibles;
 pub mod hidden_unicode_codepoints;
 mod internal;
+mod invalid_from_utf8;
 mod late;
 mod let_underscore;
 mod levels;
@@ -102,6 +103,7 @@ use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
 use for_loops_over_fallibles::*;
 use hidden_unicode_codepoints::*;
 use internal::*;
+use invalid_from_utf8::*;
 use let_underscore::*;
 use map_unit_fn::*;
 use methods::*;
@@ -207,6 +209,7 @@ late_lint_methods!(
             HardwiredLints: HardwiredLints,
             ImproperCTypesDeclarations: ImproperCTypesDeclarations,
             ImproperCTypesDefinitions: ImproperCTypesDefinitions,
+            InvalidFromUtf8: InvalidFromUtf8,
             VariantSizeDifferences: VariantSizeDifferences,
             BoxPointers: BoxPointers,
             PathStatements: PathStatements,
diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs
index de1c2be2875..5969bc5ca5a 100644
--- a/compiler/rustc_lint/src/lints.rs
+++ b/compiler/rustc_lint/src/lints.rs
@@ -699,6 +699,16 @@ pub struct ForgetCopyDiag<'a> {
     pub label: Span,
 }
 
+// invalid_from_utf8.rs
+#[derive(LintDiagnostic)]
+#[diag(lint_invalid_from_utf8_unchecked)]
+pub struct InvalidFromUtf8UncheckedDiag { // diagnostic emitted for `INVALID_FROM_UTF8_UNCHECKED`
+    pub method: String, // full path of the called function, built as `std::str::{method}`
+    pub valid_up_to: usize, // taken from `Utf8Error::valid_up_to()` of the failed validation
+    #[label]
+    pub label: Span, // span of the call's argument
+}
+
 // hidden_unicode_codepoints.rs
 #[derive(LintDiagnostic)]
 #[diag(lint_hidden_unicode_codepoints)]