about summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
author Guillaume Gomez <guillaume1.gomez@gmail.com> 2023-09-21 13:25:38 +0200
committer GitHub <noreply@github.com> 2023-09-21 13:25:38 +0200
commit 9ce64bae9415d0680f89137d705a06629b29fcae (patch)
tree ec5c9e750345ea4b42856a0a66b04b1cee962c6b /compiler
parent e4a361a48a59ead52b302aaa2e1d9d345264935a (diff)
parent f156d3bc5720603b8ce7a6a33bbfd464f9ca2e84 (diff)
download rust-9ce64bae9415d0680f89137d705a06629b29fcae.tar.gz
rust-9ce64bae9415d0680f89137d705a06629b29fcae.zip
Rollup merge of #115257 - Urgau:invalid-utf8-walk-up-hir, r=Nilstrieb
Improve invalid UTF-8 lint by finding the expression initializer

This PR introduces a small mechanism to walk up the HIR through bindings, if/else, consts, ... when trying to lint on invalid UTF-8.

Fixes https://github.com/rust-lang/rust/issues/115208
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/rustc_lint/src/context.rs 48
-rw-r--r-- compiler/rustc_lint/src/invalid_from_utf8.rs 25
2 files changed, 62 insertions, 11 deletions
diff --git a/compiler/rustc_lint/src/context.rs b/compiler/rustc_lint/src/context.rs
index 7a336a8f694..460d54739a2 100644
--- a/compiler/rustc_lint/src/context.rs
+++ b/compiler/rustc_lint/src/context.rs
@@ -1315,6 +1315,54 @@ impl<'tcx> LateContext<'tcx> {
                 tcx.try_normalize_erasing_regions(self.param_env, proj).ok()
             })
     }
+
+    /// If the given expression is a local binding, find the initializer expression.
+    /// If that initializer expression is another local or **outside** (`const`/`static`)
+    /// binding, find its initializer again.
+    ///
+    /// This process repeats as long as possible (but usually no more than once).
+    /// Type-check adjustments are not taken into account in this function.
+    ///
+    /// Examples:
+    /// ```
+    /// const ABC: i32 = 1;
+    /// //               ^ output
+    /// let def = ABC;
+    /// dbg!(def);
+    /// //   ^^^ input
+    ///
+    /// // or...
+    /// let abc = 1;
+    /// let def = abc + 2;
+    /// //        ^^^^^^^ output
+    /// dbg!(def);
+    /// //   ^^^ input
+    /// ```
+    pub fn expr_or_init<'a>(&self, mut expr: &'a hir::Expr<'tcx>) -> &'a hir::Expr<'tcx> {
+        expr = expr.peel_blocks();
+
+        while let hir::ExprKind::Path(ref qpath) = expr.kind
+            && let Some(parent_node) = match self.qpath_res(qpath, expr.hir_id) {
+                Res::Local(hir_id) => self.tcx.hir().find_parent(hir_id),
+                Res::Def(_, def_id) => self.tcx.hir().get_if_local(def_id),
+                _ => None,
+            }
+            && let Some(init) = match parent_node {
+                hir::Node::Expr(expr) => Some(expr),
+                hir::Node::Local(hir::Local { init, .. }) => *init,
+                hir::Node::Item(item) => match item.kind {
+                    hir::ItemKind::Const(.., body_id) | hir::ItemKind::Static(.., body_id) => {
+                        Some(self.tcx.hir().body(body_id).value)
+                    }
+                    _ => None
+                }
+                _ => None
+            }
+        {
+            expr = init.peel_blocks();
+        }
+        expr
+    }
 }
 
 impl<'tcx> abi::HasDataLayout for LateContext<'tcx> {
diff --git a/compiler/rustc_lint/src/invalid_from_utf8.rs b/compiler/rustc_lint/src/invalid_from_utf8.rs
index 3291286ad67..1841e7c85a8 100644
--- a/compiler/rustc_lint/src/invalid_from_utf8.rs
+++ b/compiler/rustc_lint/src/invalid_from_utf8.rs
@@ -1,6 +1,6 @@
 use std::str::Utf8Error;
 
-use rustc_ast::{BorrowKind, LitKind};
+use rustc_ast::LitKind;
 use rustc_hir::{Expr, ExprKind};
 use rustc_span::source_map::Spanned;
 use rustc_span::sym;
@@ -11,7 +11,7 @@ use crate::{LateContext, LateLintPass, LintContext};
 declare_lint! {
     /// The `invalid_from_utf8_unchecked` lint checks for calls to
     /// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
-    /// with an invalid UTF-8 literal.
+    /// with a known invalid UTF-8 value.
     ///
     /// ### Example
     ///
@@ -36,7 +36,7 @@ declare_lint! {
 declare_lint! {
     /// The `invalid_from_utf8` lint checks for calls to
     /// `std::str::from_utf8` and `std::str::from_utf8_mut`
-    /// with an invalid UTF-8 literal.
+    /// with a known invalid UTF-8 value.
     ///
     /// ### Example
     ///
@@ -67,8 +67,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
             && [sym::str_from_utf8, sym::str_from_utf8_mut,
                 sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
         {
-            let lint = |utf8_error: Utf8Error| {
-                let label = arg.span;
+            let lint = |label, utf8_error: Utf8Error| {
                 let method = diag_item.as_str().strip_prefix("str_").unwrap();
                 let method = format!("std::str::{method}");
                 let valid_up_to = utf8_error.valid_up_to();
@@ -78,22 +77,26 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
                     if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
                     expr.span,
                     if is_unchecked_variant {
-                        InvalidFromUtf8Diag::Unchecked { method,  valid_up_to, label }
+                        InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
                     } else {
-                        InvalidFromUtf8Diag::Checked { method,  valid_up_to, label }
+                        InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
                     }
                 )
             };
 
-            match &arg.kind {
+            let mut init = cx.expr_or_init(arg);
+            while let ExprKind::AddrOf(.., inner) = init.kind {
+                init = cx.expr_or_init(inner);
+            }
+            match init.kind {
                 ExprKind::Lit(Spanned { node: lit, .. }) => {
                     if let LitKind::ByteStr(bytes, _) = &lit
                         && let Err(utf8_error) = std::str::from_utf8(bytes)
                     {
-                        lint(utf8_error);
+                        lint(init.span, utf8_error);
                     }
                 },
-                ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
+                ExprKind::Array(args) => {
                     let elements = args.iter().map(|e|{
                         match &e.kind {
                             ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
@@ -108,7 +111,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
                     if let Some(elements) = elements
                         && let Err(utf8_error) = std::str::from_utf8(&elements)
                     {
-                        lint(utf8_error);
+                        lint(init.span, utf8_error);
                     }
                 }
                 _ => {}