about summary refs log tree commit diff
diff options
context:
space:
mode:
authorUrgau <urgau@numericable.fr>2023-08-26 22:09:18 +0200
committerUrgau <urgau@numericable.fr>2023-09-21 10:16:29 +0200
commitf156d3bc5720603b8ce7a6a33bbfd464f9ca2e84 (patch)
treecb77ee1da447171eae7b5317ecfe8ffcb1e900e1
parent6192690b92d32d0d8511e1dd33b7cd35a52d6209 (diff)
downloadrust-f156d3bc5720603b8ce7a6a33bbfd464f9ca2e84.tar.gz
rust-f156d3bc5720603b8ce7a6a33bbfd464f9ca2e84.zip
Improve invalid UTF-8 lint by finding the expression initializer
-rw-r--r--compiler/rustc_lint/src/invalid_from_utf8.rs25
-rw-r--r--tests/ui/lint/invalid_from_utf8.rs27
-rw-r--r--tests/ui/lint/invalid_from_utf8.stderr135
3 files changed, 137 insertions, 50 deletions
diff --git a/compiler/rustc_lint/src/invalid_from_utf8.rs b/compiler/rustc_lint/src/invalid_from_utf8.rs
index 3291286ad67..1841e7c85a8 100644
--- a/compiler/rustc_lint/src/invalid_from_utf8.rs
+++ b/compiler/rustc_lint/src/invalid_from_utf8.rs
@@ -1,6 +1,6 @@
 use std::str::Utf8Error;
 
-use rustc_ast::{BorrowKind, LitKind};
+use rustc_ast::LitKind;
 use rustc_hir::{Expr, ExprKind};
 use rustc_span::source_map::Spanned;
 use rustc_span::sym;
@@ -11,7 +11,7 @@ use crate::{LateContext, LateLintPass, LintContext};
 declare_lint! {
     /// The `invalid_from_utf8_unchecked` lint checks for calls to
     /// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
-    /// with an invalid UTF-8 literal.
+    /// with a known invalid UTF-8 value.
     ///
     /// ### Example
     ///
@@ -36,7 +36,7 @@ declare_lint! {
 declare_lint! {
     /// The `invalid_from_utf8` lint checks for calls to
     /// `std::str::from_utf8` and `std::str::from_utf8_mut`
-    /// with an invalid UTF-8 literal.
+    /// with a known invalid UTF-8 value.
     ///
     /// ### Example
     ///
@@ -67,8 +67,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
             && [sym::str_from_utf8, sym::str_from_utf8_mut,
                 sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
         {
-            let lint = |utf8_error: Utf8Error| {
-                let label = arg.span;
+            let lint = |label, utf8_error: Utf8Error| {
                 let method = diag_item.as_str().strip_prefix("str_").unwrap();
                 let method = format!("std::str::{method}");
                 let valid_up_to = utf8_error.valid_up_to();
@@ -78,22 +77,26 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
                     if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
                     expr.span,
                     if is_unchecked_variant {
-                        InvalidFromUtf8Diag::Unchecked { method,  valid_up_to, label }
+                        InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
                     } else {
-                        InvalidFromUtf8Diag::Checked { method,  valid_up_to, label }
+                        InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
                     }
                 )
             };
 
-            match &arg.kind {
+            let mut init = cx.expr_or_init(arg);
+            while let ExprKind::AddrOf(.., inner) = init.kind {
+                init = cx.expr_or_init(inner);
+            }
+            match init.kind {
                 ExprKind::Lit(Spanned { node: lit, .. }) => {
                     if let LitKind::ByteStr(bytes, _) = &lit
                         && let Err(utf8_error) = std::str::from_utf8(bytes)
                     {
-                        lint(utf8_error);
+                        lint(init.span, utf8_error);
                     }
                 },
-                ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
+                ExprKind::Array(args) => {
                     let elements = args.iter().map(|e|{
                         match &e.kind {
                             ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
@@ -108,7 +111,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
                     if let Some(elements) = elements
                         && let Err(utf8_error) = std::str::from_utf8(&elements)
                     {
-                        lint(utf8_error);
+                        lint(init.span, utf8_error);
                     }
                 }
                 _ => {}
diff --git a/tests/ui/lint/invalid_from_utf8.rs b/tests/ui/lint/invalid_from_utf8.rs
index 9c8c636812e..43ceffb71e5 100644
--- a/tests/ui/lint/invalid_from_utf8.rs
+++ b/tests/ui/lint/invalid_from_utf8.rs
@@ -1,6 +1,8 @@
 // check-pass
 
+#![feature(inline_const)]
 #![feature(concat_bytes)]
+
 #![warn(invalid_from_utf8_unchecked)]
 #![warn(invalid_from_utf8)]
 
@@ -90,4 +92,29 @@ pub fn from_utf8() {
     }
 }
 
+pub fn from_utf8_with_indirections() {
+    let mut a = [99, 108, 130, 105, 112, 112, 121];
+    std::str::from_utf8_mut(&mut a);
+    //~^ WARN calls to `std::str::from_utf8_mut`
+    let mut b = &mut a;
+    let mut c = b;
+    std::str::from_utf8_mut(c);
+    //~^ WARN calls to `std::str::from_utf8_mut`
+    let mut c = &[99, 108, 130, 105, 112, 112, 121];
+    std::str::from_utf8(c);
+    //~^ WARN calls to `std::str::from_utf8`
+    const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
+    std::str::from_utf8(&INVALID_1);
+    //~^ WARN calls to `std::str::from_utf8`
+    static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
+    std::str::from_utf8(&INVALID_2);
+    //~^ WARN calls to `std::str::from_utf8`
+    const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121];
+    std::str::from_utf8(INVALID_3);
+    //~^ WARN calls to `std::str::from_utf8`
+    const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] };
+    std::str::from_utf8(INVALID_4);
+    //~^ WARN calls to `std::str::from_utf8`
+}
+
 fn main() {}
diff --git a/tests/ui/lint/invalid_from_utf8.stderr b/tests/ui/lint/invalid_from_utf8.stderr
index 8e00d3bf872..884165d4f12 100644
--- a/tests/ui/lint/invalid_from_utf8.stderr
+++ b/tests/ui/lint/invalid_from_utf8.stderr
@@ -1,43 +1,43 @@
 warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:19:9
+  --> $DIR/invalid_from_utf8.rs:21:9
    |
 LL |         std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
-   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
-   |                                           |
-   |                                           the literal was valid UTF-8 up to the 2 bytes
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
+   |                                                |
+   |                                                the literal was valid UTF-8 up to the 2 bytes
    |
 note: the lint level is defined here
-  --> $DIR/invalid_from_utf8.rs:4:9
+  --> $DIR/invalid_from_utf8.rs:6:9
    |
 LL | #![warn(invalid_from_utf8_unchecked)]
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:21:9
+  --> $DIR/invalid_from_utf8.rs:23:9
    |
 LL |         std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
-   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
-   |                                           |
-   |                                           the literal was valid UTF-8 up to the 2 bytes
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
+   |                                                |
+   |                                                the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:39:9
+  --> $DIR/invalid_from_utf8.rs:41:9
    |
 LL |         std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
-   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
-   |                                       |
-   |                                       the literal was valid UTF-8 up to the 2 bytes
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
+   |                                        |
+   |                                        the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:41:9
+  --> $DIR/invalid_from_utf8.rs:43:9
    |
 LL |         std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
-   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
-   |                                       |
-   |                                       the literal was valid UTF-8 up to the 2 bytes
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
+   |                                        |
+   |                                        the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:43:9
+  --> $DIR/invalid_from_utf8.rs:45:9
    |
 LL |         std::str::from_utf8_unchecked(b"cl\x82ippy");
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
@@ -45,7 +45,7 @@ LL |         std::str::from_utf8_unchecked(b"cl\x82ippy");
    |                                       the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:45:9
+  --> $DIR/invalid_from_utf8.rs:47:9
    |
 LL |         std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
@@ -53,45 +53,45 @@ LL |         std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
    |                                       the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
-  --> $DIR/invalid_from_utf8.rs:62:9
+  --> $DIR/invalid_from_utf8.rs:64:9
    |
 LL |         std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
-   |         ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
-   |                                 |
-   |                                 the literal was valid UTF-8 up to the 2 bytes
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
+   |                                      |
+   |                                      the literal was valid UTF-8 up to the 2 bytes
    |
 note: the lint level is defined here
-  --> $DIR/invalid_from_utf8.rs:5:9
+  --> $DIR/invalid_from_utf8.rs:7:9
    |
 LL | #![warn(invalid_from_utf8)]
    |         ^^^^^^^^^^^^^^^^^
 
 warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
-  --> $DIR/invalid_from_utf8.rs:64:9
+  --> $DIR/invalid_from_utf8.rs:66:9
    |
 LL |         std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
-   |         ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
-   |                                 |
-   |                                 the literal was valid UTF-8 up to the 2 bytes
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
+   |                                      |
+   |                                      the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8` with a invalid literal always return an error
-  --> $DIR/invalid_from_utf8.rs:82:9
+  --> $DIR/invalid_from_utf8.rs:84:9
    |
 LL |         std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
-   |         ^^^^^^^^^^^^^^^^^^^^-----------------------------------^
-   |                             |
-   |                             the literal was valid UTF-8 up to the 2 bytes
+   |         ^^^^^^^^^^^^^^^^^^^^^----------------------------------^
+   |                              |
+   |                              the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8` with a invalid literal always return an error
-  --> $DIR/invalid_from_utf8.rs:84:9
+  --> $DIR/invalid_from_utf8.rs:86:9
    |
 LL |         std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
-   |         ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
-   |                             |
-   |                             the literal was valid UTF-8 up to the 2 bytes
+   |         ^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
+   |                              |
+   |                              the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8` with a invalid literal always return an error
-  --> $DIR/invalid_from_utf8.rs:86:9
+  --> $DIR/invalid_from_utf8.rs:88:9
    |
 LL |         std::str::from_utf8(b"cl\x82ippy");
    |         ^^^^^^^^^^^^^^^^^^^^-------------^
@@ -99,12 +99,69 @@ LL |         std::str::from_utf8(b"cl\x82ippy");
    |                             the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8` with a invalid literal always return an error
-  --> $DIR/invalid_from_utf8.rs:88:9
+  --> $DIR/invalid_from_utf8.rs:90:9
    |
 LL |         std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
    |         ^^^^^^^^^^^^^^^^^^^^---------------------------------^
    |                             |
    |                             the literal was valid UTF-8 up to the 2 bytes
 
-warning: 12 warnings emitted
+warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:97:5
+   |
+LL |     let mut a = [99, 108, 130, 105, 112, 112, 121];
+   |                 ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
+LL |     std::str::from_utf8_mut(&mut a);
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:101:5
+   |
+LL |     let mut a = [99, 108, 130, 105, 112, 112, 121];
+   |                 ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
+...
+LL |     std::str::from_utf8_mut(c);
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+warning: calls to `std::str::from_utf8` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:104:5
+   |
+LL |     let mut c = &[99, 108, 130, 105, 112, 112, 121];
+   |                  ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
+LL |     std::str::from_utf8(c);
+   |     ^^^^^^^^^^^^^^^^^^^^^^
+
+warning: calls to `std::str::from_utf8` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:107:5
+   |
+LL |     const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
+   |                                ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
+LL |     std::str::from_utf8(&INVALID_1);
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+warning: calls to `std::str::from_utf8` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:110:5
+   |
+LL |     static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
+   |                                 ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
+LL |     std::str::from_utf8(&INVALID_2);
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+warning: calls to `std::str::from_utf8` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:113:5
+   |
+LL |     const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121];
+   |                                          ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
+LL |     std::str::from_utf8(INVALID_3);
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+warning: calls to `std::str::from_utf8` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:116:5
+   |
+LL |     const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] };
+   |                                            ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
+LL |     std::str::from_utf8(INVALID_4);
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+warning: 19 warnings emitted