about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--compiler/rustc_lint/messages.ftl4
-rw-r--r--compiler/rustc_lint/src/invalid_from_utf8.rs49
-rw-r--r--compiler/rustc_lint/src/lints.rs21
-rw-r--r--compiler/rustc_span/src/symbol.rs2
-rw-r--r--library/core/src/str/converts.rs2
-rw-r--r--tests/ui/lint/invalid_from_utf8.rs44
-rw-r--r--tests/ui/lint/invalid_from_utf8.stderr68
7 files changed, 169 insertions, 21 deletions
diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl
index 35edb458478..e707ac41a05 100644
--- a/compiler/rustc_lint/messages.ftl
+++ b/compiler/rustc_lint/messages.ftl
@@ -305,6 +305,10 @@ lint_improper_ctypes_union_layout_reason = this union has unspecified layout
 lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive
 
 # FIXME: we should ordinalize $valid_up_to when we add support for doing so
+lint_invalid_from_utf8_checked = calls to `{$method}` with a invalid literal always return an error
+    .label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
+
+# FIXME: we should ordinalize $valid_up_to when we add support for doing so
 lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior
     .label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
 
diff --git a/compiler/rustc_lint/src/invalid_from_utf8.rs b/compiler/rustc_lint/src/invalid_from_utf8.rs
index 2118deba5c7..3291286ad67 100644
--- a/compiler/rustc_lint/src/invalid_from_utf8.rs
+++ b/compiler/rustc_lint/src/invalid_from_utf8.rs
@@ -5,7 +5,7 @@ use rustc_hir::{Expr, ExprKind};
 use rustc_span::source_map::Spanned;
 use rustc_span::sym;
 
-use crate::lints::InvalidFromUtf8UncheckedDiag;
+use crate::lints::InvalidFromUtf8Diag;
 use crate::{LateContext, LateLintPass, LintContext};
 
 declare_lint! {
@@ -33,7 +33,30 @@ declare_lint! {
     "using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
 }
 
-declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED]);
+declare_lint! {
+    /// The `invalid_from_utf8` lint checks for calls to
+    /// `std::str::from_utf8` and `std::str::from_utf8_mut`
+    /// with an invalid UTF-8 literal.
+    ///
+    /// ### Example
+    ///
+    /// ```rust
+    /// # #[allow(unused)]
+    /// std::str::from_utf8(b"Ru\x82st");
+    /// ```
+    ///
+    /// {{produces}}
+    ///
+    /// ### Explanation
+    ///
+    /// Trying to create such a `str` would always return an error as per documentation
+    /// for `std::str::from_utf8` and `std::str::from_utf8_mut`.
+    pub INVALID_FROM_UTF8,
+    Warn,
+    "using a non UTF-8 literal in `std::str::from_utf8`"
+}
+
+declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED, INVALID_FROM_UTF8]);
 
 impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
     fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
@@ -41,15 +64,25 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
             && let ExprKind::Path(ref qpath) = path.kind
             && let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
             && let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
-            && [sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
+            && [sym::str_from_utf8, sym::str_from_utf8_mut,
+                sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
         {
             let lint = |utf8_error: Utf8Error| {
+                let label = arg.span;
                 let method = diag_item.as_str().strip_prefix("str_").unwrap();
-                cx.emit_spanned_lint(INVALID_FROM_UTF8_UNCHECKED, expr.span, InvalidFromUtf8UncheckedDiag {
-                    method: format!("std::str::{method}"),
-                    valid_up_to: utf8_error.valid_up_to(),
-                    label: arg.span,
-                })
+                let method = format!("std::str::{method}");
+                let valid_up_to = utf8_error.valid_up_to();
+                let is_unchecked_variant = diag_item.as_str().contains("unchecked");
+
+                cx.emit_spanned_lint(
+                    if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
+                    expr.span,
+                    if is_unchecked_variant {
+                        InvalidFromUtf8Diag::Unchecked { method,  valid_up_to, label }
+                    } else {
+                        InvalidFromUtf8Diag::Checked { method,  valid_up_to, label }
+                    }
+                )
             };
 
             match &arg.kind {
diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs
index 5969bc5ca5a..dcfef84f550 100644
--- a/compiler/rustc_lint/src/lints.rs
+++ b/compiler/rustc_lint/src/lints.rs
@@ -701,12 +701,21 @@ pub struct ForgetCopyDiag<'a> {
 
 // invalid_from_utf8.rs
 #[derive(LintDiagnostic)]
-#[diag(lint_invalid_from_utf8_unchecked)]
-pub struct InvalidFromUtf8UncheckedDiag {
-    pub method: String,
-    pub valid_up_to: usize,
-    #[label]
-    pub label: Span,
+pub enum InvalidFromUtf8Diag {
+    #[diag(lint_invalid_from_utf8_unchecked)]
+    Unchecked {
+        method: String,
+        valid_up_to: usize,
+        #[label]
+        label: Span,
+    },
+    #[diag(lint_invalid_from_utf8_checked)]
+    Checked {
+        method: String,
+        valid_up_to: usize,
+        #[label]
+        label: Span,
+    },
 }
 
 // hidden_unicode_codepoints.rs
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 4fc73c4ae86..1185563ea80 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -1454,6 +1454,8 @@ symbols! {
         stop_after_dataflow,
         store,
         str,
+        str_from_utf8,
+        str_from_utf8_mut,
         str_from_utf8_unchecked,
         str_from_utf8_unchecked_mut,
         str_split_whitespace,
diff --git a/library/core/src/str/converts.rs b/library/core/src/str/converts.rs
index 12fac0567cb..0f23cf7ae23 100644
--- a/library/core/src/str/converts.rs
+++ b/library/core/src/str/converts.rs
@@ -84,6 +84,7 @@ use super::Utf8Error;
 #[stable(feature = "rust1", since = "1.0.0")]
 #[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")]
 #[rustc_allow_const_fn_unstable(str_internals)]
+#[rustc_diagnostic_item = "str_from_utf8"]
 pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
     // FIXME: This should use `?` again, once it's `const`
     match run_utf8_validation(v) {
@@ -127,6 +128,7 @@ pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
 /// errors that can be returned.
 #[stable(feature = "str_mut_extras", since = "1.20.0")]
 #[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")]
+#[rustc_diagnostic_item = "str_from_utf8_mut"]
 pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
     // This should use `?` again, once it's `const`
     match run_utf8_validation(v) {
diff --git a/tests/ui/lint/invalid_from_utf8.rs b/tests/ui/lint/invalid_from_utf8.rs
index 4a27c659c73..9c8c636812e 100644
--- a/tests/ui/lint/invalid_from_utf8.rs
+++ b/tests/ui/lint/invalid_from_utf8.rs
@@ -2,6 +2,7 @@
 
 #![feature(concat_bytes)]
 #![warn(invalid_from_utf8_unchecked)]
+#![warn(invalid_from_utf8)]
 
 pub fn from_utf8_unchecked_mut() {
     // Valid
@@ -46,4 +47,47 @@ pub fn from_utf8_unchecked() {
     }
 }
 
+pub fn from_utf8_mut() {
+    // Valid
+    {
+        std::str::from_utf8_mut(&mut [99, 108, 105, 112, 112, 121]);
+        std::str::from_utf8_mut(&mut [b'c', b'l', b'i', b'p', b'p', b'y']);
+
+        let x = 0xa0;
+        std::str::from_utf8_mut(&mut [0xc0, x]);
+    }
+
+    // Invalid
+    {
+        std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
+        //~^ WARN calls to `std::str::from_utf8_mut`
+        std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
+        //~^ WARN calls to `std::str::from_utf8_mut`
+    }
+}
+
+pub fn from_utf8() {
+    // Valid
+    {
+        std::str::from_utf8(&[99, 108, 105, 112, 112, 121]);
+        std::str::from_utf8(&[b'c', b'l', b'i', b'p', b'p', b'y']);
+        std::str::from_utf8(b"clippy");
+
+        let x = 0xA0;
+        std::str::from_utf8(&[0xC0, x]);
+    }
+
+    // Invalid
+    {
+        std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
+        //~^ WARN calls to `std::str::from_utf8`
+        std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
+        //~^ WARN calls to `std::str::from_utf8`
+        std::str::from_utf8(b"cl\x82ippy");
+        //~^ WARN calls to `std::str::from_utf8`
+        std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
+        //~^ WARN calls to `std::str::from_utf8`
+    }
+}
+
 fn main() {}
diff --git a/tests/ui/lint/invalid_from_utf8.stderr b/tests/ui/lint/invalid_from_utf8.stderr
index 63cd906237d..8e00d3bf872 100644
--- a/tests/ui/lint/invalid_from_utf8.stderr
+++ b/tests/ui/lint/invalid_from_utf8.stderr
@@ -1,5 +1,5 @@
 warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:18:9
+  --> $DIR/invalid_from_utf8.rs:19:9
    |
 LL |         std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
@@ -13,7 +13,7 @@ LL | #![warn(invalid_from_utf8_unchecked)]
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:20:9
+  --> $DIR/invalid_from_utf8.rs:21:9
    |
 LL |         std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
@@ -21,7 +21,7 @@ LL |         std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i',
    |                                           the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:38:9
+  --> $DIR/invalid_from_utf8.rs:39:9
    |
 LL |         std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
@@ -29,7 +29,7 @@ LL |         std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
    |                                       the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:40:9
+  --> $DIR/invalid_from_utf8.rs:41:9
    |
 LL |         std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
@@ -37,7 +37,7 @@ LL |         std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'
    |                                       the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:42:9
+  --> $DIR/invalid_from_utf8.rs:43:9
    |
 LL |         std::str::from_utf8_unchecked(b"cl\x82ippy");
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
@@ -45,12 +45,66 @@ LL |         std::str::from_utf8_unchecked(b"cl\x82ippy");
    |                                       the literal was valid UTF-8 up to the 2 bytes
 
 warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
-  --> $DIR/invalid_from_utf8.rs:44:9
+  --> $DIR/invalid_from_utf8.rs:45:9
    |
 LL |         std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
    |                                       |
    |                                       the literal was valid UTF-8 up to the 2 bytes
 
-warning: 6 warnings emitted
+warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:62:9
+   |
+LL |         std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
+   |                                 |
+   |                                 the literal was valid UTF-8 up to the 2 bytes
+   |
+note: the lint level is defined here
+  --> $DIR/invalid_from_utf8.rs:5:9
+   |
+LL | #![warn(invalid_from_utf8)]
+   |         ^^^^^^^^^^^^^^^^^
+
+warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:64:9
+   |
+LL |         std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
+   |                                 |
+   |                                 the literal was valid UTF-8 up to the 2 bytes
+
+warning: calls to `std::str::from_utf8` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:82:9
+   |
+LL |         std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
+   |         ^^^^^^^^^^^^^^^^^^^^-----------------------------------^
+   |                             |
+   |                             the literal was valid UTF-8 up to the 2 bytes
+
+warning: calls to `std::str::from_utf8` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:84:9
+   |
+LL |         std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
+   |         ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
+   |                             |
+   |                             the literal was valid UTF-8 up to the 2 bytes
+
+warning: calls to `std::str::from_utf8` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:86:9
+   |
+LL |         std::str::from_utf8(b"cl\x82ippy");
+   |         ^^^^^^^^^^^^^^^^^^^^-------------^
+   |                             |
+   |                             the literal was valid UTF-8 up to the 2 bytes
+
+warning: calls to `std::str::from_utf8` with a invalid literal always return an error
+  --> $DIR/invalid_from_utf8.rs:88:9
+   |
+LL |         std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
+   |         ^^^^^^^^^^^^^^^^^^^^---------------------------------^
+   |                             |
+   |                             the literal was valid UTF-8 up to the 2 bytes
+
+warning: 12 warnings emitted