about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2022-07-03 20:02:38 +0000
committer bors <bors@rust-lang.org> 2022-07-03 20:02:38 +0000
commit 8c8987749b999b25408763544fc17030fa3fb92f (patch)
tree d6c69c9a2403f9832ec2f70f4b33b75fae90a978
parent be9e35f6b2326524dffbaf13a46f9289743bd9fd (diff)
parent de646e10db2141fc9fffddf103611fe7d1fd9d68 (diff)
download rust-8c8987749b999b25408763544fc17030fa3fb92f.tar.gz
rust-8c8987749b999b25408763544fc17030fa3fb92f.zip
Auto merge of #9105 - Serial-ATA:lint-invalid-utf8, r=Jarcho
Add `invalid_utf8_in_unchecked`

changelog: Add [`invalid_utf8_in_unchecked`]
closes: #629

Don't know how useful of a lint this is, just saw this was a really old issue :smile:.
-rw-r--r-- CHANGELOG.md 1
-rw-r--r-- clippy_lints/src/invalid_utf8_in_unchecked.rs 74
-rw-r--r-- clippy_lints/src/lib.register_all.rs 1
-rw-r--r-- clippy_lints/src/lib.register_correctness.rs 1
-rw-r--r-- clippy_lints/src/lib.register_lints.rs 1
-rw-r--r-- clippy_lints/src/lib.rs 2
-rw-r--r-- clippy_utils/src/paths.rs 1
-rw-r--r-- tests/ui/invalid_utf8_in_unchecked.rs 20
-rw-r--r-- tests/ui/invalid_utf8_in_unchecked.stderr 22
9 files changed, 123 insertions, 0 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 71e498c301b..1b792736a6a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3641,6 +3641,7 @@ Released 2018-09-13
 [`invalid_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_ref
 [`invalid_regex`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_regex
 [`invalid_upcast_comparisons`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_upcast_comparisons
+[`invalid_utf8_in_unchecked`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_utf8_in_unchecked
 [`invisible_characters`]: https://rust-lang.github.io/rust-clippy/master/index.html#invisible_characters
 [`is_digit_ascii_radix`]: https://rust-lang.github.io/rust-clippy/master/index.html#is_digit_ascii_radix
 [`items_after_statements`]: https://rust-lang.github.io/rust-clippy/master/index.html#items_after_statements
diff --git a/clippy_lints/src/invalid_utf8_in_unchecked.rs b/clippy_lints/src/invalid_utf8_in_unchecked.rs
new file mode 100644
index 00000000000..e0a607f9a95
--- /dev/null
+++ b/clippy_lints/src/invalid_utf8_in_unchecked.rs
@@ -0,0 +1,74 @@
+use clippy_utils::diagnostics::span_lint;
+use clippy_utils::{match_function_call, paths};
+use rustc_ast::{BorrowKind, LitKind};
+use rustc_hir::{Expr, ExprKind};
+use rustc_lint::{LateContext, LateLintPass};
+use rustc_session::{declare_lint_pass, declare_tool_lint};
+use rustc_span::source_map::Spanned;
+use rustc_span::Span;
+
+declare_clippy_lint! {
+    /// ### What it does
+    /// Checks for `std::str::from_utf8_unchecked` with an invalid UTF-8 literal.
+    ///
+    /// ### Why is this bad?
+    /// Creating such a `str` would result in undefined behavior.
+    ///
+    /// ### Example
+    /// ```rust
+    /// # #[allow(unused)]
+    /// unsafe {
+    ///     std::str::from_utf8_unchecked(b"cl\x82ippy");
+    /// }
+    /// ```
+    #[clippy::version = "1.64.0"]
+    pub INVALID_UTF8_IN_UNCHECKED,
+    correctness,
+    "using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
+}
+declare_lint_pass!(InvalidUtf8InUnchecked => [INVALID_UTF8_IN_UNCHECKED]);
+
+impl<'tcx> LateLintPass<'tcx> for InvalidUtf8InUnchecked {
+    fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) { // lints `expr` when it is a `from_utf8_unchecked` call on a literal that is not valid UTF-8
+        if let Some([arg]) = match_function_call(cx, expr, &paths::STR_FROM_UTF8_UNCHECKED) { // only a single-argument match proceeds; presumably yields the call's arguments when the callee is this path (verify in clippy_utils)
+            match &arg.kind {
+                ExprKind::Lit(Spanned { node: lit, .. }) => { // case 1: the argument is itself a literal
+                    if let LitKind::ByteStr(bytes) = &lit
+                        && std::str::from_utf8(bytes).is_err() // byte-string contents fail UTF-8 validation
+                    {
+                        lint(cx, expr.span);
+                    }
+                },
+                ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => { // case 2: the argument is `&[...]`, a borrowed array expression
+                    let elements = args.iter().map(|e|{ // map each element to its byte value, or None if it is not a byte/integer literal
+                        match &e.kind {
+                            ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
+                                LitKind::Byte(b) => Some(*b), // byte literal element
+                                #[allow(clippy::cast_possible_truncation)]
+                                LitKind::Int(b, _) => Some(*b as u8), // integer literal element, narrowed to u8 with a truncating cast
+                                _ => None // any other kind of literal
+                            }
+                            _ => None // element is not a literal (e.g. a variable)
+                        }
+                    }).collect::<Option<Vec<_>>>(); // a single None element makes the whole result None
+
+                    if let Some(elements) = elements // skip the check unless every element was a byte/integer literal
+                        && std::str::from_utf8(&elements).is_err() // collected bytes fail UTF-8 validation
+                    {
+                        lint(cx, expr.span);
+                    }
+                }
+                _ => {} // any other argument shape is not inspected
+            }
+        }
+    }
+}
+
+fn lint(cx: &LateContext<'_>, span: Span) { // reports INVALID_UTF8_IN_UNCHECKED at `span` with a fixed message
+    span_lint(
+        cx,
+        INVALID_UTF8_IN_UNCHECKED,
+        span, // callers in this file pass the span of the whole call expression
+        "non UTF-8 literal in `std::str::from_utf8_unchecked`",
+    );
+}
diff --git a/clippy_lints/src/lib.register_all.rs b/clippy_lints/src/lib.register_all.rs
index 563ad891603..da26a3f0130 100644
--- a/clippy_lints/src/lib.register_all.rs
+++ b/clippy_lints/src/lib.register_all.rs
@@ -92,6 +92,7 @@ store.register_group(true, "clippy::all", Some("clippy_all"), vec![
     LintId::of(init_numbered_fields::INIT_NUMBERED_FIELDS),
     LintId::of(inline_fn_without_body::INLINE_FN_WITHOUT_BODY),
     LintId::of(int_plus_one::INT_PLUS_ONE),
+    LintId::of(invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED),
     LintId::of(large_const_arrays::LARGE_CONST_ARRAYS),
     LintId::of(large_enum_variant::LARGE_ENUM_VARIANT),
     LintId::of(len_zero::COMPARISON_TO_EMPTY),
diff --git a/clippy_lints/src/lib.register_correctness.rs b/clippy_lints/src/lib.register_correctness.rs
index 7d5e65cb27a..9975859c54f 100644
--- a/clippy_lints/src/lib.register_correctness.rs
+++ b/clippy_lints/src/lib.register_correctness.rs
@@ -29,6 +29,7 @@ store.register_group(true, "clippy::correctness", Some("clippy_correctness"), ve
     LintId::of(infinite_iter::INFINITE_ITER),
     LintId::of(inherent_to_string::INHERENT_TO_STRING_SHADOW_DISPLAY),
     LintId::of(inline_fn_without_body::INLINE_FN_WITHOUT_BODY),
+    LintId::of(invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED),
     LintId::of(let_underscore::LET_UNDERSCORE_LOCK),
     LintId::of(literal_representation::MISTYPED_LITERAL_SUFFIXES),
     LintId::of(loops::ITER_NEXT_LOOP),
diff --git a/clippy_lints/src/lib.register_lints.rs b/clippy_lints/src/lib.register_lints.rs
index d3c75f8b519..ceb8470657f 100644
--- a/clippy_lints/src/lib.register_lints.rs
+++ b/clippy_lints/src/lib.register_lints.rs
@@ -196,6 +196,7 @@ store.register_lints(&[
     inline_fn_without_body::INLINE_FN_WITHOUT_BODY,
     int_plus_one::INT_PLUS_ONE,
     invalid_upcast_comparisons::INVALID_UPCAST_COMPARISONS,
+    invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED,
     items_after_statements::ITEMS_AFTER_STATEMENTS,
     iter_not_returning_iterator::ITER_NOT_RETURNING_ITERATOR,
     large_const_arrays::LARGE_CONST_ARRAYS,
diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs
index 172fdf8c852..1604d1078ee 100644
--- a/clippy_lints/src/lib.rs
+++ b/clippy_lints/src/lib.rs
@@ -255,6 +255,7 @@ mod init_numbered_fields;
 mod inline_fn_without_body;
 mod int_plus_one;
 mod invalid_upcast_comparisons;
+mod invalid_utf8_in_unchecked;
 mod items_after_statements;
 mod iter_not_returning_iterator;
 mod large_const_arrays;
@@ -913,6 +914,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
     store.register_late_pass(move || Box::new(manual_retain::ManualRetain::new(msrv)));
     let verbose_bit_mask_threshold = conf.verbose_bit_mask_threshold;
     store.register_late_pass(move || Box::new(operators::Operators::new(verbose_bit_mask_threshold)));
+    store.register_late_pass(|| Box::new(invalid_utf8_in_unchecked::InvalidUtf8InUnchecked));
     // add lints here, do not remove this comment, it's used in `new_lint`
 }
 
diff --git a/clippy_utils/src/paths.rs b/clippy_utils/src/paths.rs
index 6542e77113b..05429d05d9e 100644
--- a/clippy_utils/src/paths.rs
+++ b/clippy_utils/src/paths.rs
@@ -163,6 +163,7 @@ pub const STR_BYTES: [&str; 4] = ["core", "str", "<impl str>", "bytes"];
 pub const STR_CHARS: [&str; 4] = ["core", "str", "<impl str>", "chars"];
 pub const STR_ENDS_WITH: [&str; 4] = ["core", "str", "<impl str>", "ends_with"];
 pub const STR_FROM_UTF8: [&str; 4] = ["core", "str", "converts", "from_utf8"];
+pub const STR_FROM_UTF8_UNCHECKED: [&str; 4] = ["core", "str", "converts", "from_utf8_unchecked"];
 pub const STR_LEN: [&str; 4] = ["core", "str", "<impl str>", "len"];
 pub const STR_STARTS_WITH: [&str; 4] = ["core", "str", "<impl str>", "starts_with"];
 #[cfg(feature = "internal")]
diff --git a/tests/ui/invalid_utf8_in_unchecked.rs b/tests/ui/invalid_utf8_in_unchecked.rs
new file mode 100644
index 00000000000..3dc096d3197
--- /dev/null
+++ b/tests/ui/invalid_utf8_in_unchecked.rs
@@ -0,0 +1,20 @@
+#![warn(clippy::invalid_utf8_in_unchecked)]
+
+fn main() {
+    // Valid, or not statically checkable: none of these calls is expected to be linted
+    unsafe {
+        std::str::from_utf8_unchecked(&[99, 108, 105, 112, 112, 121]);
+        std::str::from_utf8_unchecked(&[b'c', b'l', b'i', b'p', b'p', b'y']);
+        std::str::from_utf8_unchecked(b"clippy");
+
+        let x = 0xA0;
+        std::str::from_utf8_unchecked(&[0xC0, x]); // no lint expected: `x` is a variable, not a literal, so the array is not checked
+    }
+
+    // Invalid: each call below is expected to be linted (see the .stderr file)
+    unsafe {
+        std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
+        std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
+        std::str::from_utf8_unchecked(b"cl\x82ippy");
+    }
+}
diff --git a/tests/ui/invalid_utf8_in_unchecked.stderr b/tests/ui/invalid_utf8_in_unchecked.stderr
new file mode 100644
index 00000000000..c89cd2758ee
--- /dev/null
+++ b/tests/ui/invalid_utf8_in_unchecked.stderr
@@ -0,0 +1,22 @@
+error: non UTF-8 literal in `std::str::from_utf8_unchecked`
+  --> $DIR/invalid_utf8_in_unchecked.rs:16:9
+   |
+LL |         std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+   = note: `-D clippy::invalid-utf8-in-unchecked` implied by `-D warnings`
+
+error: non UTF-8 literal in `std::str::from_utf8_unchecked`
+  --> $DIR/invalid_utf8_in_unchecked.rs:17:9
+   |
+LL |         std::str::from_utf8_unchecked(&[b'c', b'l', b'/x82', b'i', b'p', b'p', b'y']);
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+error: non UTF-8 literal in `std::str::from_utf8_unchecked`
+  --> $DIR/invalid_utf8_in_unchecked.rs:18:9
+   |
+LL |         std::str::from_utf8_unchecked(b"cl/x82ippy");
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+error: aborting due to 3 previous errors
+