about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2024-10-05 18:41:45 +0000
committerbors <bors@rust-lang.org>2024-10-05 18:41:45 +0000
commit753629bb33c9fff9fb693a2d1d4464bd696ccf62 (patch)
tree082135298a072dfafd88b64be40baa98425674e2
parentf173eb3aefdc12ddd3a732141ff46ff65483cf0a (diff)
parent196dbcf6d5fa474e9dd15a0bbaad1ced3e0f28fe (diff)
downloadrust-753629bb33c9fff9fb693a2d1d4464bd696ccf62.tar.gz
rust-753629bb33c9fff9fb693a2d1d4464bd696ccf62.zip
Auto merge of #13412 - GnomedDev:regex-comp-in-loop, r=y21
Implement lint for regex::Regex compilation inside a loop

Closes #598.

Seems like a pretty simple one, I'm not sure if I sorted out all the lint plumbing correctly because I was adding it to the existing regex pass, but seems to work. The name is a bit jank and I'm super open to suggestions for changing it.

changelog: [`regex_creation_in_loops`]: Added lint for Regex compilation inside loops.
-rw-r--r--CHANGELOG.md1
-rw-r--r--clippy_lints/src/declared_lints.rs1
-rw-r--r--clippy_lints/src/regex.rs65
-rw-r--r--tests/ui/regex.rs30
-rw-r--r--tests/ui/regex.stderr52
5 files changed, 145 insertions, 4 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5d253d52531..cb00807d71a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5874,6 +5874,7 @@ Released 2018-09-13
 [`ref_option`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_option
 [`ref_option_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_option_ref
 [`ref_patterns`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_patterns
+[`regex_creation_in_loops`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_creation_in_loops
 [`regex_macro`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_macro
 [`renamed_function_params`]: https://rust-lang.github.io/rust-clippy/master/index.html#renamed_function_params
 [`repeat_once`]: https://rust-lang.github.io/rust-clippy/master/index.html#repeat_once
diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs
index 9cec672beb0..44402578f3f 100644
--- a/clippy_lints/src/declared_lints.rs
+++ b/clippy_lints/src/declared_lints.rs
@@ -639,6 +639,7 @@ pub static LINTS: &[&crate::LintInfo] = &[
     crate::ref_patterns::REF_PATTERNS_INFO,
     crate::reference::DEREF_ADDROF_INFO,
     crate::regex::INVALID_REGEX_INFO,
+    crate::regex::REGEX_CREATION_IN_LOOPS_INFO,
     crate::regex::TRIVIAL_REGEX_INFO,
     crate::repeat_vec_with_capacity::REPEAT_VEC_WITH_CAPACITY_INFO,
     crate::reserve_after_initialization::RESERVE_AFTER_INITIALIZATION_INFO,
diff --git a/clippy_lints/src/regex.rs b/clippy_lints/src/regex.rs
index 12cbdb854ef..6a5bf1b8045 100644
--- a/clippy_lints/src/regex.rs
+++ b/clippy_lints/src/regex.rs
@@ -6,7 +6,7 @@ use clippy_utils::source::SpanRangeExt;
 use clippy_utils::{def_path_res_with_base, find_crates, path_def_id, paths};
 use rustc_ast::ast::{LitKind, StrStyle};
 use rustc_hir::def_id::DefIdMap;
-use rustc_hir::{BorrowKind, Expr, ExprKind};
+use rustc_hir::{BorrowKind, Expr, ExprKind, OwnerId};
 use rustc_lint::{LateContext, LateLintPass};
 use rustc_session::impl_lint_pass;
 use rustc_span::{BytePos, Span};
@@ -55,6 +55,44 @@ declare_clippy_lint! {
     "trivial regular expressions"
 }
 
+declare_clippy_lint! {
+    /// ### What it does
+    ///
+    /// Checks for [regex](https://crates.io/crates/regex) compilation inside a loop with a literal.
+    ///
+    /// ### Why is this bad?
+    ///
+    /// Compiling a regex is a much more expensive operation than using one, and a compiled regex can be used multiple times.
+    /// This is documented as an antipattern [on the regex documentation](https://docs.rs/regex/latest/regex/#avoid-re-compiling-regexes-especially-in-a-loop)
+    ///
+    /// ### Example
+    /// ```no_run
+    /// # let haystacks = [""];
+    /// # const MY_REGEX: &str = "a.b";
+    /// for haystack in haystacks {
+    ///     let regex = regex::Regex::new(MY_REGEX).unwrap();
+    ///     if regex.is_match(haystack) {
+    ///         // Perform operation
+    ///     }
+    /// }
+    /// ```
+    /// can be replaced with
+    /// ```no_run
+    /// # let haystacks = [""];
+    /// # const MY_REGEX: &str = "a.b";
+    /// let regex = regex::Regex::new(MY_REGEX).unwrap();
+    /// for haystack in haystacks {
+    ///     if regex.is_match(haystack) {
+    ///         // Perform operation
+    ///     }
+    /// }
+    /// ```
+    #[clippy::version = "1.83.0"]
+    pub REGEX_CREATION_IN_LOOPS,
+    perf,
+    "regular expression compilation performed in a loop"
+}
+
 #[derive(Copy, Clone)]
 enum RegexKind {
     Unicode,
@@ -66,9 +104,10 @@ enum RegexKind {
 #[derive(Default)]
 pub struct Regex {
     definitions: DefIdMap<RegexKind>,
+    loop_stack: Vec<(OwnerId, Span)>,
 }
 
-impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX]);
+impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX, REGEX_CREATION_IN_LOOPS]);
 
 impl<'tcx> LateLintPass<'tcx> for Regex {
     fn check_crate(&mut self, cx: &LateContext<'tcx>) {
@@ -99,12 +138,34 @@ impl<'tcx> LateLintPass<'tcx> for Regex {
             && let Some(def_id) = path_def_id(cx, fun)
             && let Some(regex_kind) = self.definitions.get(&def_id)
         {
+            if let Some(&(loop_item_id, loop_span)) = self.loop_stack.last()
+                && loop_item_id == fun.hir_id.owner
+                && (matches!(arg.kind, ExprKind::Lit(_)) || const_str(cx, arg).is_some())
+            {
+                span_lint_and_help(
+                    cx,
+                    REGEX_CREATION_IN_LOOPS,
+                    fun.span,
+                    "compiling a regex in a loop",
+                    Some(loop_span),
+                    "move the regex construction outside this loop",
+                );
+            }
+
             match regex_kind {
                 RegexKind::Unicode => check_regex(cx, arg, true),
                 RegexKind::UnicodeSet => check_set(cx, arg, true),
                 RegexKind::Bytes => check_regex(cx, arg, false),
                 RegexKind::BytesSet => check_set(cx, arg, false),
             }
+        } else if let ExprKind::Loop(block, _, _, span) = expr.kind {
+            self.loop_stack.push((block.hir_id.owner, span));
+        }
+    }
+
+    fn check_expr_post(&mut self, _: &LateContext<'tcx>, expr: &'tcx Expr<'_>) {
+        if matches!(expr.kind, ExprKind::Loop(..)) {
+            self.loop_stack.pop();
         }
     }
 }
diff --git a/tests/ui/regex.rs b/tests/ui/regex.rs
index 4fb6c08bb44..f607a2d50c6 100644
--- a/tests/ui/regex.rs
+++ b/tests/ui/regex.rs
@@ -5,7 +5,7 @@
     clippy::needless_borrow,
     clippy::needless_borrows_for_generic_args
 )]
-#![warn(clippy::invalid_regex, clippy::trivial_regex)]
+#![warn(clippy::invalid_regex, clippy::trivial_regex, clippy::regex_creation_in_loops)]
 
 extern crate regex;
 
@@ -118,7 +118,35 @@ fn trivial_regex() {
     let _ = BRegex::new(r"\b{start}word\b{end}");
 }
 
+fn regex_creation_in_loops() {
+    loop {
+        static STATIC_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| Regex::new("a.b").unwrap());
+
+        let regex = Regex::new("a.b");
+        //~^ ERROR: compiling a regex in a loop
+        let regex = BRegex::new("a.b");
+        //~^ ERROR: compiling a regex in a loop
+        #[allow(clippy::regex_creation_in_loops)]
+        let allowed_regex = Regex::new("a.b");
+
+        if true {
+            let regex = Regex::new("a.b");
+            //~^ ERROR: compiling a regex in a loop
+        }
+
+        for _ in 0..10 {
+            let nested_regex = Regex::new("a.b");
+            //~^ ERROR: compiling a regex in a loop
+        }
+    }
+
+    for i in 0..10 {
+        let dependant_regex = Regex::new(&format!("{i}"));
+    }
+}
+
 fn main() {
     syntax_error();
     trivial_regex();
+    regex_creation_in_loops();
 }
diff --git a/tests/ui/regex.stderr b/tests/ui/regex.stderr
index e936208d8d7..18dd538c68b 100644
--- a/tests/ui/regex.stderr
+++ b/tests/ui/regex.stderr
@@ -195,5 +195,55 @@ LL |     let binary_trivial_empty = BRegex::new("^$");
    |
    = help: consider using `str::is_empty`
 
-error: aborting due to 24 previous errors
+error: compiling a regex in a loop
+  --> tests/ui/regex.rs:125:21
+   |
+LL |         let regex = Regex::new("a.b");
+   |                     ^^^^^^^^^^
+   |
+help: move the regex construction outside this loop
+  --> tests/ui/regex.rs:122:5
+   |
+LL |     loop {
+   |     ^^^^
+   = note: `-D clippy::regex-creation-in-loops` implied by `-D warnings`
+   = help: to override `-D warnings` add `#[allow(clippy::regex_creation_in_loops)]`
+
+error: compiling a regex in a loop
+  --> tests/ui/regex.rs:127:21
+   |
+LL |         let regex = BRegex::new("a.b");
+   |                     ^^^^^^^^^^^
+   |
+help: move the regex construction outside this loop
+  --> tests/ui/regex.rs:122:5
+   |
+LL |     loop {
+   |     ^^^^
+
+error: compiling a regex in a loop
+  --> tests/ui/regex.rs:133:25
+   |
+LL |             let regex = Regex::new("a.b");
+   |                         ^^^^^^^^^^
+   |
+help: move the regex construction outside this loop
+  --> tests/ui/regex.rs:122:5
+   |
+LL |     loop {
+   |     ^^^^
+
+error: compiling a regex in a loop
+  --> tests/ui/regex.rs:138:32
+   |
+LL |             let nested_regex = Regex::new("a.b");
+   |                                ^^^^^^^^^^
+   |
+help: move the regex construction outside this loop
+  --> tests/ui/regex.rs:137:9
+   |
+LL |         for _ in 0..10 {
+   |         ^^^^^^^^^^^^^^
+
+error: aborting due to 28 previous errors