about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2023-07-17 15:38:33 +0000
committerbors <bors@rust-lang.org>2023-07-17 15:38:33 +0000
commit410456da51262c5ecc57554ea6007327cb8328e9 (patch)
tree051ccfbeb9f6595eba3cc2367d8ddcc516456de0
parent54fa92287b91ff16bb6001d816dd7ae19dc52f91 (diff)
parentc83d58f5076f3a79ea79514395a02babd15687b1 (diff)
downloadrust-410456da51262c5ecc57554ea6007327cb8328e9.tar.gz
rust-410456da51262c5ecc57554ea6007327cb8328e9.zip
Auto merge of #11116 - y21:format_collect, r=Manishearth
new lint: `format_collect`

A perf lint that looks for `format!`ing inside of `map`, then collecting it into a `String`. Did a quick benchmark locally and it's a bit more than 2x faster with fold.
`write!` is still not optimal (presumably because the fmt stuff goes through dynamic dispatch), but it's still a lot better than creating a new string on every element.
I thought about making a machine applicable suggestion, but there's a lot of suggestions that need to be made here, so I decided to just add help messages.

changelog: new lint: `format_collect`
-rw-r--r--CHANGELOG.md1
-rw-r--r--clippy_lints/src/declared_lints.rs1
-rw-r--r--clippy_lints/src/methods/format_collect.rs33
-rw-r--r--clippy_lints/src/methods/mod.rs40
-rw-r--r--src/main.rs3
-rw-r--r--tests/ui/format_collect.rs31
-rw-r--r--tests/ui/format_collect.stderr62
7 files changed, 168 insertions, 3 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b3b6e3b865f..ea8450ed5c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4865,6 +4865,7 @@ Released 2018-09-13
 [`forget_copy`]: https://rust-lang.github.io/rust-clippy/master/index.html#forget_copy
 [`forget_non_drop`]: https://rust-lang.github.io/rust-clippy/master/index.html#forget_non_drop
 [`forget_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#forget_ref
+[`format_collect`]: https://rust-lang.github.io/rust-clippy/master/index.html#format_collect
 [`format_in_format_args`]: https://rust-lang.github.io/rust-clippy/master/index.html#format_in_format_args
 [`format_push_string`]: https://rust-lang.github.io/rust-clippy/master/index.html#format_push_string
 [`from_iter_instead_of_collect`]: https://rust-lang.github.io/rust-clippy/master/index.html#from_iter_instead_of_collect
diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs
index 498d657b31f..7472158a81c 100644
--- a/clippy_lints/src/declared_lints.rs
+++ b/clippy_lints/src/declared_lints.rs
@@ -338,6 +338,7 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[
     crate::methods::FILTER_NEXT_INFO,
     crate::methods::FLAT_MAP_IDENTITY_INFO,
     crate::methods::FLAT_MAP_OPTION_INFO,
+    crate::methods::FORMAT_COLLECT_INFO,
     crate::methods::FROM_ITER_INSTEAD_OF_COLLECT_INFO,
     crate::methods::GET_FIRST_INFO,
     crate::methods::GET_LAST_WITH_LEN_INFO,
diff --git a/clippy_lints/src/methods/format_collect.rs b/clippy_lints/src/methods/format_collect.rs
new file mode 100644
index 00000000000..1f8863f8521
--- /dev/null
+++ b/clippy_lints/src/methods/format_collect.rs
@@ -0,0 +1,33 @@
+use super::FORMAT_COLLECT;
+use clippy_utils::diagnostics::span_lint_and_then;
+use clippy_utils::macros::{is_format_macro, root_macro_call_first_node};
+use clippy_utils::ty::is_type_lang_item;
+use rustc_hir::{Expr, ExprKind, LangItem};
+use rustc_lint::LateContext;
+use rustc_span::Span;
+
+/// Same as `peel_blocks` but only actually considers blocks that are not from an expansion.
+/// This is needed because always calling `peel_blocks` would otherwise remove parts of the
+/// `format!` macro, which would cause `root_macro_call_first_node` to return `None`.
+fn peel_non_expn_blocks<'tcx>(expr: &'tcx Expr<'tcx>) -> Option<&'tcx Expr<'tcx>> {
+    match expr.kind {
+        ExprKind::Block(block, _) if !expr.span.from_expansion() => peel_non_expn_blocks(block.expr?),
+        _ => Some(expr),
+    }
+}
+
+pub(super) fn check(cx: &LateContext<'_>, expr: &Expr<'_>, map_arg: &Expr<'_>, map_span: Span) {
+    if is_type_lang_item(cx, cx.typeck_results().expr_ty(expr), LangItem::String)
+        && let ExprKind::Closure(closure) = map_arg.kind
+        && let body = cx.tcx.hir().body(closure.body)
+        && let Some(value) = peel_non_expn_blocks(body.value)
+        && let Some(mac) = root_macro_call_first_node(cx, value)
+        && is_format_macro(cx, mac.def_id)
+    {
+        span_lint_and_then(cx, FORMAT_COLLECT, expr.span, "use of `format!` to build up a string from an iterator", |diag| {
+            diag.span_help(map_span, "call `fold` instead")
+                .span_help(value.span.source_callsite(), "... and use the `write!` macro here")
+                .note("this can be written more efficiently by appending to a `String` directly");
+        });
+    }
+}
diff --git a/clippy_lints/src/methods/mod.rs b/clippy_lints/src/methods/mod.rs
index 2747ab5615c..81f59f68ec6 100644
--- a/clippy_lints/src/methods/mod.rs
+++ b/clippy_lints/src/methods/mod.rs
@@ -26,6 +26,7 @@ mod filter_map_next;
 mod filter_next;
 mod flat_map_identity;
 mod flat_map_option;
+mod format_collect;
 mod from_iter_instead_of_collect;
 mod get_first;
 mod get_last_with_len;
@@ -3378,6 +3379,41 @@ declare_clippy_lint! {
     "calling `Stdin::read_line`, then trying to parse it without first trimming"
 }
 
+declare_clippy_lint! {
+    /// ### What it does
+    /// Checks for usage of `.map(|_| format!(..)).collect::<String>()`.
+    ///
+    /// ### Why is this bad?
+    /// This allocates a new string for every element in the iterator.
+    /// This can be done more efficiently by creating the `String` once and appending to it in `Iterator::fold`,
+    /// using either the `write!` macro which supports exactly the same syntax as the `format!` macro,
+    /// or concatenating with `+` in case the iterator yields `&str`/`String`.
+    ///
+    /// Note also that `write!`-ing into a `String` can never fail, despite the return type of `write!` being `std::fmt::Result`,
+    /// so it can be safely ignored or unwrapped.
+    ///
+    /// ### Example
+    /// ```rust
+    /// fn hex_encode(bytes: &[u8]) -> String {
+    ///     bytes.iter().map(|b| format!("{b:02X}")).collect()
+    /// }
+    /// ```
+    /// Use instead:
+    /// ```rust
+    /// use std::fmt::Write;
+    /// fn hex_encode(bytes: &[u8]) -> String {
+    ///     bytes.iter().fold(String::new(), |mut output, b| {
+    ///         let _ = write!(output, "{b:02X}");
+    ///         output
+    ///     })
+    /// }
+    /// ```
+    #[clippy::version = "1.72.0"]
+    pub FORMAT_COLLECT,
+    perf,
+    "`format!`ing every element in a collection, then collecting the strings into a new `String`"
+}
+
 pub struct Methods {
     avoid_breaking_exported_api: bool,
     msrv: Msrv,
@@ -3512,6 +3548,7 @@ impl_lint_pass!(Methods => [
     UNNECESSARY_LITERAL_UNWRAP,
     DRAIN_COLLECT,
     MANUAL_TRY_FOLD,
+    FORMAT_COLLECT,
 ]);
 
 /// Extracts a method call name, args, and `Span` of the method name.
@@ -3733,8 +3770,9 @@ impl Methods {
                         Some((name @ ("cloned" | "copied"), recv2, [], _, _)) => {
                             iter_cloned_collect::check(cx, name, expr, recv2);
                         },
-                        Some(("map", m_recv, [m_arg], _, _)) => {
+                        Some(("map", m_recv, [m_arg], m_ident_span, _)) => {
                             map_collect_result_unit::check(cx, expr, m_recv, m_arg);
+                            format_collect::check(cx, expr, m_arg, m_ident_span);
                         },
                         Some(("take", take_self_arg, [take_arg], _, _)) => {
                             if self.msrv.meets(msrvs::STR_REPEAT) {
diff --git a/src/main.rs b/src/main.rs
index cdc85cb33ca..26b655076cf 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -132,8 +132,7 @@ impl ClippyCmd {
         let clippy_args: String = self
             .clippy_args
             .iter()
-            .map(|arg| format!("{arg}__CLIPPY_HACKERY__"))
-            .collect();
+            .fold(String::new(), |s, arg| s + arg + "__CLIPPY_HACKERY__");
 
         // Currently, `CLIPPY_TERMINAL_WIDTH` is used only to format "unknown field" error messages.
         let terminal_width = termize::dimensions().map_or(0, |(w, _)| w);
diff --git a/tests/ui/format_collect.rs b/tests/ui/format_collect.rs
new file mode 100644
index 00000000000..c7f2b7b6950
--- /dev/null
+++ b/tests/ui/format_collect.rs
@@ -0,0 +1,31 @@
+#![allow(unused, dead_code)]
+#![warn(clippy::format_collect)]
+
+fn hex_encode(bytes: &[u8]) -> String {
+    bytes.iter().map(|b| format!("{b:02X}")).collect()
+}
+
+#[rustfmt::skip]
+fn hex_encode_deep(bytes: &[u8]) -> String {
+    bytes.iter().map(|b| {{{{{ format!("{b:02X}") }}}}}).collect()
+}
+
+macro_rules! fmt {
+    ($x:ident) => {
+        format!("{x:02X}", x = $x)
+    };
+}
+
+fn from_macro(bytes: &[u8]) -> String {
+    bytes.iter().map(|x| fmt!(x)).collect()
+}
+
+fn with_block() -> String {
+    (1..10)
+        .map(|s| {
+            let y = 1;
+            format!("{s} {y}")
+        })
+        .collect()
+}
+fn main() {}
diff --git a/tests/ui/format_collect.stderr b/tests/ui/format_collect.stderr
new file mode 100644
index 00000000000..d918f1ed466
--- /dev/null
+++ b/tests/ui/format_collect.stderr
@@ -0,0 +1,62 @@
+error: use of `format!` to build up a string from an iterator
+  --> $DIR/format_collect.rs:5:5
+   |
+LL |     bytes.iter().map(|b| format!("{b:02X}")).collect()
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+help: call `fold` instead
+  --> $DIR/format_collect.rs:5:18
+   |
+LL |     bytes.iter().map(|b| format!("{b:02X}")).collect()
+   |                  ^^^
+help: ... and use the `write!` macro here
+  --> $DIR/format_collect.rs:5:26
+   |
+LL |     bytes.iter().map(|b| format!("{b:02X}")).collect()
+   |                          ^^^^^^^^^^^^^^^^^^
+   = note: this can be written more efficiently by appending to a `String` directly
+   = note: `-D clippy::format-collect` implied by `-D warnings`
+
+error: use of `format!` to build up a string from an iterator
+  --> $DIR/format_collect.rs:10:5
+   |
+LL |     bytes.iter().map(|b| {{{{{ format!("{b:02X}") }}}}}).collect()
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+help: call `fold` instead
+  --> $DIR/format_collect.rs:10:18
+   |
+LL |     bytes.iter().map(|b| {{{{{ format!("{b:02X}") }}}}}).collect()
+   |                  ^^^
+help: ... and use the `write!` macro here
+  --> $DIR/format_collect.rs:10:32
+   |
+LL |     bytes.iter().map(|b| {{{{{ format!("{b:02X}") }}}}}).collect()
+   |                                ^^^^^^^^^^^^^^^^^^
+   = note: this can be written more efficiently by appending to a `String` directly
+
+error: use of `format!` to build up a string from an iterator
+  --> $DIR/format_collect.rs:24:5
+   |
+LL | /     (1..10)
+LL | |         .map(|s| {
+LL | |             let y = 1;
+LL | |             format!("{s} {y}")
+LL | |         })
+LL | |         .collect()
+   | |__________________^
+   |
+help: call `fold` instead
+  --> $DIR/format_collect.rs:25:10
+   |
+LL |         .map(|s| {
+   |          ^^^
+help: ... and use the `write!` macro here
+  --> $DIR/format_collect.rs:27:13
+   |
+LL |             format!("{s} {y}")
+   |             ^^^^^^^^^^^^^^^^^^
+   = note: this can be written more efficiently by appending to a `String` directly
+
+error: aborting due to 3 previous errors
+