about summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
authorNicholas Nethercote <n.nethercote@gmail.com>2023-06-22 09:19:58 +1000
committerNicholas Nethercote <n.nethercote@gmail.com>2023-06-26 09:14:11 +1000
commit487bdeb519a821298b76a35d8c437ef2e733da0d (patch)
tree821310f1dc56d6dec4c9eef6dab0d8d1dd9919d9 /compiler
parent8084f397c6710e4748994dd1488b1f399283d469 (diff)
downloadrust-487bdeb519a821298b76a35d8c437ef2e733da0d.tar.gz
rust-487bdeb519a821298b76a35d8c437ef2e733da0d.zip
Improve ordering and naming of CGUs for non-incremental builds.
Currently there are two problems.

First, the CGUS don't end up in size order. The merging loop does sort
by size on each iteration, but we don't sort after the final merge, so
typically there is one CGU out of place. (And sometimes we don't enter
the merging loop at all, in which case they end up in random order.)

Second, we then assign names that differ only by a numeric suffix, and
then we sort them lexicographically by name, giving us an order like
this:

regex.f10ba03eb5ec7975-cgu.1
regex.f10ba03eb5ec7975-cgu.10
regex.f10ba03eb5ec7975-cgu.11
regex.f10ba03eb5ec7975-cgu.12
regex.f10ba03eb5ec7975-cgu.13
regex.f10ba03eb5ec7975-cgu.14
regex.f10ba03eb5ec7975-cgu.15
regex.f10ba03eb5ec7975-cgu.2
regex.f10ba03eb5ec7975-cgu.3
regex.f10ba03eb5ec7975-cgu.4
regex.f10ba03eb5ec7975-cgu.5
regex.f10ba03eb5ec7975-cgu.6
regex.f10ba03eb5ec7975-cgu.7
regex.f10ba03eb5ec7975-cgu.8
regex.f10ba03eb5ec7975-cgu.9

These two problems are really annoying when debugging and profiling the
CGUs.

This commit ensures CGUs are sorted by name *and* reverse sorted by
size. This involves (a) one extra sort by size operation, and (b)
padding the numeric indices with zeroes, e.g.
`regex.f10ba03eb5ec7975-cgu.01`.

(Note that none of this applies for incremental builds, where a
different hash-based CGU naming scheme is used.)
Diffstat (limited to 'compiler')
-rw-r--r--compiler/rustc_monomorphize/src/partitioning.rs33
1 files changed, 27 insertions, 6 deletions
diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs
index 97e3748b8b8..10a496cf993 100644
--- a/compiler/rustc_monomorphize/src/partitioning.rs
+++ b/compiler/rustc_monomorphize/src/partitioning.rs
@@ -368,6 +368,7 @@ fn merge_codegen_units<'tcx>(
 
     let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
 
+    // Rename the newly merged CGUs.
     if cx.tcx.sess.opts.incremental.is_some() {
         // If we are doing incremental compilation, we want CGU names to
         // reflect the path of the source level module they correspond to.
@@ -404,18 +405,38 @@ fn merge_codegen_units<'tcx>(
                 }
             }
         }
+
+        // A sorted order here ensures what follows can be deterministic.
+        codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
     } else {
-        // If we are compiling non-incrementally we just generate simple CGU
-        // names containing an index.
+        // When compiling non-incrementally, we rename the CGUS so they have
+        // identical names except for the numeric suffix, something like
+        // `regex.f10ba03eb5ec7975-cgu.N`, where `N` varies.
+        //
+        // It is useful for debugging and profiling purposes if the resulting
+        // CGUs are sorted by name *and* reverse sorted by size. (CGU 0 is the
+        // biggest, CGU 1 is the second biggest, etc.)
+        //
+        // So first we reverse sort by size. Then we generate the names with
+        // zero-padded suffixes, which means they are automatically sorted by
+        // names. The numeric suffix width depends on the number of CGUs, which
+        // is always greater than zero:
+        // - [1,9]     CGUS: `0`, `1`, `2`, ...
+        // - [10,99]   CGUS: `00`, `01`, `02`, ...
+        // - [100,999] CGUS: `000`, `001`, `002`, ...
+        // - etc.
+        //
+        // If we didn't zero-pad the sorted-by-name order would be `XYZ-cgu.0`,
+        // `XYZ-cgu.1`, `XYZ-cgu.10`, `XYZ-cgu.11`, ..., `XYZ-cgu.2`, etc.
+        codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));
+        let num_digits = codegen_units.len().ilog10() as usize + 1;
         for (index, cgu) in codegen_units.iter_mut().enumerate() {
+            let suffix = format!("{index:0num_digits$}");
             let numbered_codegen_unit_name =
-                cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index));
+                cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(suffix));
             cgu.set_name(numbered_codegen_unit_name);
         }
     }
-
-    // A sorted order here ensures what follows can be deterministic.
-    codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
 }
 
 fn internalize_symbols<'tcx>(