about summary refs log tree commit diff
path: root/compiler/rustc_data_structures
diff options
context:
space:
mode:
author: bors <bors@rust-lang.org> 2024-05-03 17:41:48 +0000
committer: bors <bors@rust-lang.org> 2024-05-03 17:41:48 +0000
commit: 0d7b2fb797f214ea7514cfeaf2caef8178d8e3fc (patch)
tree: 8d76cbb72cc5e77a2885baffc05a1845733b3764 /compiler/rustc_data_structures
parent: d6d3b342e85272f5e75c0d7a1dd3a1d8becb40ac (diff)
parent: 6ee3713b08a3612de836a2dab527e5a644517aa1 (diff)
download: rust-0d7b2fb797f214ea7514cfeaf2caef8178d8e3fc.tar.gz
rust-0d7b2fb797f214ea7514cfeaf2caef8178d8e3fc.zip
Auto merge of #123441 - saethlin:fixed-len-file-names, r=oli-obk
Stabilize the size of incr comp object file names

The current implementation does not produce stable-length paths, and we create the paths in a way that makes our allocation behavior nondeterministic. I think `@eddyb` fixed a number of other cases like this in the past, and this PR fixes another one. Whether that actually matters I have no idea, but we still have bimodal behavior in rustc-perf and the non-uniformity in `find` and `ls` was bothering me.

I've also removed the truncation of the mangled CGU names. Before this PR incr comp paths look like this:
```
target/debug/incremental/scratch-38izrrq90cex7/s-gux6gz0ow8-1ph76gg-ewe1xj434l26w9up5bedsojpd/261xgo1oqnd90ry5.o
```
And after, they look like this:
```
target/debug/incremental/scratch-035omutqbfkbw/s-gux6borni0-16r3v1j-6n64tmwqzchtgqzwwim5amuga/55v2re42sztc8je9bva6g8ft3.o
```

On the one hand, I'm sure this will break some people's builds because they're on Windows and only a few bytes from the path length limit. But if we're that seriously worried about the length of our file names, I have some other ideas on how to make them smaller. And last time I deleted some hash truncations from the compiler, there was a huge drop in the number of incremental compilation ICEs that were reported: https://github.com/rust-lang/rust/pull/110367

---

Upon further reading, this PR actually fixes a bug. This comment says the CGU names are supposed to be a fixed-length hash, and before this PR they aren't: https://github.com/rust-lang/rust/blob/ca7d34efa94afe271accf2bd3d44152a5bd6fff1/compiler/rustc_monomorphize/src/partitioning.rs#L445-L448
Diffstat (limited to 'compiler/rustc_data_structures')
-rw-r--r-- compiler/rustc_data_structures/src/base_n.rs 114
-rw-r--r-- compiler/rustc_data_structures/src/base_n/tests.rs 12
-rw-r--r-- compiler/rustc_data_structures/src/lib.rs 2
3 files changed, 102 insertions, 26 deletions
diff --git a/compiler/rustc_data_structures/src/base_n.rs b/compiler/rustc_data_structures/src/base_n.rs
index a3eb2b9c416..aed89fadc4c 100644
--- a/compiler/rustc_data_structures/src/base_n.rs
+++ b/compiler/rustc_data_structures/src/base_n.rs
@@ -1,6 +1,7 @@
 /// Converts unsigned integers into a string representation with some base.
 /// Bases up to and including 36 can be used for case-insensitive things.
-use std::str;
+use std::ascii;
+use std::fmt;
 
 #[cfg(test)]
 mod tests;
@@ -9,36 +10,101 @@ pub const MAX_BASE: usize = 64;
 pub const ALPHANUMERIC_ONLY: usize = 62;
 pub const CASE_INSENSITIVE: usize = 36;
 
-const BASE_64: &[u8; MAX_BASE] =
-    b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
+const BASE_64: [ascii::Char; MAX_BASE] = {
+    let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
+    let Some(ascii) = bytes.as_ascii() else { panic!() };
+    *ascii
+};
 
-#[inline]
-pub fn push_str(mut n: u128, base: usize, output: &mut String) {
-    debug_assert!(base >= 2 && base <= MAX_BASE);
-    let mut s = [0u8; 128];
-    let mut index = s.len();
+pub struct BaseNString {
+    start: usize,
+    buf: [ascii::Char; 128],
+}
+
+impl std::ops::Deref for BaseNString {
+    type Target = str;
 
-    let base = base as u128;
+    fn deref(&self) -> &str {
+        self.buf[self.start..].as_str()
+    }
+}
+
+impl AsRef<str> for BaseNString {
+    fn as_ref(&self) -> &str {
+        self.buf[self.start..].as_str()
+    }
+}
+
+impl fmt::Display for BaseNString {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(self)
+    }
+}
+
+// This trait just lets us reserve the exact right amount of space when doing fixed-length
+// case-insensitive encoding. Add any impls you need.
+pub trait ToBaseN: Into<u128> {
+    fn encoded_len(base: usize) -> usize;
+
+    fn to_base_fixed_len(self, base: usize) -> BaseNString {
+        let mut encoded = self.to_base(base);
+        encoded.start = encoded.buf.len() - Self::encoded_len(base);
+        encoded
+    }
 
-    loop {
-        index -= 1;
-        s[index] = BASE_64[(n % base) as usize];
-        n /= base;
+    fn to_base(self, base: usize) -> BaseNString {
+        let mut output = [ascii::Char::Digit0; 128];
 
-        if n == 0 {
-            break;
+        let mut n: u128 = self.into();
+
+        let mut index = output.len();
+        loop {
+            index -= 1;
+            output[index] = BASE_64[(n % base as u128) as usize];
+            n /= base as u128;
+
+            if n == 0 {
+                break;
+            }
+        }
+        assert_eq!(n, 0);
+
+        BaseNString { start: index, buf: output }
+    }
+}
+
+impl ToBaseN for u128 {
+    fn encoded_len(base: usize) -> usize {
+        let mut max = u128::MAX;
+        let mut len = 0;
+        while max > 0 {
+            len += 1;
+            max /= base as u128;
         }
+        len
     }
+}
 
-    output.push_str(unsafe {
-        // SAFETY: `s` is populated using only valid utf8 characters from `BASE_64`
-        str::from_utf8_unchecked(&s[index..])
-    });
+impl ToBaseN for u64 {
+    fn encoded_len(base: usize) -> usize {
+        let mut max = u64::MAX;
+        let mut len = 0;
+        while max > 0 {
+            len += 1;
+            max /= base as u64;
+        }
+        len
+    }
 }
 
-#[inline]
-pub fn encode(n: u128, base: usize) -> String {
-    let mut s = String::new();
-    push_str(n, base, &mut s);
-    s
+impl ToBaseN for u32 {
+    fn encoded_len(base: usize) -> usize {
+        let mut max = u32::MAX;
+        let mut len = 0;
+        while max > 0 {
+            len += 1;
+            max /= base as u32;
+        }
+        len
+    }
 }
diff --git a/compiler/rustc_data_structures/src/base_n/tests.rs b/compiler/rustc_data_structures/src/base_n/tests.rs
index 2be2f0532c9..148d8dde02a 100644
--- a/compiler/rustc_data_structures/src/base_n/tests.rs
+++ b/compiler/rustc_data_structures/src/base_n/tests.rs
@@ -1,9 +1,17 @@
 use super::*;
 
 #[test]
-fn test_encode() {
+fn limits() {
+    assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36));
+    assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36));
+    assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36));
+}
+
+#[test]
+fn test_to_base() {
     fn test(n: u128, base: usize) {
-        assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
+        assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32));
+        assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32));
     }
 
     for base in 2..37 {
diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs
index cf54e700e2b..8dd85b25e0e 100644
--- a/compiler/rustc_data_structures/src/lib.rs
+++ b/compiler/rustc_data_structures/src/lib.rs
@@ -16,6 +16,8 @@
 #![doc(rust_logo)]
 #![feature(allocator_api)]
 #![feature(array_windows)]
+#![feature(ascii_char)]
+#![feature(ascii_char_variants)]
 #![feature(auto_traits)]
 #![feature(cfg_match)]
 #![feature(core_intrinsics)]