diff options
| author | bors <bors@rust-lang.org> | 2024-05-03 17:41:48 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2024-05-03 17:41:48 +0000 |
| commit | 0d7b2fb797f214ea7514cfeaf2caef8178d8e3fc (patch) | |
| tree | 8d76cbb72cc5e77a2885baffc05a1845733b3764 /compiler/rustc_data_structures | |
| parent | d6d3b342e85272f5e75c0d7a1dd3a1d8becb40ac (diff) | |
| parent | 6ee3713b08a3612de836a2dab527e5a644517aa1 (diff) | |
| download | rust-0d7b2fb797f214ea7514cfeaf2caef8178d8e3fc.tar.gz rust-0d7b2fb797f214ea7514cfeaf2caef8178d8e3fc.zip | |
Auto merge of #123441 - saethlin:fixed-len-file-names, r=oli-obk
Stabilize the size of incr comp object file names The current implementation does not produce stable-length paths, and we create the paths in a way that makes our allocation behavior is nondeterministic. I think `@eddyb` fixed a number of other cases like this in the past, and this PR fixes another one. Whether that actually matters I have no idea, but we still have bimodal behavior in rustc-perf and the non-uniformity in `find` and `ls` was bothering me. I've also removed the truncation of the mangled CGU names. Before this PR incr comp paths look like this: ``` target/debug/incremental/scratch-38izrrq90cex7/s-gux6gz0ow8-1ph76gg-ewe1xj434l26w9up5bedsojpd/261xgo1oqnd90ry5.o ``` And after, they look like this: ``` target/debug/incremental/scratch-035omutqbfkbw/s-gux6borni0-16r3v1j-6n64tmwqzchtgqzwwim5amuga/55v2re42sztc8je9bva6g8ft3.o ``` On the one hand, I'm sure this will break some people's builds because they're on Windows and only a few bytes from the path length limit. But if we're that seriously worried about the length of our file names, I have some other ideas on how to make them smaller. And last time I deleted some hash truncations from the compiler, there was a huge drop in the number if incremental compilation ICEs that were reported: https://github.com/rust-lang/rust/pull/110367https://github.com/rust-lang/rust/pull/110367 --- Upon further reading, this PR actually fixes a bug. This comment says the CGU names are supposed to be a fixed-length hash, and before this PR they aren't: https://github.com/rust-lang/rust/blob/ca7d34efa94afe271accf2bd3d44152a5bd6fff1/compiler/rustc_monomorphize/src/partitioning.rs#L445-L448
Diffstat (limited to 'compiler/rustc_data_structures')
| -rw-r--r-- | compiler/rustc_data_structures/src/base_n.rs | 114 | ||||
| -rw-r--r-- | compiler/rustc_data_structures/src/base_n/tests.rs | 12 | ||||
| -rw-r--r-- | compiler/rustc_data_structures/src/lib.rs | 2 |
3 files changed, 102 insertions, 26 deletions
diff --git a/compiler/rustc_data_structures/src/base_n.rs b/compiler/rustc_data_structures/src/base_n.rs index a3eb2b9c416..aed89fadc4c 100644 --- a/compiler/rustc_data_structures/src/base_n.rs +++ b/compiler/rustc_data_structures/src/base_n.rs @@ -1,6 +1,7 @@ /// Converts unsigned integers into a string representation with some base. /// Bases up to and including 36 can be used for case-insensitive things. -use std::str; +use std::ascii; +use std::fmt; #[cfg(test)] mod tests; @@ -9,36 +10,101 @@ pub const MAX_BASE: usize = 64; pub const ALPHANUMERIC_ONLY: usize = 62; pub const CASE_INSENSITIVE: usize = 36; -const BASE_64: &[u8; MAX_BASE] = - b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$"; +const BASE_64: [ascii::Char; MAX_BASE] = { + let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$"; + let Some(ascii) = bytes.as_ascii() else { panic!() }; + *ascii +}; -#[inline] -pub fn push_str(mut n: u128, base: usize, output: &mut String) { - debug_assert!(base >= 2 && base <= MAX_BASE); - let mut s = [0u8; 128]; - let mut index = s.len(); +pub struct BaseNString { + start: usize, + buf: [ascii::Char; 128], +} + +impl std::ops::Deref for BaseNString { + type Target = str; - let base = base as u128; + fn deref(&self) -> &str { + self.buf[self.start..].as_str() + } +} + +impl AsRef<str> for BaseNString { + fn as_ref(&self) -> &str { + self.buf[self.start..].as_str() + } +} + +impl fmt::Display for BaseNString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self) + } +} + +// This trait just lets us reserve the exact right amount of space when doing fixed-length +// case-insensitve encoding. Add any impls you need. +pub trait ToBaseN: Into<u128> { + fn encoded_len(base: usize) -> usize; + + fn to_base_fixed_len(self, base: usize) -> BaseNString { + let mut encoded = self.to_base(base); + encoded.start = encoded.buf.len() - Self::encoded_len(base); + encoded + } - loop { - index -= 1; - s[index] = BASE_64[(n % base) as usize]; - n /= base; + fn to_base(self, base: usize) -> BaseNString { + let mut output = [ascii::Char::Digit0; 128]; - if n == 0 { - break; + let mut n: u128 = self.into(); + + let mut index = output.len(); + loop { + index -= 1; + output[index] = BASE_64[(n % base as u128) as usize]; + n /= base as u128; + + if n == 0 { + break; + } + } + assert_eq!(n, 0); + + BaseNString { start: index, buf: output } + } +} + +impl ToBaseN for u128 { + fn encoded_len(base: usize) -> usize { + let mut max = u128::MAX; + let mut len = 0; + while max > 0 { + len += 1; + max /= base as u128; } + len } +} - output.push_str(unsafe { - // SAFETY: `s` is populated using only valid utf8 characters from `BASE_64` - str::from_utf8_unchecked(&s[index..]) - }); +impl ToBaseN for u64 { + fn encoded_len(base: usize) -> usize { + let mut max = u64::MAX; + let mut len = 0; + while max > 0 { + len += 1; + max /= base as u64; + } + len + } } -#[inline] -pub fn encode(n: u128, base: usize) -> String { - let mut s = String::new(); - push_str(n, base, &mut s); - s +impl ToBaseN for u32 { + fn encoded_len(base: usize) -> usize { + let mut max = u32::MAX; + let mut len = 0; + while max > 0 { + len += 1; + max /= base as u32; + } + len + } } diff --git a/compiler/rustc_data_structures/src/base_n/tests.rs b/compiler/rustc_data_structures/src/base_n/tests.rs index 2be2f0532c9..148d8dde02a 100644 --- a/compiler/rustc_data_structures/src/base_n/tests.rs +++ b/compiler/rustc_data_structures/src/base_n/tests.rs @@ -1,9 +1,17 @@ use super::*; #[test] -fn test_encode() { +fn limits() { + assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36)); + assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36)); + assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36)); +} + +#[test] +fn test_to_base() { fn test(n: u128, base: usize) { - assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32)); + assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32)); + assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32)); } for base in 2..37 { diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs index cf54e700e2b..8dd85b25e0e 100644 --- a/compiler/rustc_data_structures/src/lib.rs +++ b/compiler/rustc_data_structures/src/lib.rs @@ -16,6 +16,8 @@ #![doc(rust_logo)] #![feature(allocator_api)] #![feature(array_windows)] +#![feature(ascii_char)] +#![feature(ascii_char_variants)] #![feature(auto_traits)] #![feature(cfg_match)] #![feature(core_intrinsics)] |
