diff options
| author | Karl Meakin <karl.meakin@arm.com> | 2025-08-11 00:57:29 +0000 |
|---|---|---|
| committer | Karl Meakin <karl.meakin@arm.com> | 2025-08-16 01:46:30 +0100 |
| commit | 1bb9b151c9f9b5116254827f04add845aff33408 (patch) | |
| tree | f7d3847093f8b44c7512ac5317c9034145048f29 | |
| parent | cd7cbe818e4a66d46fe2df993d1b8518eba8a5cd (diff) | |
| download | rust-1bb9b151c9f9b5116254827f04add845aff33408.tar.gz rust-1bb9b151c9f9b5116254827f04add845aff33408.zip | |
refactor: Hard-code `char::is_control`
According to https://www.unicode.org/policies/stability_policy.html#Property_Value, the set of codepoints in `Cc` will never change. So we can hard-code the patterns to match against instead of using a table.
| -rw-r--r-- | library/core/src/char/methods.rs | 6 | ||||
| -rw-r--r-- | library/core/src/unicode/mod.rs | 1 | ||||
| -rw-r--r-- | library/core/src/unicode/unicode_data.rs | 25 | ||||
| -rw-r--r-- | src/tools/unicode-table-generator/src/main.rs | 1 |
4 files changed, 5 insertions, 28 deletions
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 7ee0962721f..61ac7f8a339 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -950,7 +950,11 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn is_control(self) -> bool { - unicode::Cc(self) + // According to + // https://www.unicode.org/policies/stability_policy.html#Property_Value, + // the set of codepoints in `Cc` will never change. + // So we can just hard-code the patterns to match against instead of using a table. + matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}') } /// Returns `true` if this `char` has the `Grapheme_Extend` property. diff --git a/library/core/src/unicode/mod.rs b/library/core/src/unicode/mod.rs index 49dbdeb1a6d..e1cb69c3c4f 100644 --- a/library/core/src/unicode/mod.rs +++ b/library/core/src/unicode/mod.rs @@ -9,7 +9,6 @@ pub use unicode_data::conversions; #[rustfmt::skip] pub(crate) use unicode_data::alphabetic::lookup as Alphabetic; -pub(crate) use unicode_data::cc::lookup as Cc; pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend; pub(crate) use unicode_data::lowercase::lookup as Lowercase; pub(crate) use unicode_data::n::lookup as N; diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs index b57234bbee9..55f64f1e96e 100644 --- a/library/core/src/unicode/unicode_data.rs +++ b/library/core/src/unicode/unicode_data.rs @@ -359,31 +359,6 @@ pub mod cased { } #[rustfmt::skip] -pub mod cc { - use super::ShortOffsetRunHeader; - - static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 1] = [ - ShortOffsetRunHeader::new(0, 1114272), - ]; - static OFFSETS: [u8; 5] = [ - 0, 32, 95, 33, 0, - ]; - pub fn lookup(c: char) -> bool { - const { - assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32); - let mut i = 0; - while i < SHORT_OFFSET_RUNS.len() { - assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len()); - i += 1; - } - } - // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX` - // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`. - unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) } - } -} - -#[rustfmt::skip] pub mod grapheme_extend { use super::ShortOffsetRunHeader; diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs index 6cdb82a87bd..38e5e8bbdb9 100644 --- a/src/tools/unicode-table-generator/src/main.rs +++ b/src/tools/unicode-table-generator/src/main.rs @@ -92,7 +92,6 @@ static PROPERTIES: &[&str] = &[ "Case_Ignorable", "Grapheme_Extend", "White_Space", - "Cc", "N", ]; |
