about summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Meakin <karl.meakin@arm.com> 2025-08-11 00:57:29 +0000
committer Karl Meakin <karl.meakin@arm.com> 2025-08-16 01:46:30 +0100
commit 1bb9b151c9f9b5116254827f04add845aff33408 (patch)
tree f7d3847093f8b44c7512ac5317c9034145048f29
parent cd7cbe818e4a66d46fe2df993d1b8518eba8a5cd (diff)
download rust-1bb9b151c9f9b5116254827f04add845aff33408.tar.gz
rust-1bb9b151c9f9b5116254827f04add845aff33408.zip
refactor: Hard-code `char::is_control`

According to
https://www.unicode.org/policies/stability_policy.html#Property_Value,
the set of codepoints in `Cc` will never change. So we can hard-code
the patterns to match against instead of using a table.
-rw-r--r-- library/core/src/char/methods.rs 6
-rw-r--r-- library/core/src/unicode/mod.rs 1
-rw-r--r-- library/core/src/unicode/unicode_data.rs 25
-rw-r--r-- src/tools/unicode-table-generator/src/main.rs 1
4 files changed, 5 insertions, 28 deletions
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index 7ee0962721f..61ac7f8a339 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -950,7 +950,11 @@ impl char {
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
     pub fn is_control(self) -> bool {
-        unicode::Cc(self)
+        // According to
+        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
+        // the set of codepoints in `Cc` will never change.
+        // So we can just hard-code the patterns to match against instead of using a table.
+        matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
     }
 
     /// Returns `true` if this `char` has the `Grapheme_Extend` property.
diff --git a/library/core/src/unicode/mod.rs b/library/core/src/unicode/mod.rs
index 49dbdeb1a6d..e1cb69c3c4f 100644
--- a/library/core/src/unicode/mod.rs
+++ b/library/core/src/unicode/mod.rs
@@ -9,7 +9,6 @@ pub use unicode_data::conversions;
 
 #[rustfmt::skip]
 pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
-pub(crate) use unicode_data::cc::lookup as Cc;
 pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
 pub(crate) use unicode_data::lowercase::lookup as Lowercase;
 pub(crate) use unicode_data::n::lookup as N;
diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs
index b57234bbee9..55f64f1e96e 100644
--- a/library/core/src/unicode/unicode_data.rs
+++ b/library/core/src/unicode/unicode_data.rs
@@ -359,31 +359,6 @@ pub mod cased {
 }
 
 #[rustfmt::skip]
-pub mod cc {
-    use super::ShortOffsetRunHeader;
-
-    static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 1] = [
-        ShortOffsetRunHeader::new(0, 1114272),
-    ];
-    static OFFSETS: [u8; 5] = [
-        0, 32, 95, 33, 0,
-    ];
-    pub fn lookup(c: char) -> bool {
-        const {
-            assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
-            let mut i = 0;
-            while i < SHORT_OFFSET_RUNS.len() {
-                assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
-                i += 1;
-            }
-        }
-        // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
-        // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
-        unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
-    }
-}
-
-#[rustfmt::skip]
 pub mod grapheme_extend {
     use super::ShortOffsetRunHeader;
 
diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs
index 6cdb82a87bd..38e5e8bbdb9 100644
--- a/src/tools/unicode-table-generator/src/main.rs
+++ b/src/tools/unicode-table-generator/src/main.rs
@@ -92,7 +92,6 @@ static PROPERTIES: &[&str] = &[
     "Case_Ignorable",
     "Grapheme_Extend",
     "White_Space",
-    "Cc",
     "N",
 ];