about summary refs log tree commit diff
path: root/src/tools
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2025-09-08 07:25:07 +0000
committer bors <bors@rust-lang.org> 2025-09-08 07:25:07 +0000
commit beeb8e3af54295ba494c250e84ecda4c2c5d85ff (patch)
tree d19c1b03b6c7df056b27160bc7d910468d112e86 /src/tools
parent a09fbe2c8372643a27a8082236120f95ed4e6bba (diff)
parent a8c669461f0c71985c72dd5b05f70b8d4d149e3b (diff)
download rust-beeb8e3af54295ba494c250e84ecda4c2c5d85ff.tar.gz
rust-beeb8e3af54295ba494c250e84ecda4c2c5d85ff.zip
Auto merge of #146173 - Kmeakin:km/unicode-data/no-ascii, r=jhpratt
Don't include ASCII characters in Unicode tables

Split off from https://github.com/rust-lang/rust/pull/145219
Diffstat (limited to 'src/tools')
-rw-r--r-- src/tools/unicode-table-generator/src/cascading_map.rs 1
-rw-r--r-- src/tools/unicode-table-generator/src/main.rs 1
-rw-r--r-- src/tools/unicode-table-generator/src/raw_emitter.rs 1
-rw-r--r-- src/tools/unicode-table-generator/src/skiplist.rs 2
4 files changed, 5 insertions, 0 deletions
diff --git a/src/tools/unicode-table-generator/src/cascading_map.rs b/src/tools/unicode-table-generator/src/cascading_map.rs
index 78a7bba3208..56e6401908d 100644
--- a/src/tools/unicode-table-generator/src/cascading_map.rs
+++ b/src/tools/unicode-table-generator/src/cascading_map.rs
@@ -64,6 +64,7 @@ impl RawEmitter {
 
         writeln!(&mut self.file, "#[inline]").unwrap();
         writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
+        writeln!(&mut self.file, "    debug_assert!(!c.is_ascii());").unwrap();
         writeln!(&mut self.file, "    match c as u32 >> 8 {{").unwrap();
         for arm in arms {
             writeln!(&mut self.file, "        {arm},").unwrap();
diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs
index aa7d97f7f3d..ded9205ffc4 100644
--- a/src/tools/unicode-table-generator/src/main.rs
+++ b/src/tools/unicode-table-generator/src/main.rs
@@ -195,6 +195,7 @@ fn load_data() -> UnicodeData {
                 .into_iter()
                 .flatten()
                 .flat_map(|cp| cp.scalar())
+                .filter(|c| !c.is_ascii())
                 .map(u32::from)
                 .collect::<Vec<_>>();
             (prop, ranges_from_set(&codepoints))
diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs
index 03ed9499e26..297965615c1 100644
--- a/src/tools/unicode-table-generator/src/raw_emitter.rs
+++ b/src/tools/unicode-table-generator/src/raw_emitter.rs
@@ -98,6 +98,7 @@ impl RawEmitter {
         self.blank_line();
 
         writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
+        writeln!(&mut self.file, "    debug_assert!(!c.is_ascii());").unwrap();
         if first_code_point > 0x7f {
             writeln!(&mut self.file, "    (c as u32) >= {first_code_point:#04x} &&").unwrap();
         }
diff --git a/src/tools/unicode-table-generator/src/skiplist.rs b/src/tools/unicode-table-generator/src/skiplist.rs
index 34c9802e122..660a8f342f7 100644
--- a/src/tools/unicode-table-generator/src/skiplist.rs
+++ b/src/tools/unicode-table-generator/src/skiplist.rs
@@ -99,6 +99,7 @@ impl RawEmitter {
         if first_code_point > 0x7f {
             writeln!(&mut self.file, "#[inline]").unwrap();
             writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
+            writeln!(&mut self.file, "    debug_assert!(!c.is_ascii());").unwrap();
             writeln!(&mut self.file, "    (c as u32) >= {first_code_point:#04x} && lookup_slow(c)")
                 .unwrap();
             writeln!(&mut self.file, "}}").unwrap();
@@ -107,6 +108,7 @@ impl RawEmitter {
             writeln!(&mut self.file, "fn lookup_slow(c: char) -> bool {{").unwrap();
         } else {
             writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
+            writeln!(&mut self.file, "    debug_assert!(!c.is_ascii());").unwrap();
         }
         writeln!(&mut self.file, "    const {{").unwrap();
         writeln!(