diff options
| author | bors <bors@rust-lang.org> | 2025-09-08 07:25:07 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-09-08 07:25:07 +0000 |
| commit | beeb8e3af54295ba494c250e84ecda4c2c5d85ff (patch) | |
| tree | d19c1b03b6c7df056b27160bc7d910468d112e86 /src/tools | |
| parent | a09fbe2c8372643a27a8082236120f95ed4e6bba (diff) | |
| parent | a8c669461f0c71985c72dd5b05f70b8d4d149e3b (diff) | |
| download | rust-beeb8e3af54295ba494c250e84ecda4c2c5d85ff.tar.gz rust-beeb8e3af54295ba494c250e84ecda4c2c5d85ff.zip | |
Auto merge of #146173 - Kmeakin:km/unicode-data/no-ascii, r=jhpratt
Don't include ASCII characters in Unicode tables

Split off from https://github.com/rust-lang/rust/pull/145219
Diffstat (limited to 'src/tools')
4 files changed, 5 insertions, 0 deletions
diff --git a/src/tools/unicode-table-generator/src/cascading_map.rs b/src/tools/unicode-table-generator/src/cascading_map.rs index 78a7bba3208..56e6401908d 100644 --- a/src/tools/unicode-table-generator/src/cascading_map.rs +++ b/src/tools/unicode-table-generator/src/cascading_map.rs @@ -64,6 +64,7 @@ impl RawEmitter { writeln!(&mut self.file, "#[inline]").unwrap(); writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap(); + writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap(); writeln!(&mut self.file, " match c as u32 >> 8 {{").unwrap(); for arm in arms { writeln!(&mut self.file, " {arm},").unwrap(); diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs index aa7d97f7f3d..ded9205ffc4 100644 --- a/src/tools/unicode-table-generator/src/main.rs +++ b/src/tools/unicode-table-generator/src/main.rs @@ -195,6 +195,7 @@ fn load_data() -> UnicodeData { .into_iter() .flatten() .flat_map(|cp| cp.scalar()) + .filter(|c| !c.is_ascii()) .map(u32::from) .collect::<Vec<_>>(); (prop, ranges_from_set(&codepoints)) diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs index 03ed9499e26..297965615c1 100644 --- a/src/tools/unicode-table-generator/src/raw_emitter.rs +++ b/src/tools/unicode-table-generator/src/raw_emitter.rs @@ -98,6 +98,7 @@ impl RawEmitter { self.blank_line(); writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap(); + writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap(); if first_code_point > 0x7f { writeln!(&mut self.file, " (c as u32) >= {first_code_point:#04x} &&").unwrap(); } diff --git a/src/tools/unicode-table-generator/src/skiplist.rs b/src/tools/unicode-table-generator/src/skiplist.rs index 34c9802e122..660a8f342f7 100644 --- a/src/tools/unicode-table-generator/src/skiplist.rs +++ b/src/tools/unicode-table-generator/src/skiplist.rs @@ -99,6 +99,7 @@ impl RawEmitter { if 
first_code_point > 0x7f { writeln!(&mut self.file, "#[inline]").unwrap(); writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap(); + writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap(); writeln!(&mut self.file, " (c as u32) >= {first_code_point:#04x} && lookup_slow(c)") .unwrap(); writeln!(&mut self.file, "}}").unwrap(); @@ -107,6 +108,7 @@ impl RawEmitter { writeln!(&mut self.file, "fn lookup_slow(c: char) -> bool {{").unwrap(); } else { writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap(); + writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap(); } writeln!(&mut self.file, " const {{").unwrap(); writeln!( |
