diff options
| author | Karl Meakin <karl.meakin@arm.com> | 2025-08-10 02:13:03 +0100 |
|---|---|---|
| committer | Karl Meakin <karl.meakin@arm.com> | 2025-08-15 01:29:13 +0000 |
| commit | c3ce0796544152460554d8b8db4e56528fe362db (patch) | |
| tree | 4f3138b9a519cbca09729c006bd87e5a8c2cddb2 /src/tools/unicode-table-generator | |
| parent | 30d1bc7ba869c0f86bc6d2e1d9ed1ad3b58f7865 (diff) | |
| download | rust-c3ce0796544152460554d8b8db4e56528fe362db.tar.gz rust-c3ce0796544152460554d8b8db4e56528fe362db.zip | |
refactor: Add tests for case conversions
Diffstat (limited to 'src/tools/unicode-table-generator')
| -rw-r--r-- | src/tools/unicode-table-generator/src/case_mapping.rs | 4 | ||||
| -rw-r--r-- | src/tools/unicode-table-generator/src/main.rs | 48 |
2 files changed, 41 insertions, 11 deletions
diff --git a/src/tools/unicode-table-generator/src/case_mapping.rs b/src/tools/unicode-table-generator/src/case_mapping.rs index a8527ea9a42..49aef3ec33e 100644 --- a/src/tools/unicode-table-generator/src/case_mapping.rs +++ b/src/tools/unicode-table-generator/src/case_mapping.rs @@ -21,11 +21,11 @@ pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [usize; 2]) (file, [lower_size, upper_size]) } -fn generate_tables(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> (String, usize) { +fn generate_tables(case: &str, data: &BTreeMap<u32, [u32; 3]>) -> (String, usize) { let mut mappings = Vec::with_capacity(data.len()); let mut multis = Vec::new(); - for (&key, &(a, b, c)) in data.iter() { + for (&key, &[a, b, c]) in data.iter() { let key = char::from_u32(key).unwrap(); if key.is_ascii() { diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs index c9530fec48a..1d70eebdbc6 100644 --- a/src/tools/unicode-table-generator/src/main.rs +++ b/src/tools/unicode-table-generator/src/main.rs @@ -100,11 +100,11 @@ static PROPERTIES: &[&str] = &[ struct UnicodeData { ranges: Vec<(&'static str, Vec<Range<u32>>)>, - to_upper: BTreeMap<u32, (u32, u32, u32)>, - to_lower: BTreeMap<u32, (u32, u32, u32)>, + to_upper: BTreeMap<u32, [u32; 3]>, + to_lower: BTreeMap<u32, [u32; 3]>, } -fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<(u32, u32, u32)> { +fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<[u32; 3]> { let mut a = None; let mut b = None; let mut c = None; @@ -125,7 +125,7 @@ fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<(u32 } } - Some((a.unwrap(), b.unwrap_or(0), c.unwrap_or(0))) + Some([a.unwrap(), b.unwrap_or(0), c.unwrap_or(0)]) } static UNICODE_DIRECTORY: &str = "unicode-downloads"; @@ -165,12 +165,12 @@ fn load_data() -> UnicodeData { if let Some(mapped) = row.simple_lowercase_mapping && mapped != row.codepoint { - to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0)); + to_lower.insert(row.codepoint.value(), [mapped.value(), 0, 0]); } if let Some(mapped) = row.simple_uppercase_mapping && mapped != row.codepoint { - to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0)); + to_upper.insert(row.codepoint.value(), [mapped.value(), 0, 0]); } } @@ -224,7 +224,7 @@ fn main() { let ranges_by_property = &unicode_data.ranges; if let Some(path) = test_path { - std::fs::write(&path, generate_tests(ranges_by_property).unwrap()).unwrap(); + std::fs::write(&path, generate_tests(&unicode_data).unwrap()).unwrap(); } let mut table_file = String::new(); @@ -328,7 +328,7 @@ fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String { out } -fn generate_tests(ranges: &[(&str, Vec<Range<u32>>)]) -> Result<String, fmt::Error> { +fn generate_tests(data: &UnicodeData) -> Result<String, fmt::Error> { let mut s = String::new(); writeln!(s, "#![feature(core_intrinsics)]")?; writeln!(s, "#![allow(internal_features, dead_code)]")?; @@ -336,7 +336,7 @@ fn generate_tests(ranges: &[(&str, Vec<Range<u32>>)]) -> Result<String, fmt::Err writeln!(s, "use std::intrinsics;")?; writeln!(s, "mod unicode_data;")?; writeln!(s, "fn main() {{")?; - for (property, ranges) in ranges { + for (property, ranges) in &data.ranges { let prop = property.to_lowercase(); writeln!(s, r#" println!("Testing {prop}");"#)?; writeln!(s, " {prop}_true();")?; @@ -355,6 +355,36 @@ fn generate_tests(ranges: &[(&str, Vec<Range<u32>>)]) -> Result<String, fmt::Err writeln!(s, " }}")?; } + for (name, conversion) in ["to_lower", "to_upper"].iter().zip([&data.to_lower, &data.to_upper]) + { + writeln!(s, r#" println!("Testing {name}");"#)?; + for (c, mapping) in conversion { + let c = char::from_u32(*c).unwrap(); + let mapping = mapping.map(|c| char::from_u32(c).unwrap()); + writeln!( + s, + r#" assert_eq!(unicode_data::conversions::{name}({c:?}), {mapping:?});"# + )?; + } + let unmapped: Vec<_> = (char::MIN..=char::MAX) + .filter(|c| !c.is_ascii()) + .map(u32::from) + .filter(|c| !conversion.contains_key(c)) + .collect(); + let unmapped_ranges = ranges_from_set(&unmapped); + for range in unmapped_ranges { + let start = char::from_u32(range.start).unwrap(); + let end = char::from_u32(range.end - 1).unwrap(); + writeln!(s, " for c in {start:?}..={end:?} {{")?; + writeln!( + s, + r#" assert_eq!(unicode_data::conversions::{name}(c), [c, '\0', '\0']);"# + )?; + + writeln!(s, " }}")?; + } + } + writeln!(s, "}}")?; Ok(s) } |
