diff options
| author | Karl Meakin <karl.meakin@arm.com> | 2025-08-10 01:10:15 +0100 |
|---|---|---|
| committer | Karl Meakin <karl.meakin@arm.com> | 2025-08-15 01:29:12 +0000 |
| commit | 5d54ac5276eade9d9424fc4fbfa6f77bcd5d4940 (patch) | |
| tree | 80e641bef18df92a4b2fe6b657c26ee11ad700d8 | |
| parent | 69e1974bb0bfbcc679d29950b1e4540cd0b9b3ee (diff) | |
| download | rust-5d54ac5276eade9d9424fc4fbfa6f77bcd5d4940.tar.gz rust-5d54ac5276eade9d9424fc4fbfa6f77bcd5d4940.zip | |
refactor: rewrite `ranges_from_set`
The `merge_ranges` function was very complicated and hard to understand. Fortunately, we can use `slice::chunk_by` to achieve the same thing.
| -rw-r--r-- | src/tools/unicode-table-generator/src/main.rs | 83 |
1 files changed, 17 insertions, 66 deletions
diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs index f755ad048e4..bf0511a2c77 100644 --- a/src/tools/unicode-table-generator/src/main.rs +++ b/src/tools/unicode-table-generator/src/main.rs @@ -187,33 +187,19 @@ fn load_data() -> UnicodeData { } } - let mut properties: HashMap<&'static str, Vec<Range<u32>>> = properties + let mut properties: Vec<(&'static str, Vec<Range<u32>>)> = properties .into_iter() - .map(|(k, v)| { - ( - k, - v.into_iter() - .flat_map(|codepoints| match codepoints { - Codepoints::Single(c) => c - .scalar() - .map(|ch| ch as u32..ch as u32 + 1) - .into_iter() - .collect::<Vec<_>>(), - Codepoints::Range(c) => c - .into_iter() - .flat_map(|c| c.scalar().map(|ch| ch as u32..ch as u32 + 1)) - .collect::<Vec<_>>(), - }) - .collect::<Vec<Range<u32>>>(), - ) + .map(|(prop, codepoints)| { + let codepoints = codepoints + .into_iter() + .flatten() + .flat_map(|cp| cp.scalar()) + .map(u32::from) + .collect::<Vec<_>>(); + (prop, ranges_from_set(&codepoints)) }) .collect(); - for ranges in properties.values_mut() { - merge_ranges(ranges); - } - - let mut properties = properties.into_iter().collect::<Vec<_>>(); properties.sort_by_key(|p| p.0); UnicodeData { ranges: properties, to_lower, to_upper } } @@ -402,48 +388,13 @@ fn generate_asserts(s: &mut String, property: &str, points: &[u32], truthy: bool } } +/// Group the elements of `set` into contigous ranges fn ranges_from_set(set: &[u32]) -> Vec<Range<u32>> { - let mut ranges = set.iter().map(|e| (*e)..(*e + 1)).collect::<Vec<Range<u32>>>(); - merge_ranges(&mut ranges); - ranges -} - -fn merge_ranges(ranges: &mut Vec<Range<u32>>) { - loop { - let mut new_ranges = Vec::new(); - let mut idx_iter = 0..(ranges.len() - 1); - let mut should_insert_last = true; - while let Some(idx) = idx_iter.next() { - let cur = ranges[idx].clone(); - let next = ranges[idx + 1].clone(); - if cur.end == next.start { - if idx_iter.next().is_none() { - // We're merging the 
last element - should_insert_last = false; - } - new_ranges.push(cur.start..next.end); - } else { - // We're *not* merging the last element - should_insert_last = true; - new_ranges.push(cur); - } - } - if should_insert_last { - new_ranges.push(ranges.last().unwrap().clone()); - } - if new_ranges.len() == ranges.len() { - *ranges = new_ranges; - break; - } else { - *ranges = new_ranges; - } - } - - let mut last_end = None; - for range in ranges { - if let Some(last) = last_end { - assert!(range.start > last, "{range:?}"); - } - last_end = Some(range.end); - } + set.chunk_by(|a, b| a + 1 == *b) + .map(|chunk| { + let start = *chunk.first().unwrap(); + let end = *chunk.last().unwrap(); + start..(end + 1) + }) + .collect() } |
