Rollup merge of #144134 - hkBst:cleanup-unicode-table-gen, r=Mark-Simulacrum

Cleanup unicode table gen

Fixing clippy warnings and moving to edition 2024.
author: Matthias Krüger <476013+matthiaskrgr@users.noreply.github.com> 2025-07-20 08:56:09 +0200
committer: GitHub <noreply@github.com> 2025-07-20 08:56:09 +0200
commit: 9a927a2f03b4436fbc2371dfd50f9847420be905 (patch)
tree: 92271a235dfa72b7239d5495ed0e8494ee0206ab
parent: d24684ef4f78f25e559eec469a49834c0e3cccf5 (diff)
parent: b0073d92fbb5402ff7bed69f28aab4b34b05a9df (diff)
download: rust-9a927a2f03b4436fbc2371dfd50f9847420be905.tar.gz
rust-9a927a2f03b4436fbc2371dfd50f9847420be905.zip
5 files changed, 38 insertions, 45 deletions
diff --git a/src/tools/unicode-table-generator/Cargo.toml b/src/tools/unicode-table-generator/Cargo.toml
index f8a500922d0..3ca6e9e316f 100644
--- a/src/tools/unicode-table-generator/Cargo.toml
+++ b/src/tools/unicode-table-generator/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "unicode-table-generator"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
diff --git a/src/tools/unicode-table-generator/src/cascading_map.rs b/src/tools/unicode-table-generator/src/cascading_map.rs
index 1eb35e819c0..78a7bba3208 100644
--- a/src/tools/unicode-table-generator/src/cascading_map.rs
+++ b/src/tools/unicode-table-generator/src/cascading_map.rs
@@ -21,7 +21,7 @@ impl RawEmitter {
 
         let points = ranges
             .iter()
-            .flat_map(|r| (r.start..r.end).into_iter().collect::<Vec<u32>>())
+            .flat_map(|r| (r.start..r.end).collect::<Vec<u32>>())
             .collect::<Vec<u32>>();
 
         println!("there are {} points", points.len());
@@ -32,21 +32,20 @@ impl RawEmitter {
             // assert that there is no whitespace over the 0x3000 range.
             assert!(point <= 0x3000, "the highest unicode whitespace value has changed");
             let high_bytes = point as usize >> 8;
-            let codepoints = codepoints_by_high_bytes.entry(high_bytes).or_insert_with(Vec::new);
+            let codepoints = codepoints_by_high_bytes.entry(high_bytes).or_default();
             codepoints.push(point);
         }
 
         let mut bit_for_high_byte = 1u8;
         let mut arms = Vec::<String>::new();
 
-        let mut high_bytes: Vec<usize> =
-            codepoints_by_high_bytes.keys().map(|k| k.clone()).collect();
+        let mut high_bytes: Vec<usize> = codepoints_by_high_bytes.keys().copied().collect();
         high_bytes.sort();
         for high_byte in high_bytes {
             let codepoints = codepoints_by_high_bytes.get_mut(&high_byte).unwrap();
             if codepoints.len() == 1 {
                 let ch = codepoints.pop().unwrap();
-                arms.push(format!("{} => c as u32 == {:#04x}", high_byte, ch));
+                arms.push(format!("{high_byte} => c as u32 == {ch:#04x}"));
                 continue;
             }
             // more than 1 codepoint in this arm
@@ -54,8 +53,7 @@ impl RawEmitter {
                 map[(*codepoint & 0xff) as usize] |= bit_for_high_byte;
             }
             arms.push(format!(
-                "{} => WHITESPACE_MAP[c as usize & 0xff] & {} != 0",
-                high_byte, bit_for_high_byte
+                "{high_byte} => WHITESPACE_MAP[c as usize & 0xff] & {bit_for_high_byte} != 0"
             ));
             bit_for_high_byte <<= 1;
         }
@@ -68,7 +66,7 @@ impl RawEmitter {
         writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
         writeln!(&mut self.file, "    match c as u32 >> 8 {{").unwrap();
         for arm in arms {
-            writeln!(&mut self.file, "        {},", arm).unwrap();
+            writeln!(&mut self.file, "        {arm},").unwrap();
         }
         writeln!(&mut self.file, "        _ => false,").unwrap();
         writeln!(&mut self.file, "    }}").unwrap();
diff --git a/src/tools/unicode-table-generator/src/case_mapping.rs b/src/tools/unicode-table-generator/src/case_mapping.rs
index 00241b7ee0e..9c6454492e7 100644
--- a/src/tools/unicode-table-generator/src/case_mapping.rs
+++ b/src/tools/unicode-table-generator/src/case_mapping.rs
@@ -9,7 +9,7 @@ const INDEX_MASK: u32 = 1 << 22;
 pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
     let mut file = String::new();
 
-    write!(file, "const INDEX_MASK: u32 = 0x{:x};", INDEX_MASK).unwrap();
+    write!(file, "const INDEX_MASK: u32 = 0x{INDEX_MASK:x};").unwrap();
     file.push_str("\n\n");
     file.push_str(HEADER.trim_start());
     file.push('\n');
diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs
index 415db2c4dbc..6cdb82a87bd 100644
--- a/src/tools/unicode-table-generator/src/main.rs
+++ b/src/tools/unicode-table-generator/src/main.rs
@@ -160,15 +160,15 @@ fn load_data() -> UnicodeData {
                 .push(Codepoints::Single(row.codepoint));
         }
 
-        if let Some(mapped) = row.simple_lowercase_mapping {
-            if mapped != row.codepoint {
-                to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0));
-            }
+        if let Some(mapped) = row.simple_lowercase_mapping
+            && mapped != row.codepoint
+        {
+            to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0));
         }
-        if let Some(mapped) = row.simple_uppercase_mapping {
-            if mapped != row.codepoint {
-                to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0));
-            }
+        if let Some(mapped) = row.simple_uppercase_mapping
+            && mapped != row.codepoint
+        {
+            to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0));
         }
     }
 
@@ -196,12 +196,12 @@ fn load_data() -> UnicodeData {
                     .flat_map(|codepoints| match codepoints {
                         Codepoints::Single(c) => c
                             .scalar()
-                            .map(|ch| (ch as u32..ch as u32 + 1))
+                            .map(|ch| ch as u32..ch as u32 + 1)
                             .into_iter()
                             .collect::<Vec<_>>(),
                         Codepoints::Range(c) => c
                             .into_iter()
-                            .flat_map(|c| c.scalar().map(|ch| (ch as u32..ch as u32 + 1)))
+                            .flat_map(|c| c.scalar().map(|ch| ch as u32..ch as u32 + 1))
                             .collect::<Vec<_>>(),
                     })
                     .collect::<Vec<Range<u32>>>(),
@@ -236,7 +236,7 @@ fn main() {
     let ranges_by_property = &unicode_data.ranges;
 
     if let Some(path) = test_path {
-        std::fs::write(&path, generate_tests(&write_location, &ranges_by_property)).unwrap();
+        std::fs::write(&path, generate_tests(&write_location, ranges_by_property)).unwrap();
     }
 
     let mut total_bytes = 0;
@@ -246,9 +246,9 @@ fn main() {
 
         let mut emitter = RawEmitter::new();
         if property == &"White_Space" {
-            emit_whitespace(&mut emitter, &ranges);
+            emit_whitespace(&mut emitter, ranges);
         } else {
-            emit_codepoints(&mut emitter, &ranges);
+            emit_codepoints(&mut emitter, ranges);
         }
 
         modules.push((property.to_lowercase().to_string(), emitter.file));
@@ -288,7 +288,7 @@ fn main() {
         for line in contents.lines() {
             if !line.trim().is_empty() {
                 table_file.push_str("    ");
-                table_file.push_str(&line);
+                table_file.push_str(line);
             }
             table_file.push('\n');
         }
@@ -312,7 +312,7 @@ fn version() -> String {
     let start = readme.find(prefix).unwrap() + prefix.len();
     let end = readme.find(" of the Unicode Standard.").unwrap();
     let version =
-        readme[start..end].split('.').map(|v| v.parse::<u32>().expect(&v)).collect::<Vec<_>>();
+        readme[start..end].split('.').map(|v| v.parse::<u32>().expect(v)).collect::<Vec<_>>();
     let [major, minor, micro] = [version[0], version[1], version[2]];
 
     out.push_str(&format!("({major}, {minor}, {micro});\n"));
@@ -320,7 +320,7 @@ fn version() -> String {
 }
 
 fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
-    let pieces = values.into_iter().map(|b| format!("{:?}, ", b)).collect::<Vec<_>>();
+    let pieces = values.into_iter().map(|b| format!("{b:?}, ")).collect::<Vec<_>>();
     let mut out = String::new();
     let mut line = String::from("\n    ");
     for piece in pieces {
@@ -348,7 +348,7 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
     s.push_str("\nfn main() {\n");
 
     for (property, ranges) in ranges {
-        s.push_str(&format!(r#"    println!("Testing {}");"#, property));
+        s.push_str(&format!(r#"    println!("Testing {property}");"#));
         s.push('\n');
         s.push_str(&format!("    {}_true();\n", property.to_lowercase()));
         s.push_str(&format!("    {}_false();\n", property.to_lowercase()));
@@ -373,7 +373,7 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
         s.push_str("    }\n\n");
     }
 
-    s.push_str("}");
+    s.push('}');
     s
 }
 
@@ -388,7 +388,7 @@ fn generate_asserts(s: &mut String, property: &str, points: &[u32], truthy: bool
                 range.start,
             ));
         } else {
-            s.push_str(&format!("        for chn in {:?}u32 {{\n", range));
+            s.push_str(&format!("        for chn in {range:?}u32 {{\n"));
             s.push_str(&format!(
                 "            assert!({}unicode_data::{}::lookup(std::char::from_u32(chn).unwrap()), \"{{:?}}\", chn);\n",
                 if truthy { "" } else { "!" },
@@ -439,7 +439,7 @@ fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
     let mut last_end = None;
     for range in ranges {
         if let Some(last) = last_end {
-            assert!(range.start > last, "{:?}", range);
+            assert!(range.start > last, "{range:?}");
         }
         last_end = Some(range.end);
     }
diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs
index ee94d3c93a6..e9e0efc4594 100644
--- a/src/tools/unicode-table-generator/src/raw_emitter.rs
+++ b/src/tools/unicode-table-generator/src/raw_emitter.rs
@@ -156,10 +156,10 @@ pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
     emitter.blank_line();
 
     let mut bitset = emitter.clone();
-    let bitset_ok = bitset.emit_bitset(&ranges).is_ok();
+    let bitset_ok = bitset.emit_bitset(ranges).is_ok();
 
     let mut skiplist = emitter.clone();
-    skiplist.emit_skiplist(&ranges);
+    skiplist.emit_skiplist(ranges);
 
     if bitset_ok && bitset.bytes_used <= skiplist.bytes_used {
         *emitter = bitset;
@@ -174,7 +174,7 @@ pub fn emit_whitespace(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
     emitter.blank_line();
 
     let mut cascading = emitter.clone();
-    cascading.emit_cascading_map(&ranges);
+    cascading.emit_cascading_map(ranges);
     *emitter = cascading;
     emitter.desc = String::from("cascading");
 }
@@ -272,7 +272,7 @@ impl Canonicalized {
         // for canonical when possible.
         while let Some((&to, _)) = mappings
             .iter()
-            .find(|(&to, _)| to == 0)
+            .find(|&(&to, _)| to == 0)
             .or_else(|| mappings.iter().max_by_key(|m| m.1.len()))
         {
             // Get the mapping with the most entries. Currently, no mapping can
@@ -311,10 +311,9 @@ impl Canonicalized {
                     }
                 }
             }
-            assert!(
-                unique_mapping
-                    .insert(to, UniqueMapping::Canonical(canonical_words.len()))
-                    .is_none()
+            assert_eq!(
+                unique_mapping.insert(to, UniqueMapping::Canonical(canonical_words.len())),
+                None
             );
             canonical_words.push(to);
 
@@ -340,14 +339,10 @@ impl Canonicalized {
         // We'll probably always have some slack though so this loop will still
         // be needed.
         for &w in unique_words {
-            if !unique_mapping.contains_key(&w) {
-                assert!(
-                    unique_mapping
-                        .insert(w, UniqueMapping::Canonical(canonical_words.len()))
-                        .is_none()
-                );
+            unique_mapping.entry(w).or_insert_with(|| {
                 canonical_words.push(w);
-            }
+                UniqueMapping::Canonical(canonical_words.len())
+            });
         }
         assert_eq!(canonicalized_words.len() + canonical_words.len(), unique_words.len());
         assert_eq!(unique_mapping.len(), unique_words.len());
author	Matthias Krüger <476013+matthiaskrgr@users.noreply.github.com>	2025-07-20 08:56:09 +0200
committer	GitHub <noreply@github.com>	2025-07-20 08:56:09 +0200
commit	9a927a2f03b4436fbc2371dfd50f9847420be905 (patch)
tree	92271a235dfa72b7239d5495ed0e8494ee0206ab
parent	d24684ef4f78f25e559eec469a49834c0e3cccf5 (diff)
parent	b0073d92fbb5402ff7bed69f28aab4b34b05a9df (diff)
download	rust-9a927a2f03b4436fbc2371dfd50f9847420be905.tar.gz rust-9a927a2f03b4436fbc2371dfd50f9847420be905.zip