about summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Meakin <karl.meakin@arm.com> 2025-08-09 23:52:11 +0100
committer Karl Meakin <karl.meakin@arm.com> 2025-08-15 01:29:12 +0000
commit c99224536152cff14639b64a3fa4f7a215fd037c (patch)
tree 2ce7f3921d001507020f4fc0e9405261bb6c1d97
parent 898aff704d6f0d00343f21d31b8b9bfac8e43007 (diff)
download rust-c99224536152cff14639b64a3fa4f7a215fd037c.tar.gz
rust-c99224536152cff14639b64a3fa4f7a215fd037c.zip
refactor: Include table sizes in comment at top of `unicode_data.rs`
To make changes in table size obvious from git diffs
-rw-r--r-- library/core/src/unicode/unicode_data.rs 10
-rw-r--r-- src/tools/unicode-table-generator/src/main.rs 20
2 files changed, 19 insertions, 11 deletions
diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs
index b57234bbee9..6059f7d6450 100644
--- a/library/core/src/unicode/unicode_data.rs
+++ b/library/core/src/unicode/unicode_data.rs
@@ -1,4 +1,14 @@
 ///! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!
+// Alphabetic      :  1727 bytes, 142759 codepoints in 757 ranges (U+000041 - U+0323B0) using skiplist
+// Case_Ignorable  :  1053 bytes,   2749 codepoints in 452 ranges (U+000027 - U+0E01F0) using skiplist
+// Cased           :   407 bytes,   4578 codepoints in 159 ranges (U+000041 - U+01F18A) using skiplist
+// Cc              :     9 bytes,     65 codepoints in   2 ranges (U+000000 - U+0000A0) using skiplist
+// Grapheme_Extend :   887 bytes,   2193 codepoints in 375 ranges (U+000300 - U+0E01F0) using skiplist
+// Lowercase       :   935 bytes,   2569 codepoints in 675 ranges (U+000061 - U+01E944) using bitset
+// N               :   457 bytes,   1911 codepoints in 144 ranges (U+000030 - U+01FBFA) using skiplist
+// Uppercase       :   799 bytes,   1978 codepoints in 656 ranges (U+000041 - U+01F18A) using bitset
+// White_Space     :   256 bytes,     25 codepoints in  10 ranges (U+000009 - U+003001) using cascading
+// Total           :  6530 bytes
 
 #[inline(always)]
 const fn bitset_search<
diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs
index 6cdb82a87bd..c1017142097 100644
--- a/src/tools/unicode-table-generator/src/main.rs
+++ b/src/tools/unicode-table-generator/src/main.rs
@@ -239,6 +239,11 @@ fn main() {
         std::fs::write(&path, generate_tests(&write_location, ranges_by_property)).unwrap();
     }
 
+    let mut table_file = String::new();
+    table_file.push_str(
+        "///! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!\n",
+    );
+
     let mut total_bytes = 0;
     let mut modules = Vec::new();
     for (property, ranges) in ranges_by_property {
@@ -252,8 +257,8 @@ fn main() {
         }
 
         modules.push((property.to_lowercase().to_string(), emitter.file));
-        println!(
-            "{:15}: {} bytes, {} codepoints in {} ranges ({} - {}) using {}",
+        table_file.push_str(&format!(
+            "// {:16}: {:5} bytes, {:6} codepoints in {:3} ranges (U+{:06X} - U+{:06X}) using {}\n",
             property,
             emitter.bytes_used,
             datapoints,
@@ -261,15 +266,10 @@ fn main() {
             ranges.first().unwrap().start,
             ranges.last().unwrap().end,
             emitter.desc,
-        );
+        ));
         total_bytes += emitter.bytes_used;
     }
-
-    let mut table_file = String::new();
-
-    table_file.push_str(
-        "///! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!\n",
-    );
+    table_file.push_str(&format!("// {:16}: {:5} bytes\n", "Total", total_bytes));
 
     // Include the range search function
     table_file.push('\n');
@@ -296,8 +296,6 @@ fn main() {
     }
 
     std::fs::write(&write_location, format!("{}\n", table_file.trim_end())).unwrap();
-
-    println!("Total table sizes: {total_bytes} bytes");
 }
 
 fn version() -> String {