about summary refs log tree commit diff
path: root/library/core/src
diff options
context:
space:
mode:
authorStuart Cook <Zalathar@users.noreply.github.com>2025-09-03 23:08:06 +1000
committerGitHub <noreply@github.com>2025-09-03 23:08:06 +1000
commit8b790d7b00fd498ad979eefcbb993850b0caf4b5 (patch)
tree185460172fcc7e01c9e6dbf86002160ae9ff8ff3 /library/core/src
parentf4b946a14788df30b693a28e96aa18c9bee618ad (diff)
parentc3ce0796544152460554d8b8db4e56528fe362db (diff)
downloadrust-8b790d7b00fd498ad979eefcbb993850b0caf4b5.tar.gz
rust-8b790d7b00fd498ad979eefcbb993850b0caf4b5.zip
Rollup merge of #145414 - Kmeakin:km/unicode-table-refactors, r=joshtriplett,tgross35
unicode-table-generator refactors

Split off from https://github.com/rust-lang/rust/pull/145219
Diffstat (limited to 'library/core/src')
-rw-r--r--library/core/src/unicode/unicode_data.rs20
1 files changed, 16 insertions, 4 deletions
diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs
index 55f64f1e96e..f78983bc339 100644
--- a/library/core/src/unicode/unicode_data.rs
+++ b/library/core/src/unicode/unicode_data.rs
@@ -1,4 +1,16 @@
 ///! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!
+// Alphabetic      :  1727 bytes, 142759 codepoints in 757 ranges (U+000041 - U+0323B0) using skiplist
+// Case_Ignorable  :  1053 bytes,   2749 codepoints in 452 ranges (U+000027 - U+0E01F0) using skiplist
+// Cased           :   407 bytes,   4578 codepoints in 159 ranges (U+000041 - U+01F18A) using skiplist
+// Cc              :     9 bytes,     65 codepoints in   2 ranges (U+000000 - U+0000A0) using skiplist
+// Grapheme_Extend :   887 bytes,   2193 codepoints in 375 ranges (U+000300 - U+0E01F0) using skiplist
+// Lowercase       :   935 bytes,   2569 codepoints in 675 ranges (U+000061 - U+01E944) using bitset
+// N               :   457 bytes,   1911 codepoints in 144 ranges (U+000030 - U+01FBFA) using skiplist
+// Uppercase       :   799 bytes,   1978 codepoints in 656 ranges (U+000041 - U+01F18A) using bitset
+// White_Space     :   256 bytes,     25 codepoints in  10 ranges (U+000009 - U+003001) using cascading
+// to_lower        : 11484 bytes
+// to_upper        : 13432 bytes
+// Total           : 31446 bytes
 
 #[inline(always)]
 const fn bitset_search<
@@ -747,7 +759,7 @@ pub mod conversions {
         }
     }
 
-    static LOWERCASE_TABLE: &[(char, u32)] = &[
+    static LOWERCASE_TABLE: &[(char, u32); 1434] = &[
         ('\u{c0}', 224), ('\u{c1}', 225), ('\u{c2}', 226), ('\u{c3}', 227), ('\u{c4}', 228),
         ('\u{c5}', 229), ('\u{c6}', 230), ('\u{c7}', 231), ('\u{c8}', 232), ('\u{c9}', 233),
         ('\u{ca}', 234), ('\u{cb}', 235), ('\u{cc}', 236), ('\u{cd}', 237), ('\u{ce}', 238),
@@ -1097,11 +1109,11 @@ pub mod conversions {
         ('\u{1e921}', 125251),
     ];
 
-    static LOWERCASE_TABLE_MULTI: &[[char; 3]] = &[
+    static LOWERCASE_TABLE_MULTI: &[[char; 3]; 1] = &[
         ['i', '\u{307}', '\u{0}'],
     ];
 
-    static UPPERCASE_TABLE: &[(char, u32)] = &[
+    static UPPERCASE_TABLE: &[(char, u32); 1526] = &[
         ('\u{b5}', 924), ('\u{df}', 4194304), ('\u{e0}', 192), ('\u{e1}', 193), ('\u{e2}', 194),
         ('\u{e3}', 195), ('\u{e4}', 196), ('\u{e5}', 197), ('\u{e6}', 198), ('\u{e7}', 199),
         ('\u{e8}', 200), ('\u{e9}', 201), ('\u{ea}', 202), ('\u{eb}', 203), ('\u{ec}', 204),
@@ -1474,7 +1486,7 @@ pub mod conversions {
         ('\u{1e941}', 125215), ('\u{1e942}', 125216), ('\u{1e943}', 125217),
     ];
 
-    static UPPERCASE_TABLE_MULTI: &[[char; 3]] = &[
+    static UPPERCASE_TABLE_MULTI: &[[char; 3]; 102] = &[
         ['S', 'S', '\u{0}'], ['\u{2bc}', 'N', '\u{0}'], ['J', '\u{30c}', '\u{0}'],
         ['\u{399}', '\u{308}', '\u{301}'], ['\u{3a5}', '\u{308}', '\u{301}'],
         ['\u{535}', '\u{552}', '\u{0}'], ['H', '\u{331}', '\u{0}'], ['T', '\u{308}', '\u{0}'],