about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/libcore/unicode/mod.rs28
-rw-r--r--src/libcore/unicode/unicode_data.rs1347
-rw-r--r--src/tools/unicode-table-generator/src/main.rs28
-rw-r--r--src/tools/unicode-table-generator/src/raw_emitter.rs240
4 files changed, 1211 insertions, 432 deletions
diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs
index d1c68863e16..2a41685a480 100644
--- a/src/libcore/unicode/mod.rs
+++ b/src/libcore/unicode/mod.rs
@@ -34,12 +34,19 @@ pub use unicode_data::uppercase::lookup as Uppercase;
 pub use unicode_data::white_space::lookup as White_Space;
 
 #[inline(always)]
-fn range_search<const N: usize, const CHUNK_SIZE: usize, const N1: usize, const N2: usize>(
+fn range_search<
+    const N: usize,
+    const CHUNK_SIZE: usize,
+    const N1: usize,
+    const CANONICAL: usize,
+    const CANONICALIZED: usize,
+>(
     needle: u32,
     chunk_idx_map: &[u8; N],
     (last_chunk_idx, last_chunk_mapping): (u16, u8),
     bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
-    bitset: &[u64; N2],
+    bitset_canonical: &[u64; CANONICAL],
+    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
 ) -> bool {
     let bucket_idx = (needle / 64) as usize;
     let chunk_map_idx = bucket_idx / CHUNK_SIZE;
@@ -53,7 +60,20 @@ fn range_search<const N: usize, const CHUNK_SIZE: usize, const N1: usize, const
     } else {
         chunk_idx_map[chunk_map_idx]
     };
-    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece];
-    let word = bitset[(idx as usize)];
+    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
+    let word = if idx < CANONICAL {
+        bitset_canonical[idx]
+    } else {
+        let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
+        let mut word = bitset_canonical[real_idx as usize];
+        let should_invert = mapping & (1 << 7) != 0;
+        if should_invert {
+            word = !word;
+        }
+        // Unset the inversion bit
+        let rotate_by = mapping & !(1 << 7);
+        word = word.rotate_left(rotate_by as u32);
+        word
+    };
     (word & (1 << (needle % 64) as u64)) != 0
 }
diff --git a/src/libcore/unicode/unicode_data.rs b/src/libcore/unicode/unicode_data.rs
index 7a72f080e33..555a0437f7b 100644
--- a/src/libcore/unicode/unicode_data.rs
+++ b/src/libcore/unicode/unicode_data.rs
@@ -5,120 +5,261 @@ pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0);
 
 #[rustfmt::skip]
 pub mod alphabetic {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (393, 67);
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (393, 11);
     static BITSET_CHUNKS_MAP: [u8; 393] = [
-        8, 60, 56, 38, 16, 33, 34, 24, 35, 50, 41, 49, 37, 39, 20, 66, 9, 0, 6, 0, 0, 0, 36, 18,
-        26, 0, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 70, 74, 74, 74, 74, 74, 74, 74, 74,
-        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
-        74, 74, 74, 74, 74, 74, 74, 74, 74, 73, 74, 74, 52, 15, 13, 21, 74, 74, 74, 74, 74, 74, 74,
-        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 2, 48, 65, 10, 32, 7, 53, 64, 31, 19, 44, 5, 42, 27, 45, 30, 22, 29, 28, 4,
-        74, 68, 46, 0, 0, 0, 0, 0, 74, 74, 17, 0, 0, 0, 0, 0, 0, 0, 74, 43, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 74, 23, 0, 11, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 72, 74,
-        74, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 61, 0, 0, 0, 0, 47, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 51, 55, 0, 0, 0, 0, 14, 3, 0, 0, 57, 0, 0, 25, 1, 0, 0, 0, 0, 0, 0,
-        0, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
-        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
-        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
-        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 59, 74, 74, 74, 74, 74, 74, 74,
-        63, 40, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 54, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
-        74, 74, 74, 71, 0, 0, 0, 0, 0, 0, 74, 12, 0, 0, 74, 74, 74, 74, 74, 74, 74, 74, 74,
+        73, 17, 1, 52, 39, 48, 49, 58, 50, 26, 0, 29, 51, 57, 60, 12, 63, 72, 66, 72, 72, 72, 55,
+        53, 42, 72, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, 23,
+        38, 36, 61, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 72, 72, 72,
+        72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 70, 28, 14, 64, 47, 65, 3, 16, 46, 40,
+        32, 67, 30, 43, 24, 54, 35, 45, 44, 68, 4, 10, 31, 72, 72, 72, 72, 72, 4, 4, 59, 72, 72, 72,
+        72, 72, 72, 72, 4, 33, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 4,
+        34, 72, 62, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 21, 72, 72, 72, 72, 72, 72, 72, 72,
+        72, 72, 72, 72, 72, 72, 72, 72, 72, 15, 18, 72, 72, 72, 72, 25, 72, 72, 72, 72, 72, 72, 72,
+        72, 72, 72, 72, 27, 22, 72, 72, 72, 72, 37, 69, 72, 72, 19, 72, 72, 41, 71, 72, 72, 72, 72,
+        72, 72, 72, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4,
+        4, 4, 4, 13, 56, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7,
+        72, 72, 72, 72, 72, 72, 4, 74, 72, 72, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 8]; 75] = [
-        [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 195, 205, 10, 0], [0, 0, 0, 0, 254, 254, 254, 254],
-        [0, 0, 0, 58, 0, 0, 0, 0], [0, 0, 0, 92, 0, 0, 66, 0], [0, 0, 69, 0, 199, 6, 195, 93],
-        [0, 0, 182, 52, 0, 0, 0, 0], [0, 0, 233, 19, 216, 108, 237, 21],
-        [0, 107, 103, 180, 254, 254, 254, 254], [0, 148, 30, 0, 172, 226, 9, 0],
-        [0, 184, 254, 125, 106, 221, 145, 29], [0, 254, 0, 0, 254, 247, 39, 68],
-        [35, 0, 0, 0, 0, 0, 0, 0], [48, 80, 254, 169, 206, 123, 189, 139],
-        [53, 0, 0, 0, 129, 17, 0, 0], [57, 149, 254, 65, 223, 254, 249, 187],
-        [59, 54, 185, 203, 171, 191, 161, 117], [62, 0, 0, 0, 0, 0, 0, 0],
-        [63, 0, 0, 0, 0, 0, 0, 0], [88, 122, 31, 46, 89, 74, 20, 0],
-        [95, 131, 168, 105, 254, 254, 254, 82], [95, 179, 145, 86, 211, 204, 254, 56],
-        [101, 0, 225, 146, 151, 2, 217, 45], [101, 37, 0, 60, 65, 160, 18, 0],
-        [109, 16, 127, 38, 1, 192, 124, 0], [111, 135, 113, 0, 0, 0, 0, 0],
-        [119, 253, 224, 175, 193, 254, 227, 195], [134, 0, 202, 51, 163, 43, 0, 0],
-        [143, 190, 91, 0, 153, 218, 24, 0], [144, 246, 32, 101, 0, 0, 0, 0],
-        [145, 4, 97, 0, 55, 0, 0, 0], [150, 94, 37, 85, 102, 0, 157, 0],
-        [154, 34, 254, 110, 0, 84, 0, 0], [158, 87, 164, 118, 162, 67, 159, 23],
-        [166, 42, 165, 72, 167, 177, 126, 76], [176, 246, 234, 174, 254, 254, 254, 254],
-        [213, 239, 254, 77, 209, 64, 142, 238], [225, 101, 207, 89, 98, 81, 208, 10],
-        [230, 215, 254, 152, 246, 248, 71, 104], [232, 83, 147, 1, 188, 13, 178, 70],
-        [237, 254, 254, 254, 254, 254, 254, 254], [253, 254, 254, 254, 254, 254, 254, 254],
-        [254, 6, 100, 50, 75, 90, 254, 28], [254, 7, 0, 0, 0, 0, 0, 0],
-        [254, 9, 75, 75, 49, 0, 0, 0], [254, 41, 254, 8, 0, 0, 141, 33],
-        [254, 62, 254, 254, 254, 3, 0, 0], [254, 121, 36, 0, 0, 0, 0, 0],
-        [254, 210, 254, 25, 136, 251, 71, 243], [254, 214, 231, 99, 79, 78, 183, 27],
-        [254, 235, 140, 241, 240, 26, 228, 128], [254, 242, 170, 252, 138, 245, 254, 254],
-        [254, 254, 15, 132, 254, 254, 254, 254], [254, 254, 196, 114, 201, 44, 0, 0],
-        [254, 254, 197, 254, 254, 254, 254, 254], [254, 254, 220, 173, 186, 212, 219, 14],
-        [254, 254, 250, 254, 194, 229, 156, 73], [254, 254, 254, 5, 254, 12, 0, 0],
-        [254, 254, 254, 22, 9, 0, 0, 0], [254, 254, 254, 35, 254, 254, 254, 254],
-        [254, 254, 254, 61, 0, 155, 222, 181], [254, 254, 254, 116, 0, 0, 0, 0],
-        [254, 254, 254, 254, 37, 200, 254, 254], [254, 254, 254, 254, 84, 254, 254, 254],
-        [254, 254, 254, 254, 95, 47, 0, 0], [254, 254, 254, 254, 133, 246, 244, 112],
-        [254, 254, 254, 254, 236, 130, 137, 120], [254, 254, 254, 254, 254, 11, 0, 0],
-        [254, 254, 254, 254, 254, 254, 25, 0], [254, 254, 254, 254, 254, 254, 198, 115],
-        [254, 254, 254, 254, 254, 254, 254, 0], [254, 254, 254, 254, 254, 254, 254, 40],
-        [254, 254, 254, 254, 254, 254, 254, 96], [254, 254, 254, 254, 254, 254, 254, 125],
-        [254, 254, 254, 254, 254, 254, 254, 254],
+        [0, 16, 16, 16, 16, 16, 16, 16], [16, 16, 7, 16, 196, 175, 130, 69],
+        [16, 16, 11, 16, 16, 16, 16, 16], [16, 16, 12, 96, 227, 48, 244, 244],
+        [16, 16, 16, 16, 16, 16, 16, 16], [16, 16, 16, 16, 16, 16, 16, 226],
+        [16, 16, 16, 16, 16, 16, 16, 229], [16, 16, 16, 16, 16, 16, 16, 241],
+        [16, 16, 16, 16, 16, 16, 16, 244], [16, 16, 16, 16, 16, 16, 19, 97],
+        [16, 16, 16, 16, 16, 16, 249, 244], [16, 16, 16, 16, 16, 252, 244, 244],
+        [16, 16, 16, 16, 17, 108, 113, 101], [16, 16, 16, 16, 24, 16, 16, 16],
+        [16, 16, 16, 16, 201, 4, 207, 94], [16, 16, 16, 16, 240, 159, 16, 16],
+        [16, 16, 16, 16, 243, 51, 244, 244], [16, 16, 16, 62, 244, 129, 170, 193],
+        [16, 16, 16, 213, 244, 244, 244, 244], [16, 16, 16, 232, 16, 29, 244, 244],
+        [16, 16, 16, 253, 16, 16, 16, 16], [16, 16, 16, 254, 242, 244, 244, 244],
+        [16, 16, 205, 145, 221, 222, 5, 31], [16, 16, 208, 237, 16, 16, 16, 16],
+        [16, 45, 16, 28, 244, 244, 117, 224], [16, 102, 42, 244, 244, 244, 244, 244],
+        [16, 179, 116, 182, 181, 36, 219, 245], [16, 199, 143, 200, 114, 184, 16, 16],
+        [16, 203, 16, 249, 112, 185, 234, 183], [16, 204, 177, 87, 74, 73, 202, 37],
+        [16, 223, 15, 54, 210, 81, 16, 38], [16, 231, 16, 16, 16, 212, 244, 244],
+        [16, 242, 210, 210, 53, 244, 244, 244], [16, 250, 244, 244, 244, 244, 244, 244],
+        [22, 240, 244, 25, 217, 133, 216, 244], [22, 244, 173, 14, 125, 228, 167, 49],
+        [52, 247, 16, 142, 163, 103, 195, 115], [55, 244, 244, 244, 107, 187, 244, 244],
+        [59, 123, 16, 217, 171, 16, 1, 153], [61, 56, 152, 161, 191, 155, 134, 98],
+        [80, 20, 248, 50, 239, 70, 236, 244], [93, 111, 95, 244, 244, 244, 244, 244],
+        [100, 0, 172, 192, 157, 16, 9, 218], [110, 244, 160, 251, 136, 47, 244, 244],
+        [119, 154, 82, 244, 127, 168, 35, 244], [120, 4, 40, 22, 244, 244, 244, 244],
+        [124, 84, 240, 77, 88, 244, 6, 244], [128, 41, 16, 233, 244, 24, 244, 244],
+        [131, 79, 137, 99, 135, 64, 132, 34], [139, 46, 138, 68, 140, 148, 105, 71],
+        [147, 4, 178, 146, 16, 16, 16, 16], [173, 22, 10, 239, 86, 75, 214, 238],
+        [176, 166, 16, 126, 4, 2, 234, 90], [188, 244, 244, 244, 244, 244, 244, 244],
+        [190, 27, 85, 244, 57, 244, 244, 244], [197, 198, 16, 72, 164, 63, 118, 180],
+        [206, 16, 16, 16, 16, 16, 16, 16], [215, 76, 121, 186, 194, 30, 149, 67],
+        [225, 32, 106, 43, 186, 156, 104, 244], [231, 244, 244, 244, 244, 244, 244, 244],
+        [243, 109, 141, 91, 16, 16, 16, 235], [243, 150, 190, 78, 165, 162, 16, 58],
+        [244, 16, 244, 244, 16, 3, 44, 65], [244, 122, 209, 244, 144, 174, 242, 244],
+        [244, 151, 16, 229, 21, 169, 190, 39], [244, 244, 8, 230, 211, 92, 206, 33],
+        [244, 244, 13, 26, 244, 244, 244, 244], [244, 244, 66, 244, 158, 223, 218, 83],
+        [244, 244, 244, 23, 244, 244, 189, 244], [244, 244, 244, 60, 244, 244, 244, 244],
+        [244, 244, 244, 244, 16, 16, 16, 16], [244, 244, 244, 244, 218, 18, 238, 244],
+        [244, 244, 244, 244, 244, 244, 244, 244], [244, 246, 89, 220, 16, 16, 16, 16],
+        [253, 244, 244, 244, 244, 244, 244, 244],
     ];
-    static BITSET: [u64; 255] = [
-        0, 1, 7, 15, 17, 31, 63, 127, 179, 511, 1023, 2047, 2191, 4079, 4087, 8191, 8319, 16384,
-        65535, 131071, 262143, 4128527, 4194303, 8461767, 24870911, 67108863, 134217727, 276824575,
-        335593502, 486341884, 536805376, 536870911, 553648127, 1056964608, 1073692671, 1073741823,
-        1140785663, 2147483647, 4026540127, 4294934783, 8589934591, 15032387515, 64548249055,
-        68191066527, 68719476735, 115913785343, 137438953215, 1095220854783, 1099511627711,
-        1099511627775, 2199023190016, 2199023255551, 4398046511103, 8641373536127, 8791831609343,
-        8795690369023, 8796093022207, 13198434443263, 17592186044415, 35184321757183,
-        70368744112128, 88094074470339, 140737488355327, 140737488355328, 141836999983103,
-        281474976710655, 281474976710656, 563017343310239, 844472174772224, 875211255709695,
-        1125625028935679, 1125899906842623, 1688915364814303, 2119858418286774, 2251795522912255,
-        2251799813685247, 3377704004976767, 3509778554814463, 3905461007941631, 4503595333443583,
-        4503599627370495, 8796093022142464, 9006649498927104, 9007192812290047, 9007199254740991,
-        15762594400829440, 17169970223906821, 17732925109967239, 18014398491652207,
-        18014398509481983, 20266198323101936, 36027697507139583, 36028792723996672,
-        36028792723996703, 36028792728190975, 36028797018963967, 72057594037927935,
-        90071992547409919, 143851303137705983, 144053615424700415, 144115188075855868,
-        144115188075855871, 288230371860938751, 297241973452963840, 301749971126844416,
-        319718190147960832, 576460743713488896, 576460743847706622, 576460752303359999,
-        576460752303423486, 576460752303423487, 790380184120328175, 1152640029630136575,
-        1152917029519358975, 1152921504591118335, 1152921504606845055, 1152921504606846975,
-        1153765996922689951, 2161727885562420159, 2251241253188403424, 2295745090394464220,
-        2305570330330005503, 2305843004918726656, 2305843004919250943, 2305843009196916483,
-        2305843009213693951, 3457638613854978030, 4323455298678290390, 4557642822898941951,
-        4575692405780512767, 4611686017001275199, 4611686018360336384, 4611686018427322368,
-        4611686018427387903, 4656722014700830719, 6843210385291930244, 6881498031078244479,
-        6908521828386340863, 8935141660164089791, 8935423131384840192, 9168765891372858879,
-        9169328841326329855, 9187201948305063935, 9187343239835811327, 9216616637413720063,
-        9223372036854775807, 9223372041149743103, 9223372586610589696, 9223934986808197120,
-        10371930679322607615, 10502394331027995967, 11078855083321979519, 11241233151490523135,
-        13006395723845991295, 13258596753222922239, 13609596598936928288, 13834776580305453567,
-        13907115649320091647, 14082190885810440174, 14123225865944680428, 16212958624174047247,
-        16412803692974677999, 16424062692043104238, 16424062692043104239, 16424062692043243502,
-        16424625641996804079, 16429129241624174575, 16717361816799141887, 16717361816799216127,
-        16788293510930366511, 17005555242810474495, 17293822569102704639, 17581979622616071300,
-        17870283321271910397, 17870283321406070975, 17870283321406128127, 17978369712463020031,
-        18158513764145585631, 18158781978395017215, 18194542490281852927, 18410715276682199039,
-        18428729675200069631, 18428729675200069632, 18433233274827440127, 18437455399478099968,
-        18437736870159843328, 18437736874452713471, 18437736874454812668, 18442240474082181119,
-        18444492273895866367, 18445618173802708993, 18446181192473632767, 18446216308128218879,
-        18446462598732840928, 18446462598732840959, 18446462598732840960, 18446462599806582783,
-        18446462615912710143, 18446462667452317695, 18446463149025525759, 18446463629525450752,
-        18446463698244468735, 18446464796682337663, 18446466966713532671, 18446466996645134335,
-        18446466996779287551, 18446471394825862144, 18446471394825863167, 18446480190918885375,
-        18446498607738650623, 18446532967477018623, 18446602782178705022, 18446603336221163519,
-        18446603336221196287, 18446638520593285119, 18446673709243564031, 18446708893632430079,
-        18446740770879700992, 18446741595513422027, 18446741874686295551, 18446743249075830783,
-        18446743798965862398, 18446744056529672000, 18446744060816261120, 18446744068886102015,
-        18446744069414584320, 18446744069414601696, 18446744069414617087, 18446744069414649855,
-        18446744069456527359, 18446744069548736512, 18446744069548802046, 18446744069683019775,
-        18446744069951455231, 18446744070421282815, 18446744070446333439, 18446744070475743231,
-        18446744070488326143, 18446744071553646463, 18446744071562067967, 18446744073696837631,
-        18446744073701162813, 18446744073707454463, 18446744073709027328, 18446744073709355007,
-        18446744073709419615, 18446744073709486080, 18446744073709520895, 18446744073709543424,
-        18446744073709550079, 18446744073709550595, 18446744073709551579, 18446744073709551599,
-        18446744073709551614, 18446744073709551615,
+    static BITSET_CANONICAL: [u64; 186] = [
+        0b1111111111111111111111111111111111111111111111111111111111111110,
+        0b1111111111111111111111111111111111111111111111111111100111111111,
+        0b1111111111111111111111111111111111111111111111111110000000000000,
+        0b1111111111111111111111111111111111111111111111111000011111111111,
+        0b1111111111111111111111111111111111111111111111110000000000000000,
+        0b1111111111111111111111011111111111111111111111111111110111111111,
+        0b1100000011111111111111111111111111111111111111111111111111111111,
+        0b1111111111111111111111111111111111111111111111111111110000000011,
+        0b1111111111111111111111111111111100011111111111111111111111111111,
+        0b1111111111111111111111111111111100000000000000000111111111111111,
+        0b1111111111111111000001111111111111111111111111111111111111111111,
+        0b1111111111111111000000000000001111111111111111111111111111111111,
+        0b1111111111111111000000000000000000111111111111111111111111111111,
+        0b1111111111000000000000000000000000000000000000000000000000000000,
+        0b1000000000000000000000000000000011111111111111111111111111111111,
+        0b0000000111111111111111111111111111111111111111111111111111111100,
+        0b1111111111111111111111111111111111111111111111111111111111111111,
+        0b1111111111111111111111111111111100111111001111111111111111111111,
+        0b1111111111111111000000111111111111111111111111110000001111111111,
+        0b1111111111111111000000000000111111111111111111111111111111111111,
+        0b0001111111111111111111111111111100000000000000000000000000000000,
+        0b0000011111111111111111111111111000000000000000000000000000000000,
+        0b0000000111111111111111111111111111111111111111111111111111111111,
+        0b0000000001111111111111111111111100000000000000000000000000000000,
+        0b0000000000011111111111111111111111111111111111111111111111111111,
+        0b0000000000000000001111111111111111111111111111110000000000000000,
+        0b0000000000000000000000111111111111111111111111111111111111111111,
+        0b0000000000000000000000000000000000000000000000000000000000010001,
+        0b0000000000000000000000000000000000000000000000000000000010110011,
+        0b0000000000000000000000000000000000000000000000000000100010001111,
+        0b0000000000000000000000000000000000000000000000000000111111101111,
+        0b0000000000000000000000000000000000000000000000000000111111110111,
+        0b0000000000000000000000000000000000000000000000000010000001111111,
+        0b0000000000000000000000000000000000000000001111101111111100001111,
+        0b0000000000000000000000000000000000000000100000010001110111000111,
+        0b0000000000000000000000000000000000000001011110110111111111111111,
+        0b0000000000000000000000000000000000000111111111111111111111111111,
+        0b0000000000000000000000000000000000010000100000000000000111111111,
+        0b0000000000000000000000000000000000010100000000001100000000011110,
+        0b0000000000000000000000000000000000011100111111001111110011111100,
+        0b0000000000000000000000000000000000100000111111111111111111111111,
+        0b0000000000000000000000000000000000111111111111110011111111111111,
+        0b0000000000000000000000000000000001000011111111110000000111111111,
+        0b0000000000000000000000000000000011110000000000000010000001011111,
+        0b0000000000000000000000000000000011111111111111111000000011111111,
+        0b0000000000000000000000000000001110000000000000000000011110111011,
+        0b0000000000000000000000000000111100000111011000000001110111011111,
+        0b0000000000000000000000000000111111100000100000010001100110011111,
+        0b0000000000000000000000000000111111111111111111111111111111111111,
+        0b0000000000000000000000000001101011111100111111111111111111111111,
+        0b0000000000000000000000000001111111111111111111111111111011111111,
+        0b0000000000000000000000001111111100000000001111111111111111111111,
+        0b0000000000000000000000001111111111111111111111111111111110111111,
+        0b0000000000000000000000001111111111111111111111111111111111111111,
+        0b0000000000000000000000011111111111111111111111110000000000000000,
+        0b0000000000000000000001111101101111111001111111111111111101111111,
+        0b0000000000000000000001111111111100000001111111111111111111111111,
+        0b0000000000000000000001111111111111100111111111111111111111111111,
+        0b0000000000000000000001111111111111111111111111111111111111111111,
+        0b0000000000000000000011000000000011111111111111110001111111111111,
+        0b0000000000000000000011111111111111111111111111111111111111111111,
+        0b0000000000000000000111111111111111111100111111111111111111111111,
+        0b0000000000000000010100000001111100000000000000111111111111000011,
+        0b0000000000000000100000001111111111111111111111111111111111111111,
+        0b0000000000000010000000000000111110110000110000000001100110011111,
+        0b0000000000000011000000000000101100000000000000000000000000000000,
+        0b0000000000000011000110111111111111111111111111111111111111111111,
+        0b0000000000000011111111111011111111111111111111111111111111111111,
+        0b0000000000000110000000000000111101000000011000000001110111011111,
+        0b0000000000000111100001111111111111111111111111110000000010110110,
+        0b0000000000000111111111111111111100000000001111111111111111111111,
+        0b0000000000001100000000000000000011111111010111111000000001111111,
+        0b0000000000001100011110000001111111111111111111111111111111111111,
+        0b0000000000001101110111111111111100000000000011111111111111111111,
+        0b0000000000001111111111111111111100000000000011111101111111111111,
+        0b0000000000011111001111111111111111111111111111110000000000000000,
+        0b0000000000011111111111111111111001111111111111111111111111111111,
+        0b0000000000110111111111111111111100000000000000000000000000000000,
+        0b0000000000111100111111111111111100111000000000000000000000000101,
+        0b0000000000111111000000000000000001011110000000100001100110000111,
+        0b0000000000111111111111111111111111111110111011111111000001101111,
+        0b0000000001000111111111111111111111111111111111110000000011110000,
+        0b0000000001111111111111101111111111111111111111001111111111111111,
+        0b0000000001111111111111111111111100000000000000000000000000011111,
+        0b0000000001111111111111111111111100000000001111111111111111111111,
+        0b0000000100111111111111111111111111111111111111111111111111111111,
+        0b0000000111111111000011111111111101111111111111111111111111111111,
+        0b0000000111111111110001111111111111111111111111111111111111111111,
+        0b0000001111111111111111111111111100000000001111111111111111111111,
+        0b0000010000100000000001000000000000000000000000000000000000000000,
+        0b0000010000110000000001111111111111111111111111111111110000000000,
+        0b0000010001101111110111100000000000000000000000000000000000000000,
+        0b0000011111111111111111111111111111111111111111110000011111111111,
+        0b0000101011110111111111101001011011111111111111111111111111101111,
+        0b0000111111111111000000000000000000000000000000000000000011111111,
+        0b0000111111111111111110111110111000001111111111111111101111111111,
+        0b0000111111111111111111111111111111111111000011111111111111111111,
+        0b0000111111111111111111111111111111111111111111111111100001111111,
+        0b0001000000000011000000000000111110110000100000000101100110011111,
+        0b0001111000000000000000000000111100000000000000010001101110111111,
+        0b0001111100111110000000111111111000000000000000000000000011100000,
+        0b0001111111011100000111111111111100001111110011110001111111011100,
+        0b0001111111111111000001111111111111111111111111111111111111111111,
+        0b0001111111111111111111111111111100000000000001111111111111111111,
+        0b0001111111111111111111111111111111111110111111111111111100000011,
+        0b0010111111111011111111111111111111111100011111111111111111101110,
+        0b0011101111111111111111111010111111111111111111111111011111010110,
+        0b0011111110000000000111111111111111111111111111111111111111111111,
+        0b0011111111111111111111111111111110101010111111110011111100111111,
+        0b0011111111111111111111111111111111111100000000001110000000000000,
+        0b0100000010011111111111111111111111111111111110111111111111111111,
+        0b0101111011110111111101111001011010101010100101101110101010000100,
+        0b0101111101111111111111011111111111100000111110000000000001111111,
+        0b0101111111011111111111111111111111111111111111111111111111111111,
+        0b0111101111111111111111111111111111011111110111111110011110111111,
+        0b0111110000000000111111111111111100000000000000001000000000000000,
+        0b0111111100111101111111111111111111111111111111110011110111111111,
+        0b0111111100111111111111111111111111111111111111111111111111111111,
+        0b0111111101111111011111110111111100000000011111111111111111111111,
+        0b0111111101111111111111111111111111111111111111111111110111111111,
+        0b0111111111100111111111111111111111111111111111111111111111111111,
+        0b1000000000000000000000001000000000000000000000000000000000000000,
+        0b1000000000000010000000000000000000000000000000000000000000000000,
+        0b1000111111110000011111111111111111111111111111111111111111111111,
+        0b1001000110111111111111111111111111111111111111111111110100111111,
+        0b1001100110111111111111111111111111111111011011111111001001111111,
+        0b1001110000000000111000011111111000011111111011111111111111111111,
+        0b1011010001111111111111111111111111111111111111111111101101111111,
+        0b1011011111111111111111110111111111111111111111111110111111111111,
+        0b1011110011011111000000000000000000000000000000000000000000100000,
+        0b1011111111111111000000000000000000000000000000000000000111111111,
+        0b1100001101101101111111011111111111111111111110011000011111101110,
+        0b1100001111111111110001110001100011010110001111011100011111101100,
+        0b1110000011111111111111111111100000000000000000000000000000001111,
+        0b1110001111000101111111011111111111111111111110011001111111101111,
+        0b1110001111101101111111011111111111111111111110011001111111101110,
+        0b1110001111101101111111011111111111111111111110011001111111101111,
+        0b1110001111101101111111011111111111111111111110111011111111101110,
+        0b1110001111101111111111011111111111111111111111011101111111101111,
+        0b1110001111111111111111011111111111111111111111011101111111101111,
+        0b1110011111111111111111111111111111111111111111011101111111111111,
+        0b1110011111111111111111111111111111111111111111110000000111111111,
+        0b1110100011111100000000000000000000000000000000000000000000101111,
+        0b1110101111111111110111100110010011011111111111111111111111111111,
+        0b1111001111111111101111010101000000111110001011111111110010000100,
+        0b1111011111111111111111111111111111110111111111111111111111111101,
+        0b1111011111111111111111111111111111111111111111110010000010111111,
+        0b1111100101111111111111111111111111111111111111111111111111111111,
+        0b1111110000000000000000000000111110000000111100000101110111011111,
+        0b1111110000000000111100111111111111111111111111111111111111111111,
+        0b1111110001111111111111111111111100000000000000000011111111111111,
+        0b1111111111011111000000000000000000000000000000000000000000000000,
+        0b1111111111011111111111111111111100000000000000000000000000000000,
+        0b1111111111100000000000000000000000000000000000000000011111111100,
+        0b1111111111111100000000000000000000000000000000000000000000000001,
+        0b1111111111111110000000000000111111111111111000011101111111111111,
+        0b1111111111111110000111111111111111111111111111111111111011111111,
+        0b1111111111111110111111111111111111111111111111111111111111100000,
+        0b1111111111111111000000001000000000011111111111111111111111111111,
+        0b1111111111111111000000001111000000000000000001110000000000000000,
+        0b1111111111111111000000011111111110111111111111111011110101111111,
+        0b1111111111111111000000111111100011111111111100000000000011111111,
+        0b1111111111111111000000111111111111110111111111111111111111111111,
+        0b1111111111111111000001111111111111111111111111111111110000000000,
+        0b1111111111111111001000001011111111111111111111111111111111111111,
+        0b1111111111111111011111110111111100000000011111100111111001111110,
+        0b1111111111111111110000000000000011111110111111111111111111111111,
+        0b1111111111111111111111001111111100000000000000000000000000000000,
+        0b1111111111111111111111011011111100000000000000000000000011001011,
+        0b1111111111111111111111111100000000000111111111111111111111111110,
+        0b1111111111111111111111111111101111111111111111111101011101000000,
+        0b1111111111111111111111111111110011111111100000000000000000000000,
+        0b1111111111111111111111111111111011100000011111111111111111111111,
+        0b1111111111111111111111111111111100000000000000000000000000000000,
+        0b1111111111111111111111111111111100000000000000000100001111100000,
+        0b1111111111111111111111111111111100000010011111111111111111111111,
+        0b1111111111111111111111111111111100000111111111110000000000000000,
+        0b1111111111111111111111111111111100000111111111111111111111111110,
+        0b1111111111111111111111111111111100111100000000001111111111111111,
+        0b1111111111111111111111111111111100111101011111110011110111111111,
+        0b1111111111111111111111111111111101111111011111110111111101111111,
+        0b1111111111111111111111111111111111111111001111011111111111111111,
+        0b1111111111111111111111111111111111111111011111111111111100111101,
+        0b1111111111111111111111111111111111111111111110000000000000000000,
+        0b1111111111111111111111111111111111111111111111011111110001011111,
+        0b1111111111111111111111111111111111111111111111111111111111011011,
+    ];
+    static BITSET_MAPPING: [(u8, u8); 69] = [
+        (0, 128), (0, 142), (0, 175), (0, 176), (0, 63), (0, 60), (0, 59), (0, 54), (0, 52),
+        (0, 51), (0, 48), (0, 47), (0, 31), (0, 21), (0, 4), (1, 53), (1, 43), (1, 37), (1, 36),
+        (1, 29), (1, 21), (1, 7), (2, 128), (2, 144), (2, 51), (2, 32), (3, 181), (3, 49), (3, 33),
+        (3, 17), (4, 128), (4, 48), (4, 176), (4, 16), (5, 14), (5, 12), (5, 6), (6, 136), (6, 160),
+        (6, 3), (7, 54), (7, 38), (8, 163), (8, 32), (9, 177), (9, 32), (10, 149), (10, 16),
+        (11, 16), (11, 133), (12, 162), (12, 32), (13, 10), (13, 128), (14, 160), (14, 1),
+        (15, 135), (15, 62), (16, 128), (17, 32), (18, 17), (19, 16), (20, 32), (21, 31), (22, 135),
+        (23, 137), (24, 139), (25, 48), (26, 150),
     ];
 
     pub fn lookup(c: char) -> bool {
@@ -127,85 +268,193 @@ pub mod alphabetic {
             &BITSET_CHUNKS_MAP,
             BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
-            &BITSET,
+            &BITSET_CANONICAL,
+            &BITSET_MAPPING,
         )
     }
 }
 
 #[rustfmt::skip]
 pub mod case_ignorable {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 51);
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 24);
     static BITSET_CHUNKS_MAP: [u8; 250] = [
-        36, 19, 16, 26, 29, 40, 47, 38, 42, 5, 0, 9, 23, 25, 34, 3, 30, 0, 0, 0, 0, 0, 21, 31, 39,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 15, 22, 28,
-        33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 32, 1, 11, 0, 0, 0, 44, 8, 18, 50, 41, 49, 45, 37, 43,
-        46, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        6, 20, 0, 0, 0, 48, 0, 0, 27, 12, 0, 0, 10, 0, 0, 0, 0, 2,
+        12, 31, 34, 4, 7, 15, 22, 13, 17, 46, 50, 41, 28, 3, 11, 47, 8, 50, 50, 50, 50, 50, 29, 27,
+        14, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 26, 50, 35, 25, 6, 10, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 50, 50, 50, 42, 50, 9, 49, 36, 50, 50, 50, 19, 43, 33, 23, 16, 1,
+        20, 51, 18, 21, 37, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 39, 50, 45, 50,
+        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 32, 50, 50, 50, 50, 50, 50, 50, 50,
+        50, 44, 30, 50, 50, 50, 0, 50, 50, 5, 38, 50, 50, 40, 50, 50, 50, 50, 48,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 8]; 52] = [
-        [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 130], [0, 0, 0, 0, 0, 0, 0, 166],
-        [0, 0, 0, 0, 0, 0, 157, 142], [0, 0, 0, 0, 0, 22, 47, 57], [0, 0, 0, 0, 0, 45, 0, 0],
-        [0, 0, 0, 0, 0, 172, 70, 0], [0, 0, 0, 0, 40, 0, 173, 3], [0, 0, 0, 0, 60, 0, 0, 0],
-        [0, 0, 0, 0, 94, 90, 136, 38], [0, 0, 0, 29, 0, 15, 0, 0], [0, 0, 0, 48, 0, 116, 0, 0],
-        [0, 0, 0, 76, 0, 0, 0, 0], [0, 0, 0, 93, 0, 0, 0, 0], [0, 0, 0, 96, 104, 7, 0, 0],
-        [0, 0, 0, 135, 0, 0, 0, 0], [0, 0, 12, 0, 0, 43, 163, 92], [0, 0, 56, 0, 0, 0, 0, 0],
-        [0, 0, 67, 0, 0, 24, 0, 0], [0, 0, 174, 182, 182, 114, 10, 0], [0, 8, 0, 0, 0, 0, 0, 0],
-        [0, 133, 0, 87, 0, 150, 0, 178], [16, 162, 46, 86, 51, 80, 13, 111],
-        [20, 5, 61, 0, 120, 0, 0, 0], [26, 0, 0, 0, 0, 0, 0, 0], [32, 156, 176, 1, 126, 91, 69, 88],
-        [35, 82, 0, 71, 175, 14, 83, 131], [62, 0, 0, 0, 137, 0, 0, 0],
-        [66, 0, 0, 152, 72, 25, 134, 59], [73, 33, 0, 181, 125, 85, 122, 139],
-        [74, 151, 36, 84, 0, 0, 0, 0], [75, 0, 0, 0, 0, 0, 0, 0],
-        [78, 27, 0, 148, 138, 81, 44, 119], [102, 124, 165, 101, 0, 64, 0, 68],
-        [106, 135, 0, 112, 177, 107, 180, 168], [109, 0, 0, 0, 0, 0, 0, 0],
-        [113, 50, 108, 0, 0, 0, 0, 0], [115, 0, 0, 0, 141, 5, 0, 49],
-        [117, 21, 128, 19, 110, 147, 129, 9], [118, 0, 42, 144, 0, 0, 0, 0],
-        [123, 100, 123, 169, 155, 54, 4, 18], [140, 0, 0, 63, 127, 97, 0, 0],
-        [143, 95, 37, 121, 0, 0, 0, 0], [145, 34, 31, 0, 0, 0, 0, 0], [154, 0, 0, 58, 0, 0, 0, 0],
-        [158, 1, 105, 0, 65, 0, 0, 0], [161, 0, 103, 0, 160, 11, 30, 0],
-        [164, 55, 155, 53, 127, 52, 2, 28], [167, 99, 77, 0, 0, 0, 0, 0],
-        [170, 41, 153, 6, 0, 0, 159, 39], [171, 149, 132, 17, 98, 89, 146, 23],
-        [179, 182, 0, 0, 182, 182, 182, 79],
+        [2, 69, 56, 174, 174, 174, 174, 174], [4, 33, 106, 18, 174, 174, 111, 182],
+        [16, 174, 174, 174, 174, 174, 174, 174], [27, 109, 122, 128, 90, 63, 51, 61],
+        [29, 58, 174, 53, 121, 165, 5, 94], [45, 174, 174, 174, 168, 174, 174, 174],
+        [49, 174, 174, 105, 163, 167, 96, 44], [54, 159, 174, 127, 89, 60, 86, 99],
+        [55, 104, 30, 59, 174, 174, 174, 174], [57, 23, 174, 144, 98, 177, 146, 84],
+        [72, 88, 117, 71, 174, 47, 174, 50], [75, 15, 174, 79, 123, 76, 126, 0],
+        [80, 176, 77, 174, 174, 174, 174, 174], [82, 181, 92, 21, 78, 169, 93, 132],
+        [83, 174, 180, 3, 174, 174, 174, 174], [87, 70, 87, 118, 108, 40, 130, 20],
+        [100, 174, 174, 46, 91, 67, 174, 174], [102, 66, 31, 142, 174, 174, 174, 174],
+        [103, 28, 26, 174, 174, 174, 174, 174], [107, 174, 174, 147, 174, 174, 174, 174],
+        [110, 128, 74, 174, 48, 174, 174, 174], [113, 174, 73, 174, 112, 19, 25, 174],
+        [116, 41, 108, 39, 91, 38, 129, 24], [119, 164, 95, 134, 68, 141, 13, 22],
+        [125, 9, 174, 174, 9, 9, 9, 175], [133, 114, 154, 149, 37, 140, 158, 151],
+        [136, 174, 174, 174, 174, 174, 174, 174], [139, 174, 174, 174, 174, 174, 174, 174],
+        [153, 131, 17, 174, 85, 174, 174, 174], [174, 1, 174, 161, 174, 12, 174, 124],
+        [174, 157, 174, 174, 174, 174, 174, 174], [174, 174, 10, 9, 9, 81, 179, 174],
+        [174, 174, 42, 174, 174, 174, 174, 174], [174, 174, 148, 174, 174, 166, 174, 174],
+        [174, 174, 172, 174, 174, 34, 115, 64], [174, 174, 174, 15, 174, 174, 174, 174],
+        [174, 174, 174, 138, 174, 171, 174, 174], [174, 174, 174, 150, 174, 174, 174, 174],
+        [174, 174, 174, 156, 174, 174, 174, 174], [174, 174, 174, 170, 8, 152, 174, 174],
+        [174, 174, 174, 173, 174, 162, 174, 174], [174, 174, 174, 174, 65, 62, 97, 32],
+        [174, 174, 174, 174, 137, 174, 6, 145], [174, 174, 174, 174, 155, 174, 174, 174],
+        [174, 174, 174, 174, 174, 120, 52, 174], [174, 174, 174, 174, 174, 135, 35, 43],
+        [174, 174, 174, 174, 174, 160, 174, 174], [174, 174, 174, 174, 174, 174, 11, 101],
+        [174, 174, 174, 174, 174, 174, 174, 7], [174, 174, 174, 174, 174, 174, 174, 143],
+        [174, 174, 174, 174, 174, 174, 174, 174], [178, 174, 174, 174, 14, 131, 174, 36],
+    ];
+    static BITSET_CANONICAL: [u64; 128] = [
+        0b1111101111111111111111111111111111111111111111111111111111111111,
+        0b0011000000000000000000000000000000000000000000000000000000000000,
+        0b1111100001111111111111111111111111111111111111111111111111111111,
+        0b0111000000000000000000000000000000000000000000000000000000000000,
+        0b1111111100000000000000000000000000000000000000000000000000000000,
+        0b0000000000000001111111111100000000000000000000000000000000000000,
+        0b1111111111111100000000000000000000000000000000000000000000000000,
+        0b1111100000000000000000000000000000000000000000000000000000000000,
+        0b0000000001111111000000000000000000000000000000000000000000000000,
+        0b1111111111111111111111111111111111111111111111111111111111111111,
+        0b1111111111111111000000000000000000000000000000000000000000000000,
+        0b1010000000000000000000000000000000000000000000000000000000000000,
+        0b1000000000000000100000000000000000000000000000000000000000000000,
+        0b0111111111000000000000000000000000000000000000000000000000000011,
+        0b0101100000000000000000000000000000000000000000000000000000000000,
+        0b0011111100000000000000000000000000000000000000000000000000000000,
+        0b0000000111111111000000000000000000000000000000000000000000000000,
+        0b0000000000000000000000100000000000000000000000000000000001100000,
+        0b0000000000000000000000000000000000000000000000000000000000001101,
+        0b0000000000000000000000000000000000000000000000000000000010111111,
+        0b0000000000000000000000000000000000000000000000000010000000000001,
+        0b0000000000000000000000000000000000000000000000000011111101000000,
+        0b0000000000000000000000000000000000000000000000001001111000000000,
+        0b0000000000000000000000000000000000000000001001000000000000000000,
+        0b0000000000000000000000000000000000000000010111000000010000000000,
+        0b0000000000000000000000000000000000000000101000110000000000000000,
+        0b0000000000000000000000000000000000000011011111111111110000000000,
+        0b0000000000000000000000000000000000001001100000000000000000000000,
+        0b0000000000000000000000000000000000001110011111100000000010000000,
+        0b0000000000000000000000000000000000010111111111110000000000111111,
+        0b0000000000000000000000000000000000011111111111110000000000000000,
+        0b0000000000000000000000000000000000100000000000000010000001100100,
+        0b0000000000000000000000000000000000100000100011111111111001000000,
+        0b0000000000000000000000000000000001000000000000000000000001011100,
+        0b0000000000000000000000000000000010000010000000000000000000000000,
+        0b0000000000000000000000000000000011111111111111111000000000000000,
+        0b0000000000000000000000000000000100001100111100000000000000000000,
+        0b0000000000000000000000000000001111111111111111111111111111111111,
+        0b0000000000000000000000000000110000000000000000000010000000011110,
+        0b0000000000000000000000000000110000000000000000000011000001000000,
+        0b0000000000000000000000000000110000000000011000000010000000011110,
+        0b0000000000000000000000000000110000000000011000000011110111000001,
+        0b0000000000000000000000000000111101100000000000000000000000000000,
+        0b0000000000000000000000000001101100000000000000000000000000000000,
+        0b0000000000000000000000000110000000000000000000001000000000000000,
+        0b0000000000000000000001111101101111111001111111111111111101111111,
+        0b0000000000000000000001111111100010000000000000000000000000000000,
+        0b0000000000000000000011100000000011111000000000000000000000000000,
+        0b0000000000000000000011111011110011100000000000000000000000000000,
+        0b0000000000000000000100000110000000000000000000000000100001000100,
+        0b0000000000000000001000010010000000000000000000000000000000000000,
+        0b0000000000000000001110110011110000000000000000000000000000000011,
+        0b0000000000000000001111000000000000000000000000000000111111100111,
+        0b0000000000000000001111011111111110111111110000000000000000000000,
+        0b0000000000000000001111111111111111111111110000000000000000000000,
+        0b0000000000000000011111001001000000000011000000001111100000000000,
+        0b0000000000000000111111111111111011111000000000000000000000010000,
+        0b0000000000000000111111111111111100000000000010001111111111111111,
+        0b0000000000000001000000000000000011111111111111111111100000000001,
+        0b0000000000000001111111111111111111111111111111110000000000000000,
+        0b0000000000000010000000000000110000000000111111100010000111111110,
+        0b0000000000000011101000110100000000000000000000000000000000000000,
+        0b0000000000001100000000000000000000000000000011000000000000000000,
+        0b0000000000001111111110000000000000000000000000000000000000000100,
+        0b0000000000010000000000000000000000000000000000000000000010110110,
+        0b0000000000011100000000000000000000000000000111000000000000000000,
+        0b0000000000011110000000000000000111000011000000000000000000000000,
+        0b0000000000011111000111111100000000000000000000000000000000000001,
+        0b0000000000011111111011111000000000000000000000000000000000000111,
+        0b0000000000100000000111111111111111111111111111111111111111111111,
+        0b0000000000100011000000000000000000000000000000100011100110000110,
+        0b0000000001011000001100000000000000100000000000000000000000000010,
+        0b0000000001100110011111100000000000000000000000000000000000000000,
+        0b0000000001101101111111001111111111111111111111000000000000000000,
+        0b0000000010111111001010000000000000000000000000000000000000000000,
+        0b0000000011001111111100000000000000000000000000000000000000000000,
+        0b0000000100000000000001111111111111111111111111111111111111111111,
+        0b0000000110010000101000010000000000000000000000000000000000000000,
+        0b0000001010100000000000000000000000000011000000000000000000000000,
+        0b0000001100010000001000011111110111111111111101110000000000000000,
+        0b0000010000000000010000001000000000000000000000000000000000000000,
+        0b0000010000110000111111111111111111111111111111111111111111111111,
+        0b0000011111110010000000000000000000000000000000000000000000000000,
+        0b0000100000111110001111000000000000000000000000000000000000100000,
+        0b0000111000000000000000000000100000000000000000000000000000000000,
+        0b0000111000000100000000011000011100000000000000000000000000000000,
+        0b0001000000000000000000000000000000000000000000000000000000000010,
+        0b0001000000000000000000000000000000000000000000000000000000000110,
+        0b0001000000000001000000000000000000000000000000000001000000001000,
+        0b0001010000000000000000000000000000000000000000000000000000000111,
+        0b0001011111010000000000000000000000000000000000000000000000001111,
+        0b0001100000000000000000000000000000000000000000000000000000000011,
+        0b0001111111110010000000000000000000000000000000000000000000000000,
+        0b0001111111111111111111111111111111111110111111111110000011011111,
+        0b0010010000111111111110000000000000000000000000000000000000000000,
+        0b0010011001111000000000000000000000000000000000000000000000000011,
+        0b0011001111001000000000000000000000000000000000000000000000000111,
+        0b0011111110110000000000000000000000000000000000000000000000000000,
+        0b0100000000000000000000000000000000000100000000000100000010000000,
+        0b0100000000000000000000000000110000000000000000000010000000011110,
+        0b0100000011010011100000000000000000000000000000000000000000000000,
+        0b0110000000000000111000000000000011100000000000001110000000000011,
+        0b0110011011111101111000000000000000000000000000000000000000000000,
+        0b0111100111111000000000000000000000000000000000000000011111111110,
+        0b1000000000000010111111111101111100000000000000000000000000000000,
+        0b1000000000000011111111111111111100000000000000000000000000110000,
+        0b1000010111111000000000000000000000000000000000000000000000000000,
+        0b1000011100000000000000000000000000000000000000001111000001101110,
+        0b1001000000000000000000000000000000000000000000000000000000000010,
+        0b1001111111111000000111111110010101111111010000000000000000000000,
+        0b1010011111111000000000000000000000000000000000000000000000000000,
+        0b1011000000111100000000000000000000000000000000000000000000000000,
+        0b1011010001111110000000000000000000000000000000000000000000000000,
+        0b1011111101111111000000000000000000000000000000000000000000000000,
+        0b1011111111110111100000000000000000000000000000000000000000000000,
+        0b1011111111111111111111111111111111111111111111100000000000000000,
+        0b1100000000000000000000000000000000000000000000000000000000010001,
+        0b1100000110011101000000000000000000000000000000000000000000000000,
+        0b1111110000000000000000000000110000000000000000000010000110111110,
+        0b1111111100000000000000000000000000000000000000000000000000000010,
+        0b1111111111111000000000111000000000000000000000000000000000000000,
+        0b1111111111111111000000000000000000000000000000101000000000000000,
+        0b1111111111111111000000001000000000000000000000000000000000000000,
+        0b1111111111111111111100000000000000000000000000000000000000000000,
+        0b1111111111111111111111111111111100000000000000000000000000000000,
+        0b1111111111111111111111111111111100000000000000000000000000000010,
+        0b1111111111111111111111111111111111111000000000000000000000000000,
+        0b1111111111111111111111111111111111111111111110000000000000000000,
     ];
-    static BITSET: [u64; 183] = [
-        0, 1, 2, 3, 4, 8, 13, 15, 28, 64, 176, 191, 1016, 1792, 2047, 4080, 4096, 8192, 8193,
-        16192, 30720, 32704, 32768, 40448, 131008, 262016, 2097152, 2359296, 6030336, 8323072,
-        10682368, 58719232, 159383552, 234881024, 243138688, 402587711, 536805376, 536879204,
-        546307648, 805306369, 1073741824, 1073741916, 2113929216, 2181038080, 3221225472,
-        3758096384, 4026531840, 4294934528, 4294967296, 4512022528, 5368709120, 17179869183,
-        51539615774, 51539619904, 51545907230, 51545914817, 66035122176, 115964116992, 412316860416,
-        412316893184, 1030792151040, 2199023255648, 8641373536127, 8763880767488, 15397323538432,
-        17303886364672, 18004502906948, 26388279066624, 36421322670080, 65128884076547,
-        65970697670631, 68168642985984, 70093866270720, 70368739983360, 136957967529984,
-        140737488355328, 263882790666240, 281470547525648, 281470682333183, 281474976710655,
-        281474976710656, 281474976710657, 281479271675905, 562675075514368, 562949953355776,
-        563001509683710, 844424930131968, 985162418487296, 1023920203366400, 2251799813685248,
-        3377699721314304, 4494803534348292, 4503599627370678, 6755399441055744, 7881299349733376,
-        8444256867844096, 8725724278030336, 8760633772212225, 8989057312882695, 9042383626829823,
-        9851624185018758, 24822575045541890, 28848986089586688, 30958948903026688,
-        35747322042253312, 53805701016846336, 58529202969772032, 72066390130950143,
-        112767012056334336, 143833713099145216, 189151184399892480, 216172782113783808,
-        220713756545974272, 288301294651703296, 302022650010533887, 504262420777140224,
-        558446353793941504, 572520102629474304, 593978171557150752, 1008806350890729472,
-        1009933895770046464, 1152921504606846976, 1152921504606846978, 1152921504606846982,
-        1153202979583561736, 1441151880758558727, 1715871458028158991, 1729382256910270467,
-        2301902359539744768, 2305843009196908767, 2305843009213693952, 2612078987781865472,
-        2771965570646540291, 3458764513820540928, 3731232291276455943, 4539628424389459968,
-        4589168020290535424, 4611404543450677248, 4611686018494513280, 4611686069967003678,
-        4671217976001691648, 6341068275337658368, 6917775322003857411, 7421334051581067264,
-        8070450532247928832, 8788774672813524990, 9205357638345293827, 9222809086901354496,
-        9223372036854775808, 9223372036854775935, 9223512774343131136, 9224216320050987008,
-        9224497932466651184, 9653465801268658176, 9727775195120332910, 10376293541461622786,
-        11526998316797657088, 11529215046068469760, 12103423998558208000, 12699025049277956096,
-        13005832773892571136, 13798747783286489088, 13832665517980123136, 13835058055282032640,
-        13835058055282163729, 13951307220663664640, 17870283321406128128, 17906312118425092095,
-        18158513697557839871, 18158513749097456062, 18374686479671623680, 18374686479671623682,
-        18444496122186563584, 18445618173802708992, 18446462598732840960, 18446462598733004800,
-        18446463148488654848, 18446726481523507200, 18446744069414584320, 18446744069414584322,
-        18446744073575333888, 18446744073709027328, 18446744073709551615,
+    static BITSET_MAPPING: [(u8, u8); 55] = [
+        (0, 134), (0, 135), (0, 136), (0, 137), (0, 140), (0, 146), (0, 147), (0, 149), (0, 155),
+        (0, 164), (0, 166), (0, 181), (0, 182), (0, 185), (0, 130), (0, 131), (0, 133), (1, 4),
+        (1, 34), (1, 41), (1, 47), (1, 52), (1, 55), (1, 60), (2, 137), (2, 148), (2, 165),
+        (2, 173), (2, 181), (3, 6), (3, 12), (3, 29), (3, 33), (3, 51), (4, 12), (4, 46), (4, 7),
+        (5, 26), (5, 32), (5, 33), (6, 62), (6, 63), (7, 53), (7, 59), (8, 19), (8, 32), (9, 128),
+        (10, 128), (11, 33), (12, 1), (13, 57), (14, 9), (15, 33), (16, 22), (17, 23),
     ];
 
     pub fn lookup(c: char) -> bool {
@@ -214,57 +463,92 @@ pub mod case_ignorable {
             &BITSET_CHUNKS_MAP,
             BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
-            &BITSET,
+            &BITSET_CANONICAL,
+            &BITSET_MAPPING,
         )
     }
 }
 
 #[rustfmt::skip]
 pub mod cased {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (124, 6);
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (124, 11);
     static BITSET_CHUNKS_MAP: [u8; 123] = [
-        13, 18, 0, 0, 12, 0, 0, 9, 14, 10, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 1, 2, 0, 16, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0,
-        0, 0, 0, 7,
+        18, 0, 17, 17, 5, 17, 17, 9, 4, 7, 17, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+        17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 13, 14, 17, 17, 17, 17,
+        17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 16, 15, 17, 2, 17, 8, 17, 17, 6,
+        17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 12, 17, 17,
+        17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+        1, 17, 17, 17, 17, 10,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 8, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 43, 62, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 10, 0, 50, 62, 58, 20],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 42, 44, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 62, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 31, 0, 62, 62, 62, 0, 62, 62, 62, 62, 54, 26, 27, 24],
-        [0, 0, 39, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 51, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 51, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 25],
-        [0, 22, 19, 37, 62, 62, 36, 61, 62, 62, 18, 12, 0, 30, 49, 38],
-        [0, 29, 9, 0, 34, 52, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [46, 55, 62, 17, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [62, 6, 42, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [62, 56, 33, 60, 28, 57, 62, 62, 62, 62, 48, 35, 40, 45, 47, 5],
-        [62, 62, 59, 62, 41, 53, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+        [2, 2, 41, 2, 44, 5, 54, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [2, 47, 33, 0, 28, 39, 2, 2, 2, 2, 8, 35, 49, 50, 1, 14],
+        [2, 59, 10, 24, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [45, 46, 2, 20, 18, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [51, 29, 62, 51, 34, 38, 57, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [51, 51, 6, 32, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 6, 53],
+        [51, 51, 6, 55, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [51, 51, 11, 17, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [51, 51, 13, 13, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [51, 51, 31, 51, 2, 2, 2, 51, 2, 2, 2, 2, 4, 26, 27, 25],
+        [51, 51, 51, 51, 2, 52, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [51, 51, 51, 51, 10, 9, 60, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [51, 51, 51, 51, 51, 51, 51, 51, 51, 2, 51, 51, 51, 51, 51, 51],
+        [51, 51, 51, 51, 51, 51, 51, 51, 51, 19, 56, 51, 7, 2, 40, 23],
+        [51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 10, 36, 2, 51],
+        [51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 12, 61, 51, 51],
+        [51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 15, 51, 51, 51],
+        [51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51],
+        [51, 58, 22, 48, 2, 2, 42, 3, 2, 2, 21, 16, 51, 30, 37, 43],
+    ];
+    static BITSET_CANONICAL: [u64; 42] = [
+        0b1111111111111111111111111111111111111111111111111111111111101111,
+        0b1111111111111111111111011111111111111111111111111111110111111111,
+        0b1111111111111111111111111111111111111111111111111111111111111111,
+        0b1111111111111111111111111111111111111111111111111111111111110000,
+        0b1111111111111111111111111111111100111111001111111111111111111111,
+        0b1111111111111111111111111111111100000000011111111111111111111111,
+        0b1111111111111111111111111111111100000000000000000000000000000000,
+        0b1111111111111111111111111111110000000000000000000000000000000000,
+        0b1111111111111111111111110011111111111111111111111111111111111111,
+        0b1111111111111111000000111111111111111111111111110000001111111111,
+        0b1111111111111111000000000000000000000000000000000000000000000000,
+        0b1111111111000000000000000000000000000000000000000000000000000000,
+        0b0000011111111111111111111111111000000000000000000000000000000000,
+        0b0000000000000111111111111111111111111111111111111111111111111111,
+        0b0000000000000000000000000000000000000000000000000000111111110111,
+        0b0000000000000000000000000000000000000000111110000000000001111111,
+        0b0000000000000000000000000001111100000000000000000000000000000011,
+        0b0000000000000000000000111111111111111111111111111111111111111111,
+        0b0000000000000000001000001011111111111111111111111111111111111111,
+        0b0000000000000000001111111111111111111111111111111111111111111111,
+        0b0000000000001100011110000001111111111111111111111111111111111111,
+        0b0000000111111111111111111111111111111111111011111111111111111111,
+        0b0000010000100000000001000000000000000000000000000000000000000000,
+        0b0000011101100000000000000000000000000000000000000000011111111100,
+        0b0000111111111111111111111111111111111111000011111111111111111111,
+        0b0001111111011100000111111111111100001111110011110001111111011100,
+        0b0011111111111111111111111111111110101010111111110011111100111111,
+        0b0101111111011111111111111111111111111111111111111111111111111111,
+        0b0111101111111111111111111111111111011111110111111110011110111111,
+        0b1000000000000010000000000000000000000000000000000000000000000000,
+        0b1011110011001111000000000000000000000000000000000000000000100000,
+        0b1110011111111111111111111111111111111111111111110000000111111111,
+        0b1110011111111111111111111111111111111111111111110010000010111111,
+        0b1110101111111111110111100110010011011111111111111111111111111111,
+        0b1111001000011111101111010101000000111110001011111111110010000100,
+        0b1111011111111111111111111111111111110111111111111111111111111101,
+        0b1111111111111111000000011111111111110111111111111111111111111111,
+        0b1111111111111111111111111111101111111111111111111101011101000000,
+        0b1111111111111111111111111111111100000000000000000100001111100000,
+        0b1111111111111111111111111111111111111111111111011111110001011111,
+        0b1111111111111111111111111111111111111111111111110111100011111111,
+        0b1111111111111111111111111111111111111111111111111111110000000011,
     ];
-    static BITSET: [u64; 63] = [
-        0, 15, 24, 511, 1023, 4087, 65535, 16253055, 134217726, 536805376, 1073741823, 4294967295,
-        133143986179, 4398046511103, 36009005809663, 70368744177663, 2251799813685247,
-        3509778554814463, 144115188074807295, 297241973452963840, 531424756029720572,
-        576460743713488896, 576460743847706622, 1152921504591118335, 2295745090394464220,
-        4557642822898941951, 4611686017001275199, 6908521828386340863, 8935141660164089791,
-        9223934986808197120, 13605092999309557792, 16717361816799216127, 16717361816799223999,
-        17005555242810474495, 17446871633794956420, 17870283321271910397, 17870283321406128127,
-        18410715276682199039, 18428729675200069631, 18428729675200069632, 18437736874452713471,
-        18446462598732840959, 18446462598732840960, 18446464797621878783, 18446466996779287551,
-        18446603336221163519, 18446603336221196287, 18446741874686295551, 18446743249075830783,
-        18446744056529672000, 18446744056529682432, 18446744069414584320, 18446744069414601696,
-        18446744069422972927, 18446744070475743231, 18446744071562067967, 18446744073707454463,
-        18446744073709419615, 18446744073709517055, 18446744073709550595, 18446744073709551599,
-        18446744073709551600, 18446744073709551615,
+    static BITSET_MAPPING: [(u8, u8); 21] = [
+        (0, 55), (0, 50), (0, 44), (0, 43), (0, 27), (0, 17), (1, 14), (1, 12), (1, 6), (2, 128),
+        (3, 128), (4, 32), (5, 169), (6, 32), (7, 30), (8, 157), (9, 17), (10, 16), (11, 10),
+        (12, 32), (13, 157),
     ];
 
     pub fn lookup(c: char) -> bool {
@@ -273,7 +557,8 @@ pub mod cased {
             &BITSET_CHUNKS_MAP,
             BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
-            &BITSET,
+            &BITSET_CANONICAL,
+            &BITSET_MAPPING,
         )
     }
 }
@@ -287,8 +572,12 @@ pub mod cc {
     static BITSET_INDEX_CHUNKS: [[u8; 1]; 3] = [
         [0], [1], [2],
     ];
-    static BITSET: [u64; 3] = [
-        0, 4294967295, 9223372036854775808,
+    static BITSET_CANONICAL: [u64; 3] = [
+        0b0000000000000000000000000000000000000000000000000000000000000000,
+        0b0000000000000000000000000000000011111111111111111111111111111111,
+        0b1000000000000000000000000000000000000000000000000000000000000000,
+    ];
+    static BITSET_MAPPING: [(u8, u8); 0] = [
     ];
 
     pub fn lookup(c: char) -> bool {
@@ -297,73 +586,163 @@ pub mod cc {
             &BITSET_CHUNKS_MAP,
             BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
-            &BITSET,
+            &BITSET_CANONICAL,
+            &BITSET_MAPPING,
         )
     }
 }
 
 #[rustfmt::skip]
 pub mod grapheme_extend {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 44);
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 20);
     static BITSET_CHUNKS_MAP: [u8; 245] = [
-        0, 8, 15, 22, 26, 33, 40, 32, 35, 3, 0, 7, 21, 23, 30, 0, 20, 0, 0, 0, 0, 0, 12, 0, 27, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 25, 29, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 5, 0, 28, 1, 10, 0, 0, 0, 37, 6, 17, 43, 34, 42, 38, 31, 36, 39, 13, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 14, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 18, 0, 0,
-        0, 41, 0, 0, 24, 11, 0, 0, 9,
+        42, 34, 28, 23, 6, 11, 18, 10, 13, 40, 42, 35, 22, 3, 9, 42, 21, 42, 42, 42, 42, 42, 30,
+        42, 2, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+        42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+        42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 24, 5, 8, 42, 42, 42, 42, 42, 42,
+        42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+        42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 36, 42, 7, 41, 31, 42, 42, 42, 15, 37, 27, 19, 12,
+        0, 16, 44, 14, 17, 29, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+        42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 43, 42, 39,
+        42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+        42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 26, 42, 42, 42, 42, 42, 42, 42,
+        42, 42, 38, 25, 42, 42, 42, 1, 42, 42, 4, 32, 42, 42, 33,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 8]; 45] = [
-        [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 103], [0, 0, 0, 0, 0, 16, 20, 46],
-        [0, 0, 0, 0, 0, 38, 0, 0], [0, 0, 0, 0, 0, 133, 58, 0], [0, 0, 0, 0, 33, 0, 0, 0],
-        [0, 0, 0, 0, 49, 0, 0, 0], [0, 0, 0, 0, 77, 74, 106, 31], [0, 0, 0, 0, 143, 66, 0, 0],
-        [0, 0, 0, 21, 0, 10, 0, 0], [0, 0, 0, 39, 0, 94, 0, 0], [0, 0, 0, 62, 0, 0, 0, 0],
-        [0, 0, 0, 71, 0, 118, 0, 142], [0, 0, 0, 76, 0, 0, 0, 0], [0, 0, 0, 79, 87, 0, 0, 0],
-        [0, 0, 9, 0, 0, 0, 129, 7], [0, 0, 35, 0, 0, 0, 0, 0], [0, 0, 55, 0, 0, 18, 0, 0],
-        [0, 5, 0, 0, 0, 0, 0, 0], [0, 107, 37, 70, 0, 0, 0, 0], [12, 0, 0, 69, 0, 0, 0, 0],
-        [13, 0, 50, 0, 96, 0, 0, 0], [26, 67, 0, 59, 140, 11, 68, 104],
-        [27, 123, 139, 1, 100, 75, 57, 72], [51, 0, 0, 0, 87, 0, 0, 0],
-        [54, 0, 0, 120, 61, 19, 105, 47], [60, 28, 0, 141, 99, 45, 111, 109],
-        [63, 0, 25, 0, 0, 0, 0, 0], [65, 0, 0, 0, 0, 0, 37, 0], [85, 98, 131, 84, 0, 0, 0, 56],
-        [89, 0, 0, 91, 0, 0, 0, 135], [93, 0, 0, 0, 113, 3, 0, 40],
-        [95, 15, 101, 14, 90, 117, 102, 6], [97, 83, 97, 136, 132, 44, 108, 22],
-        [110, 0, 0, 52, 112, 80, 0, 0], [114, 78, 30, 0, 0, 0, 0, 0], [115, 29, 24, 0, 0, 0, 0, 0],
-        [121, 0, 0, 48, 0, 0, 0, 0], [125, 1, 88, 0, 53, 0, 0, 0], [128, 0, 86, 0, 127, 8, 23, 0],
-        [130, 42, 122, 41, 112, 43, 2, 36], [134, 82, 64, 0, 0, 0, 0, 0],
-        [137, 34, 124, 4, 0, 0, 126, 32], [138, 119, 92, 0, 81, 73, 116, 17],
-        [142, 143, 0, 0, 143, 143, 143, 66],
+        [4, 24, 87, 13, 138, 138, 89, 143], [5, 52, 41, 138, 138, 138, 138, 138],
+        [11, 138, 115, 138, 138, 138, 138, 138], [20, 86, 8, 102, 67, 47, 37, 45],
+        [32, 138, 138, 138, 6, 138, 138, 138], [35, 138, 138, 83, 130, 127, 71, 111],
+        [40, 123, 138, 100, 66, 31, 77, 75], [42, 138, 138, 138, 138, 138, 117, 138],
+        [55, 65, 94, 54, 138, 138, 138, 36], [58, 138, 138, 60, 138, 138, 138, 0],
+        [62, 129, 68, 142, 59, 82, 69, 105], [64, 53, 64, 97, 95, 30, 74, 17],
+        [76, 138, 138, 33, 78, 50, 138, 138], [80, 49, 22, 138, 138, 138, 138, 138],
+        [81, 21, 19, 138, 138, 138, 138, 138], [84, 138, 138, 118, 138, 138, 138, 138],
+        [88, 102, 57, 138, 34, 138, 138, 138], [91, 138, 56, 138, 90, 15, 18, 138],
+        [93, 28, 85, 27, 78, 29, 103, 25], [98, 131, 61, 138, 51, 112, 9, 16],
+        [101, 7, 138, 138, 7, 7, 7, 139], [106, 138, 138, 44, 138, 138, 138, 138],
+        [122, 138, 12, 138, 63, 138, 138, 138], [128, 43, 138, 39, 99, 125, 3, 70],
+        [138, 73, 117, 120, 138, 138, 138, 138], [138, 121, 138, 138, 138, 138, 138, 138],
+        [138, 138, 116, 138, 138, 138, 138, 138], [138, 138, 119, 138, 138, 126, 138, 138],
+        [138, 138, 135, 138, 138, 138, 92, 14], [138, 138, 138, 1, 138, 138, 138, 138],
+        [138, 138, 138, 2, 138, 114, 138, 101], [138, 138, 138, 109, 138, 10, 138, 138],
+        [138, 138, 138, 134, 138, 138, 138, 138], [138, 138, 138, 137, 138, 136, 138, 138],
+        [138, 138, 138, 138, 7, 139, 138, 138], [138, 138, 138, 138, 48, 46, 72, 23],
+        [138, 138, 138, 138, 108, 138, 138, 138], [138, 138, 138, 138, 133, 138, 138, 138],
+        [138, 138, 138, 138, 138, 96, 38, 138], [138, 138, 138, 138, 138, 107, 132, 110],
+        [138, 138, 138, 138, 138, 124, 138, 138], [138, 138, 138, 138, 138, 138, 138, 113],
+        [138, 138, 138, 138, 138, 138, 138, 138], [138, 138, 138, 141, 6, 138, 138, 138],
+        [140, 138, 138, 138, 79, 104, 138, 26],
+    ];
+    static BITSET_CANONICAL: [u64; 102] = [
+        0b1111101111111111111111111111111111111111111111111111111111111111,
+        0b0000000000011000000000000000000000000000000000000000000000000000,
+        0b0000000000000011100000000000000000000000000000000000000000000000,
+        0b0000000000000001111111111100000000000000000000000000000000000000,
+        0b1111111100000000000000000000000000000000000000000000000000000000,
+        0b1111100001111111111111111111111111111111111111111111111111111111,
+        0b0000000001111111000000000000000000000000000000000000000000000000,
+        0b1111111111111111111111111111111111111111111111111111111111111111,
+        0b1111111111111111000000000000000000000000000000000000000000000000,
+        0b0111111111000000000000000000000000000000000000000000000000000011,
+        0b0000011111000000000000000000000000000000000000000000000000000000,
+        0b0000000000000000111111000000000000000000000000000000000000000000,
+        0b0000000000000000000000100000000000000000000000000000000001100000,
+        0b0000000000000000000000000000000000000000000000000000000000001101,
+        0b0000000000000000000000000000000000000000000000000000000010110110,
+        0b0000000000000000000000000000000000000000000000000000000010111111,
+        0b0000000000000000000000000000000000000000000000001001111000000000,
+        0b0000000000000000000000000000000000000000100000000010000000000001,
+        0b0000000000000000000000000000000000000000101000110000000000000000,
+        0b0000000000000000000000000000000000000011011111111111110000000000,
+        0b0000000000000000000000000000000000001001100000000000000000000000,
+        0b0000000000000000000000000000000000001110011111100000000010000000,
+        0b0000000000000000000000000000000000100000000000000010000001100100,
+        0b0000000000000000000000000000000000100000000011111111111001000000,
+        0b0000000000000000000000000000000001000000000000000000000001011100,
+        0b0000000000000000000000000000000010000000010111001000010000000000,
+        0b0000000000000000000000000000000100001100111100000000000000000000,
+        0b0000000000000000000000000000110000000000011000000011000001000100,
+        0b0000000000000000000000000000110000000000011000000011110111000001,
+        0b0000000000000000000000000000110000000000100000000010000000011110,
+        0b0000000000000000000000000000110000000000111000000010000000011110,
+        0b0000000000000000000000000000110000000000111111100010000111111110,
+        0b0000000000000000000001111101101111111001111111111111111101111111,
+        0b0000000000000000000001111111100010000000000000000000000000000000,
+        0b0000000000000000000011111011110011100000000000000000000000000000,
+        0b0000000000000000000100000110000000000000000000000000100001000100,
+        0b0000000000000000001000010010000000000000000000000000000000000000,
+        0b0000000000000000001110110011110000000000000000000000000000000011,
+        0b0000000000000000001111000000000000000000000000000000111111100111,
+        0b0000000000000000001111011001111110011111110000000000000000000000,
+        0b0000000000000000001111101110111111111011110000000000000000000000,
+        0b0000000000000000111111111111111011111000000000000000000000010000,
+        0b0000000000000000111111111111111100000000000000001111111111111111,
+        0b0000000000000001000000000000000011111111111111111111100000000000,
+        0b0000000000000001111111111111111111111111111111110000000000000000,
+        0b0000000000000011101000110100000000000000000000000000000000000000,
+        0b0000000000001100000000000000000000000000000011000000000000000000,
+        0b0000000000001111111110000000000000000000000000000000000000000100,
+        0b0000000000011100000000000000000000000000000111000000000000000000,
+        0b0000000000011110000000000000000111000011000000000000000000000000,
+        0b0000000000011111000111111100000000000000100000000000000000000001,
+        0b0000000000011111111011111000000000000000000000000000000000000111,
+        0b0000000000100000000111111111111111111111111111111111111111111111,
+        0b0000000000100011000000000000000000000000000000100011100110000110,
+        0b0000000001000000001100000000000000000000000000000000000000000010,
+        0b0000000001100110011111100000000000000000000000000000000000000000,
+        0b0000000001101101111111001111111111111111111111000000000000000000,
+        0b0000000010111111001010000000000000000000000000000000000000000000,
+        0b0000000011001111111100000000000000000000000000000000000000000000,
+        0b0000001010100000000000000000000000000011000000000000000000000000,
+        0b0000001100010000001000011111110111111111111101110000000000000000,
+        0b0000011001111000000000000000000000000000000000000000000000000011,
+        0b0000011111110010000000000000000000000000000000000000000000000000,
+        0b0000111000000100000000011000011100000000000000000000000000000000,
+        0b0001000000000000000000000000000000000000000000000000000000000110,
+        0b0001000000000000000000000000000000000000000000000001000000001000,
+        0b0001010000000000000000000000000000000000000000000000000000000111,
+        0b0001011111110000000000000000000000000000000000000000000000001111,
+        0b0001111111110010000000000000000000000000000000000000000000000000,
+        0b0001111111111111111111111111111111111110111111111110000011011111,
+        0b0010000000001111111110000000000000000000000000000000000000000000,
+        0b0011001111001000000000000000000000000000000000000000000000000111,
+        0b0011111110110000000000000000000000000000000000000000000000000000,
+        0b0011111111110111100000000000000000000000000000000000000000000000,
+        0b0100000000000000000000000000000000000000000000000000000000000100,
+        0b0100000000000000000000000000110000000000100000000010000000011110,
+        0b0100000011010011100000000000000000000000000000000000000000000000,
+        0b0101000000000000000000000000000000000000000000000000000000000010,
+        0b0101100000000000000000000000000000000000000000000000000000000011,
+        0b0101100000000001000000000000000000000000000000000000000000000000,
+        0b0110011011111101111000000000000000000000000000000000000000000000,
+        0b0111100111111000000000000000000000000000000000000000011111111110,
+        0b0111111111111110000000000000000000000000000000000000000000000000,
+        0b1000000000000011111111111111111100000000000000000000000000110000,
+        0b1000011100000000000000000000000000000000000000001111000001101110,
+        0b1001000000000000000000000000000000000000000000000000000000000010,
+        0b1001111111111000000111111110010101111111010000000000000000000000,
+        0b1010010111111001000000000000000000000000000000000000000000000000,
+        0b1010011111111000000000000000000000000000000000000000000000000000,
+        0b1011000000111100100000000000000000000000000000000000000000000000,
+        0b1011010001111110000000000000000000000000000000000000000000000000,
+        0b1011111101111111000000000000000000000000000000000000000000000000,
+        0b1011111111111111111111111111111111111111111111100000000000000000,
+        0b1100000000000000000000000000000000000000000000000000000000010001,
+        0b1100000110011101000000000000000000000000000000000000000000000000,
+        0b1101000000000000000000000000000000000000000000000000000000000010,
+        0b1111100000000111110000111010000000000000000000000000000000000000,
+        0b1111110000000000000000000000110000000000000000000010000110111110,
+        0b1111111100000000000000000000000000000000000000000000000000000010,
+        0b1111111111111111000000000000000000000000000000100000000000000000,
+        0b1111111111111111111111111111101111111111111110000000000000000000,
+        0b1111111111111111111111111111111100000000000000000000000000000000,
     ];
-    static BITSET: [u64; 144] = [
-        0, 1, 2, 8, 13, 28, 64, 182, 191, 1016, 2032, 2047, 4096, 14336, 16128, 32640, 32768,
-        40448, 131008, 262016, 491520, 8323072, 8396801, 10682368, 58719232, 100663296, 134152192,
-        159383552, 234881024, 243138688, 536879204, 537919040, 805306369, 1073741824, 1073741916,
-        1610612736, 2153546752, 3221225472, 3758096384, 4294967296, 4512022528, 51545911364,
-        51545914817, 51548004382, 51554295838, 51556262398, 68719476736, 137438953472, 412316860416,
-        1030792151040, 2199023255648, 8641373536127, 8763880767488, 17303886364672, 18004502906948,
-        26388279066624, 36421322670080, 65128884076547, 65970697670631, 67755789254656,
-        69200441769984, 70093866270720, 263882790666240, 277076930199552, 281470547525648,
-        281470681808895, 281474976710655, 281479271675904, 562675075514368, 562949953355776,
-        844424930131968, 985162418487296, 1023920203366400, 2251799813685248, 3377699721314304,
-        4494803534348292, 6755399441055744, 7881299349733376, 8444256867844096, 8725724278030336,
-        8760633780600833, 8989057312882695, 9042383626829823, 9851624185018758, 18067175067615234,
-        28848986089586688, 30958948903026688, 35747322042253312, 53805701016846336,
-        58529202969772032, 189151184399892480, 220713756545974272, 466122561432846339,
-        504262420777140224, 558446353793941504, 572520102629474304, 1009933895770046464,
-        1152921504606846982, 1152921504606851080, 1441151880758558727, 1724878657282899983,
-        2301902359539744768, 2305843009196908767, 2305843009213693952, 2310337812748042240,
-        3731232291276455943, 4589168020290535424, 4609293481125347328, 4611686018427387908,
-        4611686069975392286, 4671217976001691648, 5764607523034234882, 6341068275337658371,
-        6341349750314369024, 7421334051581067264, 8788774672813524990, 9205357638345293827,
-        9222809086901354496, 9223372036854775808, 9223372036854775935, 9224497932466651184,
-        9727775195120332910, 10376293541461622786, 11526998316797657088, 11959590285459062784,
-        12103423998558208000, 12699165786766311424, 13005832773892571136, 13798747783286489088,
-        13835058055282032640, 13835058055282163729, 13951307220663664640, 14987979559889010690,
-        17872468738205286400, 17906312118425092095, 18158513697557839871, 18158513749097456062,
-        18374686479671623680, 18374686479671623682, 18446462598732840960, 18446462598732972032,
-        18446744056529158144, 18446744069414584320, 18446744073709551615,
+    static BITSET_MAPPING: [(u8, u8); 42] = [
+        (0, 134), (0, 135), (0, 137), (0, 140), (0, 146), (0, 149), (0, 164), (0, 166), (0, 170),
+        (0, 171), (0, 185), (0, 131), (0, 133), (1, 38), (1, 42), (1, 43), (1, 50), (1, 56),
+        (1, 61), (2, 19), (2, 28), (2, 42), (2, 46), (3, 26), (3, 32), (3, 33), (3, 42), (4, 15),
+        (4, 46), (4, 7), (5, 152), (5, 173), (5, 181), (6, 19), (6, 20), (6, 32), (7, 128),
+        (8, 128), (9, 57), (10, 58), (11, 30), (12, 23),
     ];
 
     pub fn lookup(c: char) -> bool {
@@ -372,57 +751,106 @@ pub mod grapheme_extend {
             &BITSET_CHUNKS_MAP,
             BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
-            &BITSET,
+            &BITSET_CANONICAL,
+            &BITSET_MAPPING,
         )
     }
 }
 
 #[rustfmt::skip]
 pub mod lowercase {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (122, 6);
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (122, 10);
     static BITSET_CHUNKS_MAP: [u8; 118] = [
-        12, 16, 0, 0, 10, 0, 0, 11, 13, 8, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 2, 1, 0, 17, 0, 9, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14,
+        5, 1, 16, 16, 8, 16, 16, 6, 4, 9, 16, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 12, 13, 16, 16, 16, 16,
+        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 14, 15, 16, 2, 16, 7, 16, 16,
+        17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 11, 16,
+        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+        16, 3,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 16]; 18] = [
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 62, 71, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 9, 0, 50, 42, 44, 28],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 68, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35],
-        [0, 0, 3, 0, 71, 71, 71, 0, 46, 46, 48, 46, 24, 37, 38, 23],
-        [0, 29, 27, 57, 39, 51, 52, 43, 41, 70, 26, 11, 0, 34, 64, 32],
-        [0, 40, 8, 0, 33, 60, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [22, 13, 54, 66, 25, 15, 56, 63, 30, 19, 12, 55, 58, 61, 65, 4],
-        [59, 36, 46, 21, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [59, 49, 45, 47, 18, 69, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [67, 5, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+        [4, 31, 40, 19, 16, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66],
+        [4, 42, 69, 41, 18, 3, 10, 66, 66, 66, 66, 66, 66, 66, 66, 66],
+        [55, 68, 66, 6, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66],
+        [62, 15, 47, 54, 22, 60, 49, 0, 26, 61, 70, 48, 64, 65, 1, 11],
+        [66, 35, 71, 66, 28, 51, 9, 66, 66, 66, 66, 66, 66, 66, 66, 66],
+        [66, 63, 24, 50, 34, 44, 45, 38, 36, 57, 23, 14, 66, 29, 53, 27],
+        [66, 66, 10, 66, 2, 2, 2, 66, 40, 40, 5, 40, 21, 32, 33, 20],
+        [66, 66, 66, 7, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66],
+        [66, 66, 66, 46, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 30],
+        [66, 66, 66, 59, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66],
+        [66, 66, 66, 66, 56, 8, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66],
+        [66, 66, 66, 66, 66, 66, 66, 66, 66, 3, 66, 66, 66, 66, 66, 66],
+        [66, 66, 66, 66, 66, 66, 66, 66, 66, 17, 13, 66, 43, 37, 39, 25],
+        [66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 4, 52, 2, 66],
+        [66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 12, 66, 66, 66],
+        [66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 58, 66, 66],
+        [66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66],
+        [66, 66, 66, 67, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66],
     ];
-    static BITSET: [u64; 72] = [
-        0, 15, 16, 511, 3063, 65535, 16253055, 134217726, 536805376, 984263338, 4294967295,
-        133143986179, 274877905920, 1099509514240, 4398046445568, 17592185782272, 36009005809663,
-        46912496118442, 187649984473770, 281474972516352, 2251799813685247, 2339875276368554,
-        4503599560261632, 61925590106570972, 71777214282006783, 72057592964186127,
-        144115188074807295, 297241973452963840, 522417556774978824, 576460743713488896,
-        1152921487426978047, 1152921504590069760, 1814856824841797631, 3607524039012697088,
-        4362299189061746720, 4539628424389459968, 4601013482110844927, 4611405638684049471,
-        4674456033467236607, 6172933889249159850, 9223934986808197120, 10663022717737544362,
-        10808545280696953514, 12261519110656315968, 12294970652241842346, 12297829382473033730,
-        12297829382473034410, 12297829382473045332, 12297829382829550250, 12297829383904690175,
-        12298110845996498944, 15324248332066007893, 16596095761559859497, 16717361816799215616,
-        16987577794709946364, 17293822586148356092, 18158513701852807104, 18410715274543104000,
-        18428729675466407935, 18446462598732840960, 18446462598732858304, 18446462598737002495,
-        18446464797621878783, 18446673704966422527, 18446726481523572736, 18446739675663105535,
-        18446739675663106031, 18446742974197923840, 18446744056529682432, 18446744069414584320,
-        18446744073709529733, 18446744073709551615,
+    static BITSET_CANONICAL: [u64; 58] = [
+        0b1111111111111111110000000000000000000000000011111111111111111111,
+        0b1111111111111111111111000000000000000000000000001111110111111111,
+        0b1111111111111111111111111111111111111111111111111111111111111111,
+        0b1111111111111111111111111111111100000000000000000000000000000000,
+        0b1111111111111111000000000000000000000000000000000000000000000000,
+        0b1010101010101010101010101010101010111111111010101010101010101010,
+        0b0000111111111111111111111111111111111111000000000000000000000000,
+        0b0000000000000111111111111111111111111111111111111111111111111111,
+        0b0000000000000000000000000000000000000000000000000000000000001111,
+        0b0000000000000000000000000000000000000000000000000000000000010000,
+        0b0000000000000000000000000000000000000000000000000000000111111111,
+        0b0000000000000000000000000000000000000000000000000000101111110111,
+        0b0000000000000000000000000000000000000000111110000000000001111111,
+        0b0000000000000000000000000000000000111010101010101010101010101010,
+        0b0000000000000000000000000001111100000000000000000000000000000011,
+        0b0000000000000000000000001111111111111111110111111100000000000000,
+        0b0000000000000000001000001011111111111111111111111111111111111111,
+        0b0000000000000000001010101010101010101010101010101010101010101010,
+        0b0000000000000000101010101010101010101010101010101010101010101010,
+        0b0000000000001000010100000001101010101010101010101010101010101010,
+        0b0000000011011100000000001111111100000000110011110000000011011100,
+        0b0000000011111111000000001111111100000000001111110000000011111111,
+        0b0000000011111111111111111111111111000000000000000000000000001111,
+        0b0000000111111111111111111111111111111111111011111111111111111111,
+        0b0000010000100000000001000000000000000000000000000000000000000000,
+        0b0000011101000000000000000000000000000000000000000000010100001000,
+        0b0000111111111111111111111111110000000000000000000000000011111111,
+        0b0001100100101111101010101010101010101010111000110111111111111111,
+        0b0011001000010000100000000000000000000000000010001100010000000000,
+        0b0011110010001010000000000000000000000000000000000000000000100000,
+        0b0011111100000000000000000000000000000000000000000000000000000000,
+        0b0011111111011010000101010110001001111111111111111111111111111111,
+        0b0011111111111111000000001111111100000000111111110000000000111111,
+        0b0100000011011111000000001111111100000000111111110000000011111111,
+        0b0101010110101010101010101010101010101010101010101010101010101010,
+        0b1000000000000010000000000000000000000000000000000000000000000000,
+        0b1001001111111010101010101010101010101010101010101010101010101010,
+        0b1001010111111111101010101010101010101010101010101010101010101010,
+        0b1010101000101001101010101010101010110101010101010101001001000000,
+        0b1010101010100000100000101010101010101010101110100101000010101010,
+        0b1010101010101010101010101010101010101010101010101010101010101010,
+        0b1010101010101010101010101010101010101010101010101101010101010100,
+        0b1010101010101010101010101010101011111111111111111111111111111111,
+        0b1010101010101011101010101010100000000000000000000000000000000000,
+        0b1101010010101010101010101010101010101010101010101010101101010101,
+        0b1110011001010001001011010010101001001110001001000011000100101001,
+        0b1110011111111111111111111111111111111111111111110000000000000000,
+        0b1110101111000000000000000000000000001111111111111111111111111100,
+        0b1111000000000000000000000000001111110111111111111111111111111100,
+        0b1111110000000000000000000000000011111111111111111111111111000000,
+        0b1111111101111111111111111111111110000000000000000000000000000000,
+        0b1111111111111111000000000000000000000000000000000100001111000000,
+        0b1111111111111111000000011111111111110111111111111111111111111111,
+        0b1111111111111111111100000000000000000000000000010000000000000000,
+        0b1111111111111111111111000000000000000000000000001111111111101111,
+        0b1111111111111111111111110000000000000000000000000000000000000000,
+        0b1111111111111111111111111111110000000000000000000000000000000000,
+        0b1111111111111111111111111111111111111111111111111010101010000101,
+    ];
+    static BITSET_MAPPING: [(u8, u8); 14] = [
+        (0, 173), (0, 188), (0, 190), (0, 130), (0, 134), (0, 141), (1, 12), (1, 6), (2, 128),
+        (3, 32), (4, 16), (5, 173), (6, 142), (7, 157),
     ];
 
     pub fn lookup(c: char) -> bool {
@@ -431,58 +859,108 @@ pub mod lowercase {
             &BITSET_CHUNKS_MAP,
             BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
-            &BITSET,
+            &BITSET_CANONICAL,
+            &BITSET_MAPPING,
         )
     }
 }
 
 #[rustfmt::skip]
 pub mod n {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (253, 2);
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (253, 40);
     static BITSET_CHUNKS_MAP: [u8; 249] = [
-        44, 0, 0, 29, 5, 31, 35, 26, 22, 6, 0, 12, 40, 20, 27, 0, 33, 0, 39, 7, 0, 0, 17, 0, 45,
-        42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 43,
-        23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 10, 16, 21, 0, 37, 34, 18, 36, 32, 15, 25, 24, 13, 0,
-        30, 1, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        14, 0, 3, 0, 0, 0, 0, 4, 15, 0, 0, 11, 0, 38, 0, 8,
+        5, 41, 41, 21, 37, 23, 11, 18, 16, 35, 41, 27, 2, 45, 46, 41, 9, 41, 15, 34, 41, 41, 29,
+        41, 1, 3, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+        41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+        41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 4, 6, 20, 41, 41, 41, 41, 41, 41,
+        41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+        41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 33, 32, 28, 25, 41, 13, 10, 26, 12, 8, 30,
+        19, 17, 43, 41, 7, 38, 41, 41, 0, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+        41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 22, 41, 24,
+        41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+        41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+        41, 41, 41, 31, 41, 39, 41, 41, 41, 41, 36, 30, 41, 41, 44, 41, 14, 41, 42,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 8]; 47] = [
-        [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 11], [0, 0, 0, 0, 0, 0, 0, 47],
-        [0, 0, 0, 0, 0, 0, 0, 72], [0, 0, 0, 0, 0, 2, 0, 0], [0, 0, 0, 0, 0, 31, 0, 45],
-        [0, 0, 0, 0, 0, 53, 0, 0], [0, 0, 0, 0, 0, 65, 9, 0], [0, 0, 0, 0, 6, 0, 0, 0],
-        [0, 0, 0, 0, 15, 0, 0, 0], [0, 0, 0, 0, 37, 44, 4, 0], [0, 0, 0, 7, 0, 15, 0, 0],
-        [0, 0, 0, 33, 0, 0, 0, 49], [0, 0, 0, 35, 0, 15, 0, 0], [0, 0, 0, 36, 0, 43, 0, 0],
-        [0, 0, 0, 47, 0, 0, 0, 0], [0, 0, 0, 52, 23, 3, 0, 13], [0, 0, 0, 54, 0, 0, 0, 0],
-        [0, 0, 0, 62, 47, 0, 0, 0], [0, 0, 14, 0, 0, 0, 0, 0], [0, 0, 16, 0, 0, 15, 47, 0],
-        [0, 0, 25, 0, 0, 0, 0, 0], [0, 2, 15, 0, 0, 0, 0, 0], [0, 15, 0, 0, 0, 0, 0, 47],
-        [0, 15, 0, 2, 51, 0, 0, 0], [0, 15, 0, 15, 0, 0, 0, 0], [0, 15, 0, 15, 36, 0, 0, 0],
-        [0, 16, 0, 0, 0, 0, 0, 0], [0, 25, 0, 0, 0, 22, 0, 0], [0, 25, 0, 47, 0, 0, 0, 2],
-        [0, 26, 0, 0, 0, 15, 25, 0], [0, 31, 0, 31, 0, 41, 0, 34], [0, 32, 0, 47, 65, 0, 0, 39],
-        [0, 46, 2, 0, 0, 71, 1, 0], [0, 57, 20, 28, 0, 64, 29, 0], [0, 59, 0, 31, 0, 42, 0, 31],
-        [0, 60, 0, 0, 24, 10, 0, 5], [0, 63, 30, 61, 18, 0, 55, 70], [0, 66, 38, 0, 56, 0, 0, 0],
-        [0, 69, 19, 68, 0, 0, 0, 0], [15, 0, 0, 0, 0, 8, 0, 17], [25, 0, 0, 31, 0, 0, 0, 0],
-        [25, 21, 67, 0, 0, 0, 0, 0], [40, 0, 0, 15, 2, 0, 0, 48], [47, 0, 58, 0, 0, 0, 0, 0],
-        [50, 0, 0, 0, 0, 0, 12, 0], [73, 27, 0, 0, 0, 0, 0, 0],
+        [7, 23, 67, 67, 67, 67, 67, 67], [32, 67, 67, 67, 67, 67, 66, 67],
+        [50, 67, 67, 67, 67, 49, 67, 18], [52, 20, 8, 67, 67, 67, 67, 67],
+        [52, 67, 67, 53, 67, 67, 67, 67], [54, 67, 38, 67, 67, 67, 67, 67],
+        [58, 67, 67, 50, 48, 67, 67, 31], [67, 22, 67, 67, 67, 50, 52, 67],
+        [67, 24, 67, 54, 0, 67, 67, 26], [67, 30, 48, 67, 67, 46, 14, 67],
+        [67, 37, 72, 60, 67, 42, 64, 67], [67, 39, 67, 53, 67, 28, 67, 53],
+        [67, 40, 67, 67, 51, 65, 67, 63], [67, 41, 13, 3, 57, 67, 56, 1],
+        [67, 43, 25, 67, 36, 67, 67, 67], [67, 45, 19, 44, 67, 67, 67, 67],
+        [67, 48, 50, 67, 67, 67, 67, 67], [67, 50, 67, 48, 33, 67, 67, 67],
+        [67, 50, 67, 50, 62, 67, 67, 67], [67, 50, 67, 50, 67, 67, 67, 67],
+        [67, 50, 67, 67, 67, 67, 67, 54], [67, 52, 67, 54, 67, 67, 67, 48],
+        [67, 52, 67, 67, 67, 21, 67, 67], [67, 53, 67, 53, 67, 27, 67, 11],
+        [67, 67, 17, 67, 67, 67, 67, 67], [67, 67, 52, 67, 67, 67, 67, 67],
+        [67, 67, 67, 2, 54, 67, 67, 67], [67, 67, 67, 12, 67, 67, 67, 9],
+        [67, 67, 67, 34, 6, 15, 67, 59], [67, 67, 67, 35, 67, 67, 67, 67],
+        [67, 67, 67, 54, 67, 67, 67, 67], [67, 67, 67, 62, 67, 68, 67, 67],
+        [67, 67, 67, 67, 10, 5, 55, 67], [67, 67, 67, 67, 50, 67, 67, 67],
+        [67, 67, 67, 67, 67, 0, 61, 67], [67, 67, 67, 67, 67, 4, 67, 67],
+        [67, 67, 67, 67, 67, 48, 67, 67], [67, 67, 67, 67, 67, 53, 67, 29],
+        [67, 67, 67, 67, 67, 67, 67, 16], [67, 67, 67, 67, 67, 67, 67, 47],
+        [67, 67, 67, 67, 67, 67, 67, 54], [67, 67, 67, 67, 67, 67, 67, 67],
+        [67, 67, 67, 67, 71, 67, 67, 67], [67, 67, 67, 70, 67, 50, 67, 67],
+        [67, 67, 67, 73, 67, 50, 67, 67], [67, 67, 69, 67, 67, 50, 54, 67],
+        [67, 69, 67, 67, 67, 67, 67, 67],
+    ];
+    static BITSET_CANONICAL: [u64; 48] = [
+        0b1111111111000000000000000000000000000000000000000000000000000000,
+        0b1111111111111111111111111111111111111111111111001111111111111111,
+        0b1111110000000000000000000000000000000000000000000000000000000000,
+        0b1111100000000000000000000000000000000000000000000000000000000000,
+        0b0001111111111111111111100000000000000000000000000000000000000000,
+        0b0000000111111111111111111111111111111111111111111111111111111111,
+        0b0000000000000000000000000000111100000000000000000000000000000000,
+        0b1111111111111111111111111111111111111111111111111111111111111111,
+        0b1111111111111110000000000000000000000000000000000000001111111111,
+        0b0000001111111111000000111111111100000000000000000000000000000000,
+        0b0000000000001111111111111111111111111111111111111111111110000000,
+        0b0000000000000111111111111100000000000000000000000000000000000000,
+        0b0000000000000001110000000000000000000000000000000000000000000000,
+        0b0000000000000000111111111000000000000000000000000000000000000000,
+        0b0000000000000000000000000000000000000000000000000000001111100111,
+        0b0000000000000000000000000000000000000000000000000000010000000010,
+        0b0000000000000000000000000000000000000000000111111111111111111111,
+        0b0000000000000000000000000000000000000000011111111111111111111111,
+        0b0000000000000000000000000000000000000111111111110000000000000000,
+        0b0000000000000000000000000000000000001111111111111111111111111111,
+        0b0000000000000000000000000000000011111111111111101111111100000000,
+        0b0000000000000000000000000000001111111011111111110000000000000000,
+        0b0000000000000000000111111111111111111111111111110000000000000000,
+        0b0000000000000000011111111111111111111111111111111111111111111111,
+        0b0000000000000000111111111111111111111111111111000000000000000000,
+        0b0000000000011110111011111111111111111111111111111111111111111111,
+        0b0000000000011111111111111111111000000011111111110000000000000000,
+        0b0000000011111100111111111100000000000000000000000000000000000000,
+        0b0000000111111111111111111100000001111111000000000000000000000000,
+        0b0000001111110000111111111100000000000000000000000000000000000000,
+        0b0000001111110001000000000000000000000000000000000000000000000000,
+        0b0000001111111111000000000000000000000011111111110000000000000000,
+        0b0000011100000000000000111111111000000000000000000000000010000000,
+        0b0000111111111111000000000000000000000000000000000000000000000000,
+        0b0000111111111111111111111111111000000000000000000000000000000000,
+        0b0010000000000000000000000000000000000000000000000000000000000000,
+        0b0011111111111111101111111111111111111111111111111111111111111110,
+        0b0110000000000000000000000000000000000000000000000000000111111111,
+        0b0111001000001100000000000000000000000000000000000000000000000000,
+        0b0111111100000000111111111100000000000000000000000000000000000000,
+        0b0111111111111111111111111111111100000000000000000000000000000000,
+        0b1111111000000000000000000000000011111111000000000000000000000000,
+        0b1111111100000000000000000000000011111111000000000000000000000000,
+        0b1111111111111110000000000000000000000000000000000000000000000000,
+        0b1111111111111111111111000000000000000000000000000000000000000000,
+        0b1111111111111111111111111111111100000000000000000000000000000000,
+        0b1111111111111111111111111111111111111111111111110000000000000000,
+        0b1111111111111111111111111111111111111111111111111100000000000000,
     ];
-    static BITSET: [u64; 74] = [
-        0, 999, 1023, 1026, 3072, 4064, 8191, 65408, 65472, 1048575, 1966080, 2097151, 3932160,
-        4063232, 8388607, 67043328, 67044351, 134152192, 264241152, 268435455, 3758096384,
-        4294901504, 17112694784, 64424509440, 549218942976, 4393751543808, 35184372023296,
-        140737488355327, 272678883688448, 279275953455104, 280925220896768, 281200098803712,
-        281474976448512, 492581209243648, 2251524935778304, 2251795518717952, 4503595332403200,
-        4503599627370368, 8708132091985919, 9007190731849728, 17732923532771328, 71212894229889024,
-        144114915328655360, 144115183780888576, 144115188075855871, 284007976623144960,
-        284008251501051904, 287948901175001088, 287948901242044416, 287953294926544896,
-        504407547722072192, 1152640029630136320, 1152921496016912384, 2305840810190438400,
-        2305843009213693952, 3458764513820540928, 4611615649683210238, 6917529027641082367,
-        8217943420044312576, 9151595642915651584, 9223372032559808512, 17870283321406128128,
-        18158513697557839872, 18302628889911885824, 18374686483949813760, 18428729675200069632,
-        18446181123756130304, 18446181123756131327, 18446739675663040512, 18446744069414584320,
-        18446744073709355007, 18446744073709486080, 18446744073709535232, 18446744073709551615,
+    static BITSET_MAPPING: [(u8, u8); 26] = [
+        (0, 10), (0, 16), (0, 26), (0, 39), (0, 42), (0, 48), (0, 58), (1, 186), (1, 172), (2, 28),
+        (2, 54), (3, 22), (3, 48), (4, 23), (4, 55), (5, 140), (5, 176), (6, 49), (6, 50), (7, 128),
+        (8, 47), (9, 32), (10, 172), (11, 26), (12, 47), (13, 32),
     ];
 
     pub fn lookup(c: char) -> bool {
@@ -491,55 +969,97 @@ pub mod n {
             &BITSET_CHUNKS_MAP,
             BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
-            &BITSET,
+            &BITSET_CANONICAL,
+            &BITSET_MAPPING,
         )
     }
 }
 
 #[rustfmt::skip]
 pub mod uppercase {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (124, 6);
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (124, 3);
     static BITSET_CHUNKS_MAP: [u8; 123] = [
-        12, 15, 0, 0, 11, 0, 0, 8, 5, 9, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 1, 0, 13, 0, 7, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0,
-        0, 0, 4,
+        12, 16, 4, 4, 2, 4, 4, 11, 8, 0, 4, 14, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        4, 4, 4, 5, 4, 13, 4, 10, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        4, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 15, 4, 4,
+        4, 4, 9,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 9, 0, 38, 46, 44, 28],
-        [0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 51, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 60, 62, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 54, 0, 0, 0, 0, 0, 43, 43, 40, 43, 56, 22, 34, 35],
-        [0, 0, 57, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 66, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, 30],
-        [0, 10, 0, 11, 50, 37, 36, 45, 47, 5, 0, 0, 0, 49, 17, 53],
-        [14, 0, 60, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [21, 52, 43, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [24, 39, 42, 41, 59, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [58, 65, 29, 16, 48, 63, 31, 19, 55, 61, 64, 32, 27, 20, 15, 3],
+        [8, 8, 2, 57, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
+        [8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
+        [8, 8, 4, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 60],
+        [8, 8, 8, 8, 1, 49, 59, 8, 8, 8, 8, 8, 8, 8, 8, 8],
+        [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
+        [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 54, 8, 8, 8],
+        [8, 8, 8, 8, 8, 8, 8, 8, 8, 17, 13, 8, 31, 37, 35, 23],
+        [8, 8, 8, 8, 8, 8, 8, 8, 8, 63, 8, 8, 8, 8, 8, 8],
+        [8, 8, 8, 8, 42, 20, 66, 8, 8, 8, 8, 8, 8, 8, 8, 8],
+        [8, 8, 8, 8, 64, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
+        [8, 8, 22, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
+        [8, 8, 45, 8, 8, 8, 8, 8, 34, 34, 65, 34, 47, 19, 27, 28],
+        [8, 51, 8, 14, 41, 30, 29, 36, 38, 10, 8, 8, 8, 40, 16, 44],
+        [15, 8, 1, 11, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
+        [18, 43, 34, 21, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
+        [55, 0, 24, 52, 39, 50, 25, 53, 46, 56, 5, 26, 3, 62, 61, 7],
+        [58, 32, 6, 33, 48, 12, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
     ];
-    static BITSET: [u64; 67] = [
-        0, 8, 1023, 1024, 8383, 21882, 65535, 1048575, 8388607, 89478485, 134217726, 2139095039,
-        4294967295, 17179869183, 1099511627775, 2199023190016, 4398046445568, 17575006099264,
-        23456248059221, 70368743129088, 140737484161024, 140737488355327, 280378317225728,
-        281470681743392, 281474976710655, 1169903278445909, 2251799813685247, 9007198986305536,
-        9007199254741748, 17977448100528131, 18014398509481983, 288230371856744511,
-        576460735123554305, 576460743713488896, 1080863910568919040, 1080897995681042176,
-        1274187559846268630, 3122495741643543722, 6148633210533183488, 6148914689804861440,
-        6148914690880001365, 6148914691236506283, 6148914691236516865, 6148914691236517205,
-        6151773421467674709, 6184099063146390672, 7638198793012598101, 7783721355972007253,
-        8863084067199903664, 9242793810247811072, 12273810184460391765, 13839347594782259332,
-        13845730589451223040, 16613872850358272000, 16717361816799215616, 17293822586282573568,
-        18374966856193736448, 18428729675200069632, 18442240474149289983, 18446274948748367189,
-        18446462598732840960, 18446462598737035263, 18446466996779287551, 18446726481523637343,
-        18446742974197924863, 18446742974197940223, 18446744069414584320,
+    static BITSET_CANONICAL: [u64; 51] = [
+        0b1111111111111111111111110000000000000000000000000011111111111111,
+        0b1111111111111111000000000000000000000000000000000000000000000000,
+        0b1111111111000000000000000000000000000000000000000000000000000000,
+        0b0000000000011111111111111111111111110000000000000000000000000000,
+        0b1111111111111111111111111111111100000000000000000000000000000000,
+        0b1111111111111111111111110000000000000000000000000000001111111111,
+        0b0101010101010101010101010101010101010101010101010101010000000001,
+        0b0000000000000000000000000000000000000000000000000000010000000000,
+        0b0000000000000000000000000000000000000000000000000000000000000000,
+        0b0000000000000000000000000000000000000000000000000010000010111111,
+        0b0000000000000000000000000000000000000000000000000101010101111010,
+        0b0000000000000000000000000000000000000000000011111111111111111111,
+        0b0000000000000000000000000000000000000000011111111111111111111111,
+        0b0000000000000000000000000000000000000101010101010101010101010101,
+        0b0000000000000000000000000000000001111111011111111111111111111111,
+        0b0000000000000000000000001111111111111111111111111111111111111111,
+        0b0000000000000000000011111111101111111111111111101101011101000000,
+        0b0000000000000000000101010101010101010101010101010101010101010101,
+        0b0000000000000000011111111111111111111111111111111111111111111111,
+        0b0000000000000000111111110000000010101010000000000011111100000000,
+        0b0000000000000000111111111111111100000000000000000000000000100000,
+        0b0000000000000100001010000000010101010101010101010101010101010101,
+        0b0000000000000111111111111111111111111111111111111111111111111111,
+        0b0000000000100000000000000000000000000000000000000000001011110100,
+        0b0000000000111111110111100110010011010000000000000000000000000011,
+        0b0000001111111111111111111111111100000000000000000000000000111111,
+        0b0000011111111111111111111111110000000000000000000000000000000001,
+        0b0000111100000000000000000000000000000000000000000000000000000000,
+        0b0000111100000000000111110000000000001111000000000000111100000000,
+        0b0001000110101110110100101101010110110001110110111100111011010110,
+        0b0010101101010101010101010101010101010101010101010101010010101010,
+        0b0101010101010100010101010101010000000000000000000000000000000000,
+        0b0101010101010101010101010101010100000000000000000000000000000000,
+        0b0101010101010101010101010101010101010101010101010010101010101011,
+        0b0101010101010101010101010101010101010101010101010101010101010101,
+        0b0101010101011111011111010101010101010101010001010010100001010101,
+        0b0101010111010010010101010101010101001010101010101010010010010000,
+        0b0110101000000000010101010101010101010101010101010101010101010101,
+        0b0110110000000101010101010101010101010101010101010101010101010101,
+        0b0111101100000000000000000000000000011111110111111110011110110000,
+        0b1000000001000101000000000000000000000000000000000000000000000000,
+        0b1010101001010101010101010101010101010101010101010101010101010101,
+        0b1100000000001111001111010101000000111110001001110011100010000100,
+        0b1100000000100101111010101001110100000000000000000000000000000000,
+        0b1110011010010000010101010101010101010101000111001000000000000000,
+        0b1110011111111111111111111111111111111111111111110000000000000000,
+        0b1111000000000000000000000000001111111111111111111111111100000000,
+        0b1111111100000000111111110000000000111111000000001111111100000000,
+        0b1111111111111110010101010101010101010101010101010101010101010101,
+        0b1111111111111111000000111111111111111111111111110000001111111111,
+        0b1111111111111111111100000000000000000000000000011111110001011111,
+    ];
+    static BITSET_MAPPING: [(u8, u8); 16] = [
+        (0, 179), (0, 130), (0, 134), (0, 147), (0, 12), (0, 8), (1, 16), (1, 128), (2, 10),
+        (2, 128), (3, 52), (3, 58), (4, 32), (5, 24), (6, 20), (7, 57),
     ];
 
     pub fn lookup(c: char) -> bool {
@@ -548,22 +1068,30 @@ pub mod uppercase {
             &BITSET_CHUNKS_MAP,
             BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
-            &BITSET,
+            &BITSET_CANONICAL,
+            &BITSET_MAPPING,
         )
     }
 }
 
 #[rustfmt::skip]
 pub mod white_space {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (32, 2);
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (32, 3);
     static BITSET_CHUNKS_MAP: [u8; 22] = [
-        3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1,
+        2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 6]; 4] = [
-        [0, 0, 0, 0, 0, 0], [0, 0, 5, 2, 0, 0], [1, 0, 0, 0, 0, 0], [4, 0, 3, 0, 0, 0],
+        [1, 1, 1, 1, 1, 1], [1, 1, 4, 0, 1, 1], [3, 1, 2, 1, 1, 1], [5, 1, 1, 1, 1, 1],
+    ];
+    static BITSET_CANONICAL: [u64; 5] = [
+        0b0000000000000000000000000000000010000000000000000000000000000000,
+        0b0000000000000000000000000000000000000000000000000000000000000000,
+        0b0000000000000000000000000000000100000000000000000000000000100000,
+        0b0000000000000000000000000000000100000000000000000011111000000000,
+        0b0000000000000000100000110000000000000000000000000000011111111111,
     ];
-    static BITSET: [u64; 6] = [
-        0, 1, 2147483648, 4294967328, 4294983168, 144036023240703,
+    static BITSET_MAPPING: [(u8, u8); 1] = [
+        (0, 33),
     ];
 
     pub fn lookup(c: char) -> bool {
@@ -572,7 +1100,8 @@ pub mod white_space {
             &BITSET_CHUNKS_MAP,
             BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
-            &BITSET,
+            &BITSET_CANONICAL,
+            &BITSET_MAPPING,
         )
     }
 }
diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs
index 39c288dfc61..5e8865fc9e3 100644
--- a/src/tools/unicode-table-generator/src/main.rs
+++ b/src/tools/unicode-table-generator/src/main.rs
@@ -254,12 +254,19 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
     s.push_str(
         "
 #[inline(always)]
-fn range_search<const N: usize, const CHUNK_SIZE: usize, const N1: usize, const N2: usize>(
+fn range_search<
+    const N: usize,
+    const CHUNK_SIZE: usize,
+    const N1: usize,
+    const CANONICAL: usize,
+    const CANONICALIZED: usize,
+>(
     needle: u32,
     chunk_idx_map: &[u8; N],
     (last_chunk_idx, last_chunk_mapping): (u16, u8),
     bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
-    bitset: &[u64; N2],
+    bitset_canonical: &[u64; CANONICAL],
+    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
 ) -> bool {
     let bucket_idx = (needle / 64) as usize;
     let chunk_map_idx = bucket_idx / CHUNK_SIZE;
@@ -273,8 +280,21 @@ fn range_search<const N: usize, const CHUNK_SIZE: usize, const N1: usize, const
     } else {
         chunk_idx_map[chunk_map_idx]
     };
-    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece];
-    let word = bitset[(idx as usize)];
+    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
+    let word = if idx < CANONICAL {
+        bitset_canonical[idx]
+    } else {
+        let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
+        let mut word = bitset_canonical[real_idx as usize];
+        let should_invert = mapping & (1 << 7) != 0;
+        if should_invert {
+            word = !word;
+        }
+        // Unset the inversion bit
+        let rotate_by = mapping & !(1 << 7);
+        word = word.rotate_left(rotate_by as u32);
+        word
+    };
     (word & (1 << (needle % 64) as u64)) != 0
 }
     ",
diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs
index 5f66bcbebaf..38b36c34042 100644
--- a/src/tools/unicode-table-generator/src/raw_emitter.rs
+++ b/src/tools/unicode-table-generator/src/raw_emitter.rs
@@ -22,8 +22,9 @@
 //! mapping into two separate sets; currently this is not dealt with).
 //!
 //! With that scheme, we now have a single byte for every 64 codepoints. We
-//! further group these by 16 (arbitrarily chosen), and again deduplicate and
-//! store in an array (u8 -> [u8; 16]).
+//! further group these by some constant N (between 1 and 64 per group), and
+//! again deduplicate and store in an array (u8 -> [u8; N]). The constant is
+//! chosen to be optimal in bytes-in-memory for the given dataset.
 //!
 //! The indices into this array represent ranges of 64*16 = 1024 codepoints.
 //!
@@ -37,9 +38,9 @@
 //! down considerably.
 
 use crate::fmt_list;
-use std::collections::{BTreeSet, HashMap};
+use std::collections::{BTreeMap, BTreeSet, HashMap};
 use std::convert::TryFrom;
-use std::fmt::Write;
+use std::fmt::{self, Write};
 use std::ops::Range;
 
 #[derive(Clone)]
@@ -61,6 +62,10 @@ impl RawEmitter {
     }
 
     fn emit_bitset(&mut self, words: &[u64]) {
+        let mut words = words.to_vec();
+        // Ensure that there's a zero word in the dataset, used for padding and
+        // such.
+        words.push(0);
         let unique_words =
             words.iter().cloned().collect::<BTreeSet<_>>().into_iter().collect::<Vec<_>>();
         if unique_words.len() > u8::max_value() as usize {
@@ -68,13 +73,9 @@ impl RawEmitter {
         }
         // needed for the chunk mapping to work
         assert_eq!(unique_words[0], 0, "has a zero word");
+        let canonicalized = Canonicalized::canonicalize(&unique_words);
 
-        let word_indices = unique_words
-            .iter()
-            .cloned()
-            .enumerate()
-            .map(|(idx, word)| (word, u8::try_from(idx).unwrap()))
-            .collect::<HashMap<_, _>>();
+        let word_indices = canonicalized.unique_mapping.clone();
         let compressed_words = words.iter().map(|w| word_indices[w]).collect::<Vec<u8>>();
 
         let mut best = None;
@@ -91,14 +92,32 @@ impl RawEmitter {
         }
         self.emit_chunk_map(word_indices[&0], &compressed_words, best.unwrap().0);
 
+        struct Bits(u64); // Wraps a canonical word so its Debug output is a binary literal.
+        impl fmt::Debug for Bits {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                write!(f, "0b{:064b}", self.0) // `0b` followed by 64 zero-padded binary digits
+            }
+        }
+
+        writeln!(
+            &mut self.file,
+            "static BITSET_CANONICAL: [u64; {}] = [{}];",
+            canonicalized.canonical_words.len(),
+            fmt_list(canonicalized.canonical_words.iter().map(|v| Bits(*v))),
+        )
+        .unwrap();
+        self.bytes_used += 8 * canonicalized.canonical_words.len();
         writeln!(
             &mut self.file,
-            "static BITSET: [u64; {}] = [{}];",
-            unique_words.len(),
-            fmt_list(&unique_words),
+            "static BITSET_MAPPING: [(u8, u8); {}] = [{}];",
+            canonicalized.canonicalized_words.len(),
+            fmt_list(&canonicalized.canonicalized_words),
         )
         .unwrap();
-        self.bytes_used += 8 * unique_words.len();
+        // One u8 index into the canonical words, plus one u8 holding the rotation
+        // amount (low 7 bits) and the inversion flag (top bit). Only needed for the
+        // words that we removed by applying a rotation and/or inversion to them.
+        self.bytes_used += 2 * canonicalized.canonicalized_words.len();
     }
 
     fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) {
@@ -170,7 +189,8 @@ impl RawEmitter {
         writeln!(&mut self.file, "        &BITSET_CHUNKS_MAP,").unwrap();
         writeln!(&mut self.file, "        BITSET_LAST_CHUNK_MAP,").unwrap();
         writeln!(&mut self.file, "        &BITSET_INDEX_CHUNKS,").unwrap();
-        writeln!(&mut self.file, "        &BITSET,").unwrap();
+        writeln!(&mut self.file, "        &BITSET_CANONICAL,").unwrap();
+        writeln!(&mut self.file, "        &BITSET_MAPPING,").unwrap();
         writeln!(&mut self.file, "    )").unwrap();
         writeln!(&mut self.file, "}}").unwrap();
     }
@@ -196,3 +216,193 @@ pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
     emitter.blank_line();
     emitter.emit_lookup();
 }
+
+struct Canonicalized {
+    canonical_words: Vec<u64>, // words kept verbatim as u64
+    canonicalized_words: Vec<(u8, u8)>, // (index into canonical_words, encoded mapping byte)
+
+    /// Maps an input unique word to its u8 index: values below canonical_words.len()
+    /// index canonical_words; larger values, minus that length, index canonicalized_words.
+    unique_mapping: HashMap<u64, u8>,
+}
+
+impl Canonicalized {
+    fn canonicalize(unique_words: &[u64]) -> Self {
+        #[derive(Copy, Clone, Debug)]
+        enum Mapping {
+            Rotate(u32), // rotation amount in bits, taken from 1..64
+            Invert, // bitwise NOT only
+            RotateAndInvert(u32), // rotation amount in bits, plus bitwise NOT
+        }
+
+        // Key is the word being mapped to (`b`); each entry is a word `a` related to it, and how.
+        let mut mappings: BTreeMap<u64, Vec<(u64, Mapping)>> = BTreeMap::new();
+        for &a in unique_words {
+            'b: for &b in unique_words {
+                // skip self
+                if a == b {
+                    continue;
+                }
+
+                // All possible distinct rotations (0 would be the identity, and a != b here)
+                for rotation in 1..64 {
+                    if a.rotate_right(rotation) == b {
+                        mappings.entry(b).or_default().push((a, Mapping::Rotate(rotation)));
+                        // We're not interested in further mappings between a and b
+                        continue 'b;
+                    }
+                }
+
+                if (!a) == b {
+                    mappings.entry(b).or_default().push((a, Mapping::Invert));
+                    // We're not interested in further mappings between a and b
+                    continue 'b;
+                }
+
+                // All possible distinct rotations, inverted
+                for rotation in 1..64 {
+                    if (!a.rotate_right(rotation)) == b {
+                        mappings
+                            .entry(b)
+                            .or_default()
+                            .push((a, Mapping::RotateAndInvert(rotation)));
+                        // We're not interested in further mappings between a and b
+                        continue 'b;
+                    }
+                }
+            }
+        }
+        // These are the bitset words which will be represented "raw" (as a u64)
+        let mut canonical_words = Vec::new();
+        // These are mapped words, which will be represented by an index into
+        // the canonical_words and a Mapping; a (u8, u8) pair when encoded.
+        let mut canonicalized_words = Vec::new();
+        let mut unique_mapping = HashMap::new();
+
+        #[derive(Debug, PartialEq, Eq)]
+        enum UniqueMapping {
+            Canonical(usize), // index into canonical_words
+            Canonicalized(usize), // index into canonicalized_words
+        }
+
+        while let Some((&to, _)) = mappings.iter().max_by_key(|m| m.1.len()) {
+            // Get the mapping with the most entries. Currently, no mapping can
+            // only exist transitively (i.e., there is no A, B, C such that A
+            // does not map to C but A maps to B and B maps to C), so this is
+            // guaranteed to be acceptable.
+            //
+            // In the future, we may need a more sophisticated algorithm to
+            // identify which keys to prefer as canonical.
+            let mapped_from = mappings.remove(&to).unwrap();
+            for (from, how) in &mapped_from {
+                // `from` will be derived from `to`, so drop its own entry as a target and
+                // record it against the index `to` is about to receive in canonical_words.
+                //
+                // We do not assert that this is present, because there may be
+                // no mappings to the `from` word; that's fine.
+                mappings.remove(from);
+                assert_eq!(
+                    unique_mapping
+                        .insert(*from, UniqueMapping::Canonicalized(canonicalized_words.len())),
+                    None
+                );
+                canonicalized_words.push((canonical_words.len(), *how));
+
+                // Remove the now-canonicalized word from other mappings,
+                // to ensure that we deprioritize them in the next iteration of
+                // the while loop.
+                for (_, mapped) in &mut mappings {
+                    let mut i = 0;
+                    while i != mapped.len() {
+                        if mapped[i].0 == *from {
+                            mapped.remove(i);
+                        } else {
+                            i += 1;
+                        }
+                    }
+                }
+            }
+            assert!(
+                unique_mapping
+                    .insert(to, UniqueMapping::Canonical(canonical_words.len()))
+                    .is_none()
+            );
+            canonical_words.push(to);
+
+            // Remove the now-canonical word from other mappings, to ensure that
+            // we deprioritize them in the next iteration of the while loop.
+            for (_, mapped) in &mut mappings {
+                let mut i = 0;
+                while i != mapped.len() {
+                    if mapped[i].0 == to {
+                        mapped.remove(i);
+                    } else {
+                        i += 1;
+                    }
+                }
+            }
+        }
+
+        // Any words which we couldn't shrink, just stick into the canonical
+        // words.
+        //
+        // FIXME: work harder -- there are more possibilities for mapping
+        // functions (e.g., multiplication, shifting instead of rotation, etc.)
+        // We'll probably always have some slack though so this loop will still
+        // be needed.
+        for &w in unique_words {
+            if !unique_mapping.contains_key(&w) {
+                assert!(
+                    unique_mapping
+                        .insert(w, UniqueMapping::Canonical(canonical_words.len()))
+                        .is_none()
+                );
+                canonical_words.push(w);
+            }
+        }
+        assert_eq!(canonicalized_words.len() + canonical_words.len(), unique_words.len());
+        assert_eq!(unique_mapping.len(), unique_words.len());
+
+        let unique_mapping = unique_mapping // flatten both index kinds into one u8 space
+            .into_iter()
+            .map(|(key, value)| {
+                (
+                    key,
+                    match value {
+                        UniqueMapping::Canonicalized(idx) => {
+                            u8::try_from(canonical_words.len() + idx).unwrap()
+                        }
+                        UniqueMapping::Canonical(idx) => u8::try_from(idx).unwrap(),
+                    },
+                )
+            })
+            .collect::<HashMap<_, _>>();
+
+        let mut distinct_indices = BTreeSet::new(); // sanity check: indices are unique
+        for &w in unique_words {
+            let idx = unique_mapping.get(&w).unwrap();
+            assert!(distinct_indices.insert(idx));
+        }
+
+        let canonicalized_words = canonicalized_words
+            .into_iter()
+            .map(|v| {
+                (
+                    u8::try_from(v.0).unwrap(),
+                    match v.1 {
+                        Mapping::RotateAndInvert(amount) => {
+                            assert!(amount < (1 << 7)); // must fit in the low 7 bits
+                            1 << 7 | (amount as u8)
+                        }
+                        Mapping::Rotate(amount) => {
+                            assert!(amount < (1 << 7)); // must fit in the low 7 bits
+                            amount as u8
+                        }
+                        Mapping::Invert => 1 << 7, // bit 7 set, low 7 bits zero
+                    },
+                )
+            })
+            .collect::<Vec<(u8, u8)>>();
+        Canonicalized { unique_mapping, canonical_words, canonicalized_words }
+    }
+}