about summary refs log tree commit diff
diff options
context:
space:
mode:
author Mark Rousskov <mark.simulacrum@gmail.com> 2020-03-20 18:38:08 -0400
committer Mark Rousskov <mark.simulacrum@gmail.com> 2020-03-20 18:38:08 -0400
commit 6c7691a37bf485b28fecb6856e6ede8fa952f99e (patch)
tree cd2e5a10ff2983623e49ca808f44c273b8992c36
parent 580a6342ef9d435d241b74e86b99dc1131a526f8 (diff)
download rust-6c7691a37bf485b28fecb6856e6ede8fa952f99e.tar.gz
rust-6c7691a37bf485b28fecb6856e6ede8fa952f99e.zip
Pre-pop zero chunks before mapping LAST_CHUNK_MAP
This avoids wasting a small amount of space for some of the data sets.

The chunk resizing is a side effect of the changes in this commit, not
something the commit does directly.

Alphabetic     : 3036 bytes
Case_Ignorable : 2133 bytes    (- 3 bytes)
Cased          : 934 bytes
Cc             : 32 bytes
Grapheme_Extend: 1760 bytes    (-14 bytes)
Lowercase      : 985 bytes
N              : 1220 bytes    (- 5 bytes)
Uppercase      : 934 bytes
White_Space    : 97 bytes
Total table sizes: 11131 bytes (-22 bytes)
-rw-r--r-- src/libcore/unicode/unicode_data.rs 160
-rw-r--r-- src/tools/unicode-table-generator/src/raw_emitter.rs 24
2 files changed, 88 insertions, 96 deletions
diff --git a/src/libcore/unicode/unicode_data.rs b/src/libcore/unicode/unicode_data.rs
index a89f3481f49..7a72f080e33 100644
--- a/src/libcore/unicode/unicode_data.rs
+++ b/src/libcore/unicode/unicode_data.rs
@@ -134,49 +134,41 @@ pub mod alphabetic {
 
 #[rustfmt::skip]
 pub mod case_ignorable {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (896, 33);
-    static BITSET_CHUNKS_MAP: [u8; 125] = [
-        25, 14, 21, 30, 28, 4, 17, 23, 22, 0, 0, 16, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 13, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 3, 6, 9, 0, 7, 11, 32, 31, 26, 29, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0,
-        10, 0, 8, 0, 19, 0, 12, 0, 1,
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 51);
+    static BITSET_CHUNKS_MAP: [u8; 250] = [
+        36, 19, 16, 26, 29, 40, 47, 38, 42, 5, 0, 9, 23, 25, 34, 3, 30, 0, 0, 0, 0, 0, 21, 31, 39,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 15, 22, 28,
+        33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 32, 1, 11, 0, 0, 0, 44, 8, 18, 50, 41, 49, 45, 37, 43,
+        46, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        6, 20, 0, 0, 0, 48, 0, 0, 27, 12, 0, 0, 10, 0, 0, 0, 0, 2,
     ];
-    static BITSET_INDEX_CHUNKS: [[u8; 16]; 34] = [
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 166],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 47, 57],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 0, 173, 3],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, 90, 136, 38],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 104, 7, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 78, 27, 0, 148, 138, 81, 44, 119],
-        [0, 0, 0, 0, 0, 0, 0, 0, 154, 0, 0, 58, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 167, 99, 77, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 48, 0, 116, 0, 0],
-        [0, 0, 0, 0, 0, 172, 70, 0, 0, 8, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 60, 0, 0, 0, 0, 0, 67, 0, 0, 24, 0, 0],
-        [0, 0, 0, 29, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 135, 0, 0, 0, 0, 16, 162, 46, 86, 51, 80, 13, 111],
-        [0, 0, 12, 0, 0, 43, 163, 92, 35, 82, 0, 71, 175, 14, 83, 131],
-        [0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 133, 0, 87, 0, 150, 0, 178, 75, 0, 0, 0, 0, 0, 0, 0],
-        [20, 5, 61, 0, 120, 0, 0, 0, 32, 156, 176, 1, 126, 91, 69, 88],
-        [26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [62, 0, 0, 0, 137, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 0],
-        [66, 0, 0, 152, 72, 25, 134, 59, 102, 124, 165, 101, 0, 64, 0, 68],
-        [73, 33, 0, 181, 125, 85, 122, 139, 123, 100, 123, 169, 155, 54, 4, 18],
-        [74, 151, 36, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [106, 135, 0, 112, 177, 107, 180, 168, 0, 0, 0, 0, 0, 0, 157, 142],
-        [109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [113, 50, 108, 0, 0, 0, 0, 0, 0, 0, 174, 182, 182, 114, 10, 0],
-        [115, 0, 0, 0, 141, 5, 0, 49, 145, 34, 31, 0, 0, 0, 0, 0],
-        [118, 0, 42, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [143, 95, 37, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0],
-        [161, 0, 103, 0, 160, 11, 30, 0, 0, 0, 0, 93, 0, 0, 0, 0],
-        [164, 55, 155, 53, 127, 52, 2, 28, 117, 21, 128, 19, 110, 147, 129, 9],
-        [170, 41, 153, 6, 0, 0, 159, 39, 158, 1, 105, 0, 65, 0, 0, 0],
-        [171, 149, 132, 17, 98, 89, 146, 23, 140, 0, 0, 63, 127, 97, 0, 0],
-        [179, 182, 0, 0, 182, 182, 182, 79, 0, 0, 0, 0, 0, 0, 0, 0],
+    static BITSET_INDEX_CHUNKS: [[u8; 8]; 52] = [
+        [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 130], [0, 0, 0, 0, 0, 0, 0, 166],
+        [0, 0, 0, 0, 0, 0, 157, 142], [0, 0, 0, 0, 0, 22, 47, 57], [0, 0, 0, 0, 0, 45, 0, 0],
+        [0, 0, 0, 0, 0, 172, 70, 0], [0, 0, 0, 0, 40, 0, 173, 3], [0, 0, 0, 0, 60, 0, 0, 0],
+        [0, 0, 0, 0, 94, 90, 136, 38], [0, 0, 0, 29, 0, 15, 0, 0], [0, 0, 0, 48, 0, 116, 0, 0],
+        [0, 0, 0, 76, 0, 0, 0, 0], [0, 0, 0, 93, 0, 0, 0, 0], [0, 0, 0, 96, 104, 7, 0, 0],
+        [0, 0, 0, 135, 0, 0, 0, 0], [0, 0, 12, 0, 0, 43, 163, 92], [0, 0, 56, 0, 0, 0, 0, 0],
+        [0, 0, 67, 0, 0, 24, 0, 0], [0, 0, 174, 182, 182, 114, 10, 0], [0, 8, 0, 0, 0, 0, 0, 0],
+        [0, 133, 0, 87, 0, 150, 0, 178], [16, 162, 46, 86, 51, 80, 13, 111],
+        [20, 5, 61, 0, 120, 0, 0, 0], [26, 0, 0, 0, 0, 0, 0, 0], [32, 156, 176, 1, 126, 91, 69, 88],
+        [35, 82, 0, 71, 175, 14, 83, 131], [62, 0, 0, 0, 137, 0, 0, 0],
+        [66, 0, 0, 152, 72, 25, 134, 59], [73, 33, 0, 181, 125, 85, 122, 139],
+        [74, 151, 36, 84, 0, 0, 0, 0], [75, 0, 0, 0, 0, 0, 0, 0],
+        [78, 27, 0, 148, 138, 81, 44, 119], [102, 124, 165, 101, 0, 64, 0, 68],
+        [106, 135, 0, 112, 177, 107, 180, 168], [109, 0, 0, 0, 0, 0, 0, 0],
+        [113, 50, 108, 0, 0, 0, 0, 0], [115, 0, 0, 0, 141, 5, 0, 49],
+        [117, 21, 128, 19, 110, 147, 129, 9], [118, 0, 42, 144, 0, 0, 0, 0],
+        [123, 100, 123, 169, 155, 54, 4, 18], [140, 0, 0, 63, 127, 97, 0, 0],
+        [143, 95, 37, 121, 0, 0, 0, 0], [145, 34, 31, 0, 0, 0, 0, 0], [154, 0, 0, 58, 0, 0, 0, 0],
+        [158, 1, 105, 0, 65, 0, 0, 0], [161, 0, 103, 0, 160, 11, 30, 0],
+        [164, 55, 155, 53, 127, 52, 2, 28], [167, 99, 77, 0, 0, 0, 0, 0],
+        [170, 41, 153, 6, 0, 0, 159, 39], [171, 149, 132, 17, 98, 89, 146, 23],
+        [179, 182, 0, 0, 182, 182, 182, 79],
     ];
     static BITSET: [u64; 183] = [
         0, 1, 2, 3, 4, 8, 13, 15, 28, 64, 176, 191, 1016, 1792, 2047, 4080, 4096, 8192, 8193,
@@ -288,11 +280,12 @@ pub mod cased {
 
 #[rustfmt::skip]
 pub mod cc {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (0, 0);
-    static BITSET_CHUNKS_MAP: [u8; 0] = [
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (2, 1);
+    static BITSET_CHUNKS_MAP: [u8; 2] = [
+        1, 2,
     ];
-    static BITSET_INDEX_CHUNKS: [[u8; 5]; 1] = [
-        [1, 2, 1, 0, 0],
+    static BITSET_INDEX_CHUNKS: [[u8; 1]; 3] = [
+        [0], [1], [2],
     ];
     static BITSET: [u64; 3] = [
         0, 4294967295, 9223372036854775808,
@@ -311,46 +304,37 @@ pub mod cc {
 
 #[rustfmt::skip]
 pub mod grapheme_extend {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (896, 30);
-    static BITSET_CHUNKS_MAP: [u8; 123] = [
-        4, 15, 21, 27, 25, 3, 18, 23, 17, 0, 0, 14, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 2, 7, 10, 0, 8, 12, 29, 28, 24, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0,
-        11, 0, 9, 0, 19, 0, 13,
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 44);
+    static BITSET_CHUNKS_MAP: [u8; 245] = [
+        0, 8, 15, 22, 26, 33, 40, 32, 35, 3, 0, 7, 21, 23, 30, 0, 20, 0, 0, 0, 0, 0, 12, 0, 27, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 25, 29, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 5, 0, 28, 1, 10, 0, 0, 0, 37, 6, 17, 43, 34, 42, 38, 31, 36, 39, 13, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 14, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 18, 0, 0,
+        0, 41, 0, 0, 24, 11, 0, 0, 9,
     ];
-    static BITSET_INDEX_CHUNKS: [[u8; 16]; 31] = [
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 20, 46],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 74, 106, 31],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 143, 66, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79, 87, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 107, 37, 70, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 0, 0, 37, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 0, 48, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 0, 134, 82, 64, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 39, 0, 94, 0, 0],
-        [0, 0, 0, 0, 0, 133, 58, 0, 0, 5, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 55, 0, 0, 18, 0, 0],
-        [0, 0, 0, 21, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 71, 0, 118, 0, 142, 0, 0, 0, 0, 0, 0, 0, 0],
-        [0, 0, 9, 0, 0, 0, 129, 7, 26, 67, 0, 59, 140, 11, 68, 104],
-        [0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [12, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [13, 0, 50, 0, 96, 0, 0, 0, 27, 123, 139, 1, 100, 75, 57, 72],
-        [51, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0],
-        [54, 0, 0, 120, 61, 19, 105, 47, 85, 98, 131, 84, 0, 0, 0, 56],
-        [60, 28, 0, 141, 99, 45, 111, 109, 97, 83, 97, 136, 132, 44, 108, 22],
-        [63, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        [89, 0, 0, 91, 0, 0, 0, 135, 0, 0, 0, 0, 0, 0, 0, 0],
-        [93, 0, 0, 0, 113, 3, 0, 40, 115, 29, 24, 0, 0, 0, 0, 0],
-        [114, 78, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 0],
-        [128, 0, 86, 0, 127, 8, 23, 0, 0, 0, 0, 76, 0, 0, 0, 0],
-        [130, 42, 122, 41, 112, 43, 2, 36, 95, 15, 101, 14, 90, 117, 102, 6],
-        [137, 34, 124, 4, 0, 0, 126, 32, 125, 1, 88, 0, 53, 0, 0, 0],
-        [138, 119, 92, 0, 81, 73, 116, 17, 110, 0, 0, 52, 112, 80, 0, 0],
-        [142, 143, 0, 0, 143, 143, 143, 66, 0, 0, 0, 0, 0, 0, 0, 0],
+    static BITSET_INDEX_CHUNKS: [[u8; 8]; 45] = [
+        [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 103], [0, 0, 0, 0, 0, 16, 20, 46],
+        [0, 0, 0, 0, 0, 38, 0, 0], [0, 0, 0, 0, 0, 133, 58, 0], [0, 0, 0, 0, 33, 0, 0, 0],
+        [0, 0, 0, 0, 49, 0, 0, 0], [0, 0, 0, 0, 77, 74, 106, 31], [0, 0, 0, 0, 143, 66, 0, 0],
+        [0, 0, 0, 21, 0, 10, 0, 0], [0, 0, 0, 39, 0, 94, 0, 0], [0, 0, 0, 62, 0, 0, 0, 0],
+        [0, 0, 0, 71, 0, 118, 0, 142], [0, 0, 0, 76, 0, 0, 0, 0], [0, 0, 0, 79, 87, 0, 0, 0],
+        [0, 0, 9, 0, 0, 0, 129, 7], [0, 0, 35, 0, 0, 0, 0, 0], [0, 0, 55, 0, 0, 18, 0, 0],
+        [0, 5, 0, 0, 0, 0, 0, 0], [0, 107, 37, 70, 0, 0, 0, 0], [12, 0, 0, 69, 0, 0, 0, 0],
+        [13, 0, 50, 0, 96, 0, 0, 0], [26, 67, 0, 59, 140, 11, 68, 104],
+        [27, 123, 139, 1, 100, 75, 57, 72], [51, 0, 0, 0, 87, 0, 0, 0],
+        [54, 0, 0, 120, 61, 19, 105, 47], [60, 28, 0, 141, 99, 45, 111, 109],
+        [63, 0, 25, 0, 0, 0, 0, 0], [65, 0, 0, 0, 0, 0, 37, 0], [85, 98, 131, 84, 0, 0, 0, 56],
+        [89, 0, 0, 91, 0, 0, 0, 135], [93, 0, 0, 0, 113, 3, 0, 40],
+        [95, 15, 101, 14, 90, 117, 102, 6], [97, 83, 97, 136, 132, 44, 108, 22],
+        [110, 0, 0, 52, 112, 80, 0, 0], [114, 78, 30, 0, 0, 0, 0, 0], [115, 29, 24, 0, 0, 0, 0, 0],
+        [121, 0, 0, 48, 0, 0, 0, 0], [125, 1, 88, 0, 53, 0, 0, 0], [128, 0, 86, 0, 127, 8, 23, 0],
+        [130, 42, 122, 41, 112, 43, 2, 36], [134, 82, 64, 0, 0, 0, 0, 0],
+        [137, 34, 124, 4, 0, 0, 126, 32], [138, 119, 92, 0, 81, 73, 116, 17],
+        [142, 143, 0, 0, 143, 143, 143, 66],
     ];
     static BITSET: [u64; 144] = [
         0, 1, 2, 8, 13, 28, 64, 182, 191, 1016, 2032, 2047, 4096, 14336, 16128, 32640, 32768,
@@ -454,8 +438,8 @@ pub mod lowercase {
 
 #[rustfmt::skip]
 pub mod n {
-    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (254, 0);
-    static BITSET_CHUNKS_MAP: [u8; 254] = [
+    static BITSET_LAST_CHUNK_MAP: (u16, u8) = (253, 2);
+    static BITSET_CHUNKS_MAP: [u8; 249] = [
         44, 0, 0, 29, 5, 31, 35, 26, 22, 6, 0, 12, 40, 20, 27, 0, 33, 0, 39, 7, 0, 0, 17, 0, 45,
         42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 43,
@@ -464,7 +448,7 @@ pub mod n {
         30, 1, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        14, 0, 3, 0, 0, 0, 0, 4, 15, 0, 0, 11, 0, 38, 0, 8, 0, 0, 0, 0, 2,
+        14, 0, 3, 0, 0, 0, 0, 4, 15, 0, 0, 11, 0, 38, 0, 8,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 8]; 47] = [
         [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 11], [0, 0, 0, 0, 0, 0, 0, 47],
diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs
index 5d4a4c0e044..5f66bcbebaf 100644
--- a/src/tools/unicode-table-generator/src/raw_emitter.rs
+++ b/src/tools/unicode-table-generator/src/raw_emitter.rs
@@ -67,7 +67,7 @@ impl RawEmitter {
             panic!("cannot pack {} into 8 bits", unique_words.len());
         }
         // needed for the chunk mapping to work
-        assert_eq!(unique_words[0], 0, "first word is all zeros");
+        assert_eq!(unique_words[0], 0, "has a zero word");
 
         let word_indices = unique_words
             .iter()
@@ -80,7 +80,7 @@ impl RawEmitter {
         let mut best = None;
         for length in 1..=64 {
             let mut temp = self.clone();
-            temp.emit_chunk_map(&compressed_words, length);
+            temp.emit_chunk_map(word_indices[&0], &compressed_words, length);
             if let Some((_, size)) = best {
                 if temp.bytes_used < size {
                     best = Some((length, temp.bytes_used));
@@ -89,7 +89,7 @@ impl RawEmitter {
                 best = Some((length, temp.bytes_used));
             }
         }
-        self.emit_chunk_map(&compressed_words, best.unwrap().0);
+        self.emit_chunk_map(word_indices[&0], &compressed_words, best.unwrap().0);
 
         writeln!(
             &mut self.file,
@@ -101,12 +101,12 @@ impl RawEmitter {
         self.bytes_used += 8 * unique_words.len();
     }
 
-    fn emit_chunk_map(&mut self, compressed_words: &[u8], chunk_length: usize) {
+    fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) {
         let mut compressed_words = compressed_words.to_vec();
         for _ in 0..(chunk_length - (compressed_words.len() % chunk_length)) {
             // pad out bitset index with zero words so we have all chunks of
             // chunkchunk_length
-            compressed_words.push(0);
+            compressed_words.push(zero_at);
         }
 
         let mut chunks = BTreeSet::new();
@@ -123,6 +123,14 @@ impl RawEmitter {
         for chunk in compressed_words.chunks(chunk_length) {
             chunk_indices.push(chunk_map[chunk]);
         }
+
+        // If one of the chunks has all of the entries point to the bitset
+        // word filled with zeros, then pop those off the end -- we know they
+        // are useless.
+        let zero_chunk_idx = chunks.iter().position(|chunk| chunk.iter().all(|e| *e == zero_at));
+        while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
+            chunk_indices.pop();
+        }
         writeln!(
             &mut self.file,
             "static BITSET_LAST_CHUNK_MAP: (u16, u8) = ({}, {});",
@@ -131,9 +139,9 @@ impl RawEmitter {
         )
         .unwrap();
         self.bytes_used += 3;
-        // Strip out the empty pieces, presuming our above pop() made us now
-        // have some trailing zeros.
-        while let Some(0) = chunk_indices.last() {
+        // Try to pop again, now that we've recorded a non-zero pointing index
+        // into the LAST_CHUNK_MAP.
+        while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
             chunk_indices.pop();
         }
         writeln!(