about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--src/libcore/unicode/unicode_data.rs22
-rw-r--r--src/tools/unicode-table-generator/src/range_search.rs9
-rw-r--r--src/tools/unicode-table-generator/src/raw_emitter.rs24
3 files changed, 9 insertions, 46 deletions
diff --git a/src/libcore/unicode/unicode_data.rs b/src/libcore/unicode/unicode_data.rs
index 72ea8ce0381..48caa21fb0a 100644
--- a/src/libcore/unicode/unicode_data.rs
+++ b/src/libcore/unicode/unicode_data.rs
@@ -10,7 +10,6 @@ fn bitset_search<
 >(
     needle: u32,
     chunk_idx_map: &[u8; N],
-    last_chunk_idx: u16,
     bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
     bitset_canonical: &[u64; CANONICAL],
     bitset_canonicalized: &[(u8, u8); CANONICALIZED],
@@ -18,12 +17,8 @@ fn bitset_search<
     let bucket_idx = (needle / 64) as usize;
     let chunk_map_idx = bucket_idx / CHUNK_SIZE;
     let chunk_piece = bucket_idx % CHUNK_SIZE;
-    // The last entry of `chunk_idx_map` actually should be at `last_chunk_idx`,
-    // so we need to remap it
-    let chunk_idx = if chunk_map_idx < (chunk_idx_map.len() - 1) {
-        chunk_idx_map[chunk_map_idx]
-    } else if chunk_map_idx == last_chunk_idx as usize {
-        chunk_idx_map[chunk_idx_map.len() - 1]
+    let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
+        v
     } else {
         return false;
     };
@@ -317,12 +312,12 @@ pub mod grapheme_extend {
 
 #[rustfmt::skip]
 pub mod lowercase {
-    const BITSET_LAST_CHUNK_MAP: u16 = 122;
-    static BITSET_CHUNKS_MAP: [u8; 119] = [
+    static BITSET_CHUNKS_MAP: [u8; 123] = [
         13, 16, 0, 0, 8, 0, 0, 11, 12, 9, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 3, 1, 0, 14, 0, 7, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 6,
+        0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0,
+        0, 0, 6,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 16]; 18] = [
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
@@ -408,7 +403,6 @@ pub mod lowercase {
         super::bitset_search(
             c as u32,
             &BITSET_CHUNKS_MAP,
-            BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
             &BITSET_CANONICAL,
             &BITSET_MAPPING,
@@ -449,13 +443,12 @@ pub mod n {
 
 #[rustfmt::skip]
 pub mod uppercase {
-    const BITSET_LAST_CHUNK_MAP: u16 = 124;
-    static BITSET_CHUNKS_MAP: [u8; 124] = [
+    static BITSET_CHUNKS_MAP: [u8; 125] = [
         12, 15, 5, 5, 0, 5, 5, 2, 4, 11, 5, 14, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
         5, 5, 5, 6, 5, 13, 5, 10, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
         5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 16, 5, 5,
-        5, 5, 9, 3,
+        5, 5, 9, 5, 3,
     ];
     static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
         [41, 41, 5, 33, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 5, 0],
@@ -529,7 +522,6 @@ pub mod uppercase {
         super::bitset_search(
             c as u32,
             &BITSET_CHUNKS_MAP,
-            BITSET_LAST_CHUNK_MAP,
             &BITSET_INDEX_CHUNKS,
             &BITSET_CANONICAL,
             &BITSET_MAPPING,
diff --git a/src/tools/unicode-table-generator/src/range_search.rs b/src/tools/unicode-table-generator/src/range_search.rs
index 49e65521c98..39b47ce703f 100644
--- a/src/tools/unicode-table-generator/src/range_search.rs
+++ b/src/tools/unicode-table-generator/src/range_search.rs
@@ -8,7 +8,6 @@ fn bitset_search<
 >(
     needle: u32,
     chunk_idx_map: &[u8; N],
-    last_chunk_idx: u16,
     bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
     bitset_canonical: &[u64; CANONICAL],
     bitset_canonicalized: &[(u8, u8); CANONICALIZED],
@@ -16,12 +15,8 @@ fn bitset_search<
     let bucket_idx = (needle / 64) as usize;
     let chunk_map_idx = bucket_idx / CHUNK_SIZE;
     let chunk_piece = bucket_idx % CHUNK_SIZE;
-    // The last entry of `chunk_idx_map` actually should be at `last_chunk_idx`,
-    // so we need to remap it
-    let chunk_idx = if chunk_map_idx < (chunk_idx_map.len() - 1) {
-        chunk_idx_map[chunk_map_idx]
-    } else if chunk_map_idx == last_chunk_idx as usize {
-        chunk_idx_map[chunk_idx_map.len() - 1]
+    let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
+        v
     } else {
         return false;
     };
diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs
index db9d04b3fa9..dd0746cf695 100644
--- a/src/tools/unicode-table-generator/src/raw_emitter.rs
+++ b/src/tools/unicode-table-generator/src/raw_emitter.rs
@@ -139,7 +139,6 @@ impl RawEmitter {
         writeln!(&mut self.file, "    super::bitset_search(",).unwrap();
         writeln!(&mut self.file, "        c as u32,").unwrap();
         writeln!(&mut self.file, "        &BITSET_CHUNKS_MAP,").unwrap();
-        writeln!(&mut self.file, "        BITSET_LAST_CHUNK_MAP,").unwrap();
         writeln!(&mut self.file, "        &BITSET_INDEX_CHUNKS,").unwrap();
         writeln!(&mut self.file, "        &BITSET_CANONICAL,").unwrap();
         writeln!(&mut self.file, "        &BITSET_MAPPING,").unwrap();
@@ -170,29 +169,6 @@ impl RawEmitter {
             chunk_indices.push(chunk_map[chunk]);
         }
 
-        // If one of the chunks has all of the entries point to the bitset
-        // word filled with zeros, then pop those off the end -- we know they
-        // are useless.
-        let zero_chunk_idx = chunks.iter().position(|chunk| chunk.iter().all(|e| *e == zero_at));
-        while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
-            chunk_indices.pop();
-        }
-        // We do not count the LAST_CHUNK_MAP as adding bytes because it's a
-        // small constant whose values are inlined directly into the instruction
-        // stream.
-        writeln!(
-            &mut self.file,
-            "const BITSET_LAST_CHUNK_MAP: u16 = {};",
-            chunk_indices.len() - 1,
-        )
-        .unwrap();
-        let nonzero = chunk_indices.pop().unwrap();
-        // Try to pop again, now that we've recorded a non-zero pointing index
-        // into the LAST_CHUNK_MAP.
-        while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
-            chunk_indices.pop();
-        }
-        chunk_indices.push(nonzero);
         writeln!(
             &mut self.file,
             "static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",