diff options
| author | Mark Rousskov <mark.simulacrum@gmail.com> | 2020-03-21 12:20:18 -0400 |
|---|---|---|
| committer | Mark Rousskov <mark.simulacrum@gmail.com> | 2020-03-21 15:21:31 -0400 |
| commit | 5f71d98f90354f9ee67c2b77c8607fbc9169d63e (patch) | |
| tree | 0482b32e9a4ffadb13873905794c65050931ec3b /src/libcore | |
| parent | 7b29b70d6ea52e9324f9328bed9beb6cf516c1ce (diff) | |
| download | rust-5f71d98f90354f9ee67c2b77c8607fbc9169d63e.tar.gz rust-5f71d98f90354f9ee67c2b77c8607fbc9169d63e.zip | |
Deduplicate test and primary range_search definitions
This ensures that the range_search implementation exercised by the tests is the same one used for the final results.
Diffstat (limited to 'src/libcore')
| -rw-r--r-- | src/libcore/unicode/mod.rs | 45 | ||||
| -rw-r--r-- | src/libcore/unicode/unicode_data.rs | 51 |
2 files changed, 50 insertions, 46 deletions
diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 39532166a0b..94a2507e26c 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -32,48 +32,3 @@ pub use unicode_data::lowercase::lookup as Lowercase; pub use unicode_data::n::lookup as N; pub use unicode_data::uppercase::lookup as Uppercase; pub use unicode_data::white_space::lookup as White_Space; - -#[inline(always)] -fn range_search< - const N: usize, - const CHUNK_SIZE: usize, - const N1: usize, - const CANONICAL: usize, - const CANONICALIZED: usize, ->( - needle: u32, - chunk_idx_map: &[u8; N], - (last_chunk_idx, last_chunk_mapping): (u16, u8), - bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], - bitset_canonical: &[u64; CANONICAL], - bitset_canonicalized: &[(u8, u8); CANONICALIZED], -) -> bool { - let bucket_idx = (needle / 64) as usize; - let chunk_map_idx = bucket_idx / CHUNK_SIZE; - let chunk_piece = bucket_idx % CHUNK_SIZE; - let chunk_idx = if chunk_map_idx >= N { - if chunk_map_idx == last_chunk_idx as usize { - last_chunk_mapping - } else { - return false; - } - } else { - chunk_idx_map[chunk_map_idx] - }; - let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; - let word = if idx < CANONICAL { - bitset_canonical[idx] - } else { - let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; - let mut word = bitset_canonical[real_idx as usize]; - let should_invert = mapping & (1 << 6) != 0; - if should_invert { - word = !word; - } - // Unset the inversion bit - let rotate_by = mapping & !(1 << 6); - word = word.rotate_left(rotate_by as u32); - word - }; - (word & (1 << (needle % 64) as u64)) != 0 -} diff --git a/src/libcore/unicode/unicode_data.rs b/src/libcore/unicode/unicode_data.rs index bae6d8ea953..5b1efbaa28f 100644 --- a/src/libcore/unicode/unicode_data.rs +++ b/src/libcore/unicode/unicode_data.rs @@ -1,5 +1,54 @@ ///! This file is generated by src/tools/unicode-table-generator; do not edit manually! 
-use super::range_search; + +#[inline(always)] +fn range_search< + const N: usize, + const CHUNK_SIZE: usize, + const N1: usize, + const CANONICAL: usize, + const CANONICALIZED: usize, +>( + needle: u32, + chunk_idx_map: &[u8; N], + (last_chunk_idx, last_chunk_mapping): (u16, u8), + bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], + bitset_canonical: &[u64; CANONICAL], + bitset_canonicalized: &[(u8, u8); CANONICALIZED], +) -> bool { + let bucket_idx = (needle / 64) as usize; + let chunk_map_idx = bucket_idx / CHUNK_SIZE; + let chunk_piece = bucket_idx % CHUNK_SIZE; + let chunk_idx = if chunk_map_idx >= N { + if chunk_map_idx == last_chunk_idx as usize { + last_chunk_mapping + } else { + return false; + } + } else { + chunk_idx_map[chunk_map_idx] + }; + let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; + let word = if idx < CANONICAL { + bitset_canonical[idx] + } else { + let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; + let mut word = bitset_canonical[real_idx as usize]; + let should_invert = mapping & (1 << 6) != 0; + if should_invert { + word = !word; + } + // Lower 6 bits + let quantity = mapping & ((1 << 6) - 1); + if mapping & (1 << 7) != 0 { + // shift + word >>= quantity as u64; + } else { + word = word.rotate_left(quantity as u32); + } + word + }; + (word & (1 << (needle % 64) as u64)) != 0 +} pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0); |
