diff options
| author | Mark Rousskov <mark.simulacrum@gmail.com> | 2020-03-21 12:20:18 -0400 |
|---|---|---|
| committer | Mark Rousskov <mark.simulacrum@gmail.com> | 2020-03-21 15:21:31 -0400 |
| commit | 5f71d98f90354f9ee67c2b77c8607fbc9169d63e (patch) | |
| tree | 0482b32e9a4ffadb13873905794c65050931ec3b | |
| parent | 7b29b70d6ea52e9324f9328bed9beb6cf516c1ce (diff) | |
| download | rust-5f71d98f90354f9ee67c2b77c8607fbc9169d63e.tar.gz rust-5f71d98f90354f9ee67c2b77c8607fbc9169d63e.zip | |
Deduplicate test and primary range_search definitions
This ensures that what we test is what we get for final results as well.
| -rw-r--r-- | src/libcore/unicode/mod.rs | 45 | ||||
| -rw-r--r-- | src/libcore/unicode/unicode_data.rs | 51 | ||||
| -rw-r--r-- | src/tools/unicode-table-generator/src/main.rs | 59 | ||||
| -rw-r--r-- | src/tools/unicode-table-generator/src/range_search.rs | 49 |
4 files changed, 103 insertions, 101 deletions
diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 39532166a0b..94a2507e26c 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -32,48 +32,3 @@ pub use unicode_data::lowercase::lookup as Lowercase; pub use unicode_data::n::lookup as N; pub use unicode_data::uppercase::lookup as Uppercase; pub use unicode_data::white_space::lookup as White_Space; - -#[inline(always)] -fn range_search< - const N: usize, - const CHUNK_SIZE: usize, - const N1: usize, - const CANONICAL: usize, - const CANONICALIZED: usize, ->( - needle: u32, - chunk_idx_map: &[u8; N], - (last_chunk_idx, last_chunk_mapping): (u16, u8), - bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], - bitset_canonical: &[u64; CANONICAL], - bitset_canonicalized: &[(u8, u8); CANONICALIZED], -) -> bool { - let bucket_idx = (needle / 64) as usize; - let chunk_map_idx = bucket_idx / CHUNK_SIZE; - let chunk_piece = bucket_idx % CHUNK_SIZE; - let chunk_idx = if chunk_map_idx >= N { - if chunk_map_idx == last_chunk_idx as usize { - last_chunk_mapping - } else { - return false; - } - } else { - chunk_idx_map[chunk_map_idx] - }; - let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; - let word = if idx < CANONICAL { - bitset_canonical[idx] - } else { - let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; - let mut word = bitset_canonical[real_idx as usize]; - let should_invert = mapping & (1 << 6) != 0; - if should_invert { - word = !word; - } - // Unset the inversion bit - let rotate_by = mapping & !(1 << 6); - word = word.rotate_left(rotate_by as u32); - word - }; - (word & (1 << (needle % 64) as u64)) != 0 -} diff --git a/src/libcore/unicode/unicode_data.rs b/src/libcore/unicode/unicode_data.rs index bae6d8ea953..5b1efbaa28f 100644 --- a/src/libcore/unicode/unicode_data.rs +++ b/src/libcore/unicode/unicode_data.rs @@ -1,5 +1,54 @@ ///! This file is generated by src/tools/unicode-table-generator; do not edit manually! -use super::range_search; + +#[inline(always)] +fn range_search< + const N: usize, + const CHUNK_SIZE: usize, + const N1: usize, + const CANONICAL: usize, + const CANONICALIZED: usize, +>( + needle: u32, + chunk_idx_map: &[u8; N], + (last_chunk_idx, last_chunk_mapping): (u16, u8), + bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], + bitset_canonical: &[u64; CANONICAL], + bitset_canonicalized: &[(u8, u8); CANONICALIZED], +) -> bool { + let bucket_idx = (needle / 64) as usize; + let chunk_map_idx = bucket_idx / CHUNK_SIZE; + let chunk_piece = bucket_idx % CHUNK_SIZE; + let chunk_idx = if chunk_map_idx >= N { + if chunk_map_idx == last_chunk_idx as usize { + last_chunk_mapping + } else { + return false; + } + } else { + chunk_idx_map[chunk_map_idx] + }; + let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; + let word = if idx < CANONICAL { + bitset_canonical[idx] + } else { + let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; + let mut word = bitset_canonical[real_idx as usize]; + let should_invert = mapping & (1 << 6) != 0; + if should_invert { + word = !word; + } + // Lower 6 bits + let quantity = mapping & ((1 << 6) - 1); + if mapping & (1 << 7) != 0 { + // shift + word >>= quantity as u64; + } else { + word = word.rotate_left(quantity as u32); + } + word + }; + (word & (1 << (needle % 64) as u64)) != 0 +} pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0); diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs index 65ece05043a..af23c166871 100644 --- a/src/tools/unicode-table-generator/src/main.rs +++ b/src/tools/unicode-table-generator/src/main.rs @@ -181,7 +181,10 @@ fn main() { "///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n", ); - table_file.push_str("use super::range_search;\n\n"); + // Include the range search function + table_file.push('\n'); + table_file.push_str(include_str!("range_search.rs")); + table_file.push('\n'); table_file.push_str(&version()); @@ -251,60 +254,6 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String s.push_str(&format!("#[path = \"{}\"]\n", data_path)); s.push_str("mod unicode_data;\n\n"); - s.push_str( - " -#[inline(always)] -fn range_search< - const N: usize, - const CHUNK_SIZE: usize, - const N1: usize, - const CANONICAL: usize, - const CANONICALIZED: usize, ->( - needle: u32, - chunk_idx_map: &[u8; N], - (last_chunk_idx, last_chunk_mapping): (u16, u8), - bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], - bitset_canonical: &[u64; CANONICAL], - bitset_canonicalized: &[(u8, u8); CANONICALIZED], -) -> bool { - let bucket_idx = (needle / 64) as usize; - let chunk_map_idx = bucket_idx / CHUNK_SIZE; - let chunk_piece = bucket_idx % CHUNK_SIZE; - let chunk_idx = if chunk_map_idx >= N { - if chunk_map_idx == last_chunk_idx as usize { - last_chunk_mapping - } else { - return false; - } - } else { - chunk_idx_map[chunk_map_idx] - }; - let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; - let word = if idx < CANONICAL { - bitset_canonical[idx] - } else { - let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; - let mut word = bitset_canonical[real_idx as usize]; - let should_invert = mapping & (1 << 6) != 0; - if should_invert { - word = !word; - } - // Lower 6 bits - let quantity = mapping & ((1 << 6) - 1); - if mapping & (1 << 7) != 0 { - // shift - word >>= quantity as u64; - } else { - word = word.rotate_left(quantity as u32); - } - word - }; - (word & (1 << (needle % 64) as u64)) != 0 -} - ", - ); - s.push_str("\nfn main() {\n"); for (property, ranges) in ranges { diff --git a/src/tools/unicode-table-generator/src/range_search.rs b/src/tools/unicode-table-generator/src/range_search.rs new file mode 100644 index 00000000000..a0bc1e6aec5 --- /dev/null +++ b/src/tools/unicode-table-generator/src/range_search.rs @@ -0,0 +1,49 @@ +#[inline(always)] +fn range_search< + const N: usize, + const CHUNK_SIZE: usize, + const N1: usize, + const CANONICAL: usize, + const CANONICALIZED: usize, +>( + needle: u32, + chunk_idx_map: &[u8; N], + (last_chunk_idx, last_chunk_mapping): (u16, u8), + bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], + bitset_canonical: &[u64; CANONICAL], + bitset_canonicalized: &[(u8, u8); CANONICALIZED], +) -> bool { + let bucket_idx = (needle / 64) as usize; + let chunk_map_idx = bucket_idx / CHUNK_SIZE; + let chunk_piece = bucket_idx % CHUNK_SIZE; + let chunk_idx = if chunk_map_idx >= N { + if chunk_map_idx == last_chunk_idx as usize { + last_chunk_mapping + } else { + return false; + } + } else { + chunk_idx_map[chunk_map_idx] + }; + let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; + let word = if idx < CANONICAL { + bitset_canonical[idx] + } else { + let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; + let mut word = bitset_canonical[real_idx as usize]; + let should_invert = mapping & (1 << 6) != 0; + if should_invert { + word = !word; + } + // Lower 6 bits + let quantity = mapping & ((1 << 6) - 1); + if mapping & (1 << 7) != 0 { + // shift + word >>= quantity as u64; + } else { + word = word.rotate_left(quantity as u32); + } + word + }; + (word & (1 << (needle % 64) as u64)) != 0 +} |
