diff options
| -rw-r--r-- | src/libcore/unicode/mod.rs | 54 |
1 file changed, 49 insertions, 5 deletions
diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs
index e424174f554..b6eaf06aa7f 100644
--- a/src/libcore/unicode/mod.rs
+++ b/src/libcore/unicode/mod.rs
@@ -1,15 +1,59 @@
 #![unstable(feature = "unicode_internals", issue = "none")]
 #![allow(missing_docs)]
 
-mod bool_trie;
 pub(crate) mod printable;
-pub(crate) mod tables;
+mod unicode_data;
 pub(crate) mod version;
 
+use version::UnicodeVersion;
+
+/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
+/// `char` and `str` methods are based on.
+#[unstable(feature = "unicode_version", issue = "49726")]
+pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
+    major: unicode_data::UNICODE_VERSION.0,
+    minor: unicode_data::UNICODE_VERSION.1,
+    micro: unicode_data::UNICODE_VERSION.2,
+    _priv: (),
+};
+
 // For use in liballoc, not re-exported in libstd.
 pub mod derived_property {
-    pub use crate::unicode::tables::derived_property::{Case_Ignorable, Cased};
+    pub use super::{Case_Ignorable, Cased};
 }
-pub mod conversions {
-    pub use crate::unicode::tables::conversions::{to_lower, to_upper};
+
+pub use unicode_data::alphabetic::lookup as Alphabetic;
+pub use unicode_data::case_ignorable::lookup as Case_Ignorable;
+pub use unicode_data::cased::lookup as Cased;
+pub use unicode_data::cc::lookup as Cc;
+pub use unicode_data::conversions;
+pub use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
+pub use unicode_data::lowercase::lookup as Lowercase;
+pub use unicode_data::n::lookup as N;
+pub use unicode_data::uppercase::lookup as Uppercase;
+pub use unicode_data::white_space::lookup as White_Space;
+
+#[inline(always)]
+fn range_search<const N: usize, const N1: usize, const N2: usize>(
+    needle: u32,
+    chunk_idx_map: &[u8; N],
+    (last_chunk_idx, last_chunk_mapping): (u16, u8),
+    bitset_chunk_idx: &[[u8; 16]; N1],
+    bitset: &[u64; N2],
+) -> bool {
+    let bucket_idx = (needle / 64) as usize;
+    let chunk_map_idx = bucket_idx / 16;
+    let chunk_piece = bucket_idx % 16;
+    let chunk_idx = if chunk_map_idx >= N {
+        if chunk_map_idx == last_chunk_idx as usize {
+            last_chunk_mapping
+        } else {
+            return false;
+        }
+    } else {
+        chunk_idx_map[chunk_map_idx]
+    };
+    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece];
+    let word = bitset[(idx as usize)];
+    (word & (1 << (needle % 64) as u64)) != 0
 }
