diff options
| author | Clar Charr <clar@charr.xyz> | 2017-12-31 02:08:15 -0500 |
|---|---|---|
| committer | Clar Charr <clar@charr.xyz> | 2018-01-02 22:51:22 -0500 |
| commit | b4b3ddd59e2a041646f7b300ad727a5d4f48a488 (patch) | |
| tree | fba5ba31c8811e1cdec7ca5efa28f2e1048bedea /src/libstd_unicode/unicode.py | |
| parent | b65f0bedd2f22d9661ecb7092f07746dc2ccfb0d (diff) | |
| download | rust-b4b3ddd59e2a041646f7b300ad727a5d4f48a488.tar.gz rust-b4b3ddd59e2a041646f7b300ad727a5d4f48a488.zip | |
Move static code outside of unicode.py.
Diffstat (limited to 'src/libstd_unicode/unicode.py')
| -rwxr-xr-x | src/libstd_unicode/unicode.py | 125 |
1 file changed, 9 insertions, 116 deletions
diff --git a/src/libstd_unicode/unicode.py b/src/libstd_unicode/unicode.py index df79760894e..a8629493086 100755 --- a/src/libstd_unicode/unicode.py +++ b/src/libstd_unicode/unicode.py @@ -38,6 +38,9 @@ preamble = '''// Copyright 2012-2016 The Rust Project Developers. See the COPYRI // NOTE: The following code was generated by "./unicode.py", do not edit directly #![allow(missing_docs, non_upper_case_globals, non_snake_case)] + +use version::UnicodeVersion; +use bool_trie::{BoolTrie, SmallBoolTrie}; ''' # Mapping taken from Table 12 from: @@ -274,24 +277,7 @@ def load_properties(f, interestingprops): def escape_char(c): return "'\\u{%x}'" % c if c != 0 else "'\\0'" -def emit_bsearch_range_table(f): - f.write(""" -fn bsearch_range_table(c: char, r: &'static [(char, char)]) -> bool { - use core::cmp::Ordering::{Equal, Less, Greater}; - r.binary_search_by(|&(lo, hi)| { - if c < lo { - Greater - } else if hi < c { - Less - } else { - Equal - } - }) - .is_ok() -}\n -""") - -def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True, +def emit_table(f, name, t_data, t_type = "&[(char, char)]", is_pub=True, pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1]))): pub_string = "" if is_pub: @@ -307,77 +293,6 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True, format_table_content(f, data, 8) f.write("\n ];\n\n") -def emit_trie_lookup_range_table(f): - f.write(""" - -// BoolTrie is a trie for representing a set of Unicode codepoints. It is -// implemented with postfix compression (sharing of identical child nodes), -// which gives both compact size and fast lookup. -// -// The space of Unicode codepoints is divided into 3 subareas, each -// represented by a trie with different depth. In the first (0..0x800), there -// is no trie structure at all; each u64 entry corresponds to a bitvector -// effectively holding 64 bool values. 
-// -// In the second (0x800..0x10000), each child of the root node represents a -// 64-wide subrange, but instead of storing the full 64-bit value of the leaf, -// the trie stores an 8-bit index into a shared table of leaf values. This -// exploits the fact that in reasonable sets, many such leaves can be shared. -// -// In the third (0x10000..0x110000), each child of the root node represents a -// 4096-wide subrange, and the trie stores an 8-bit index into a 64-byte slice -// of a child tree. Each of these 64 bytes represents an index into the table -// of shared 64-bit leaf values. This exploits the sparse structure in the -// non-BMP range of most Unicode sets. -pub struct BoolTrie { - // 0..0x800 (corresponding to 1 and 2 byte utf-8 sequences) - r1: [u64; 32], // leaves - - // 0x800..0x10000 (corresponding to 3 byte utf-8 sequences) - r2: [u8; 992], // first level - r3: &'static [u64], // leaves - - // 0x10000..0x110000 (corresponding to 4 byte utf-8 sequences) - r4: [u8; 256], // first level - r5: &'static [u8], // second level - r6: &'static [u64], // leaves -} - -fn trie_range_leaf(c: usize, bitmap_chunk: u64) -> bool { - ((bitmap_chunk >> (c & 63)) & 1) != 0 -} - -fn trie_lookup_range_table(c: char, r: &'static BoolTrie) -> bool { - let c = c as usize; - if c < 0x800 { - trie_range_leaf(c, r.r1[c >> 6]) - } else if c < 0x10000 { - let child = r.r2[(c >> 6) - 0x20]; - trie_range_leaf(c, r.r3[child as usize]) - } else { - let child = r.r4[(c >> 12) - 0x10]; - let leaf = r.r5[((child as usize) << 6) + ((c >> 6) & 0x3f)]; - trie_range_leaf(c, r.r6[leaf as usize]) - } -} - -pub struct SmallBoolTrie { - r1: &'static [u8], // first level - r2: &'static [u64], // leaves -} - -impl SmallBoolTrie { - fn lookup(&self, c: char) -> bool { - let c = c as usize; - match self.r1.get(c >> 6) { - Some(&child) => trie_range_leaf(c, self.r2[child as usize]), - None => false, - } - } -} - -""") - def compute_trie(rawdata, chunksize): root = [] childmap = {} @@ -410,7 +325,7 @@ 
def emit_bool_trie(f, name, t_data, is_pub=True): pub_string = "" if is_pub: pub_string = "pub " - f.write(" %sconst %s: &'static super::BoolTrie = &super::BoolTrie {\n" % (pub_string, name)) + f.write(" %sconst %s: &super::BoolTrie = &super::BoolTrie {\n" % (pub_string, name)) f.write(" r1: [\n") data = ','.join('0x%016x' % chunk for chunk in chunks[0:0x800 // CHUNK]) format_table_content(f, data, 12) @@ -458,7 +373,7 @@ def emit_small_bool_trie(f, name, t_data, is_pub=True): pub_string = "" if is_pub: pub_string = "pub " - f.write(" %sconst %s: &'static super::SmallBoolTrie = &super::SmallBoolTrie {\n" + f.write(" %sconst %s: &super::SmallBoolTrie = &super::SmallBoolTrie {\n" % (pub_string, name)) (r1, r2) = compute_trie(chunks, 1) @@ -486,7 +401,7 @@ def emit_property_module(f, mod, tbl, emit): else: emit_bool_trie(f, "%s_table" % cat, tbl[cat]) f.write(" pub fn %s(c: char) -> bool {\n" % cat) - f.write(" super::trie_lookup_range_table(c, %s_table)\n" % cat) + f.write(" %s_table.lookup(c)\n" % cat) f.write(" }\n\n") f.write("}\n\n") @@ -510,12 +425,12 @@ def emit_conversions_module(f, to_upper, to_lower, to_title): } } - fn bsearch_case_table(c: char, table: &'static [(char, [char; 3])]) -> Option<usize> { + fn bsearch_case_table(c: char, table: &[(char, [char; 3])]) -> Option<usize> { table.binary_search_by(|&(key, _)| key.cmp(&c)).ok() } """) - t_type = "&'static [(char, [char; 3])]" + t_type = "&[(char, [char; 3])]" pfun = lambda x: "(%s,[%s,%s,%s])" % ( escape_char(x[0]), escape_char(x[1][0]), escape_char(x[1][1]), escape_char(x[1][2])) emit_table(f, "to_lowercase_table", @@ -557,24 +472,6 @@ if __name__ == "__main__": pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode" unicode_version = re.search(pattern, readme.read()).groups() rf.write(""" -/// Represents a Unicode Version. -/// -/// See also: <http://www.unicode.org/versions/> -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct UnicodeVersion { - /// Major version. 
- pub major: u32, - - /// Minor version. - pub minor: u32, - - /// Micro (or Update) version. - pub micro: u32, - - // Private field to keep struct expandable. - _priv: (), -} - /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of /// `CharExt` and `UnicodeStrPrelude` traits are based on. pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion { @@ -596,10 +493,6 @@ pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion { norm_props = load_properties("DerivedNormalizationProps.txt", ["Full_Composition_Exclusion"]) - # trie_lookup_table is used in all the property modules below - emit_trie_lookup_range_table(rf) - # emit_bsearch_range_table(rf) - # category tables for (name, cat, pfuns) in ("general_category", gencats, ["N", "Cc"]), \ ("derived_property", derived, want_derived), \ |
