diff options
Diffstat (limited to 'src/etc/unicode.py')
| -rwxr-xr-x | src/etc/unicode.py | 163 |
1 files changed, 0 insertions, 163 deletions
diff --git a/src/etc/unicode.py b/src/etc/unicode.py index a740e837fdd..2c147419470 100755 --- a/src/etc/unicode.py +++ b/src/etc/unicode.py @@ -395,46 +395,6 @@ def emit_conversions_module(f, to_upper, to_lower, to_title): is_pub=False, t_type = t_type, pfun=pfun) f.write("}\n\n") -def emit_grapheme_module(f, grapheme_table, grapheme_cats): - f.write("""pub mod grapheme { - use core::slice::SliceExt; - pub use self::GraphemeCat::*; - use core::result::Result::{Ok, Err}; - - #[allow(non_camel_case_types)] - #[derive(Clone, Copy)] - pub enum GraphemeCat { -""") - for cat in grapheme_cats + ["Any"]: - f.write(" GC_" + cat + ",\n") - f.write(""" } - - fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)]) -> GraphemeCat { - use core::cmp::Ordering::{Equal, Less, Greater}; - match r.binary_search_by(|&(lo, hi, _)| { - if lo <= c && c <= hi { Equal } - else if hi < c { Less } - else { Greater } - }) { - Ok(idx) => { - let (_, _, cat) = r[idx]; - cat - } - Err(_) => GC_Any - } - } - - pub fn grapheme_category(c: char) -> GraphemeCat { - bsearch_range_value_table(c, grapheme_cat_table) - } - -""") - - emit_table(f, "grapheme_cat_table", grapheme_table, "&'static [(char, char, GraphemeCat)]", - pfun=lambda x: "(%s,%s,GC_%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]), - is_pub=False) - f.write("}\n") - def emit_charwidth_module(f, width_table): f.write("pub mod charwidth {\n") f.write(" use core::option::Option;\n") @@ -497,79 +457,6 @@ def emit_norm_module(f, canon, compat, combine, norm_props): canon_comp_keys = canon_comp.keys() canon_comp_keys.sort() - f.write("pub mod normalization {\n") - - def mkdata_fun(table): - def f(char): - data = "(%s,&[" % escape_char(char) - first = True - for d in table[char]: - if not first: - data += "," - first = False - data += escape_char(d) - data += "])" - return data - return f - - f.write(" // Canonical decompositions\n") - emit_table(f, "canonical_table", canon_keys, "&'static [(char, &'static [char])]", - pfun=mkdata_fun(canon)) - - f.write(" // Compatibility decompositions\n") - emit_table(f, "compatibility_table", compat_keys, "&'static [(char, &'static [char])]", - pfun=mkdata_fun(compat)) - - def comp_pfun(char): - data = "(%s,&[" % escape_char(char) - canon_comp[char].sort(lambda x, y: x[0] - y[0]) - first = True - for pair in canon_comp[char]: - if not first: - data += "," - first = False - data += "(%s,%s)" % (escape_char(pair[0]), escape_char(pair[1])) - data += "])" - return data - - f.write(" // Canonical compositions\n") - emit_table(f, "composition_table", canon_comp_keys, - "&'static [(char, &'static [(char, char)])]", pfun=comp_pfun) - - f.write(""" - fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 { - use core::cmp::Ordering::{Equal, Less, Greater}; - use core::slice::SliceExt; - use core::result::Result::{Ok, Err}; - match r.binary_search_by(|&(lo, hi, _)| { - if lo <= c && c <= hi { Equal } - else if hi < c { Less } - else { Greater } - }) { - Ok(idx) => { - let (_, _, result) = r[idx]; - result - } - Err(_) => 0 - } - }\n -""") - - emit_table(f, "combining_class_table", combine, "&'static [(char, char, u8)]", is_pub=False, - pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2])) - - f.write(""" #[deprecated(reason = "use the crates.io `unicode-normalization` lib instead", - since = "1.0.0")] - #[unstable(feature = "unicode", - reason = "this functionality will be moved to crates.io")] - pub fn canonical_combining_class(c: char) -> u8 { - bsearch_range_value_table(c, combining_class_table) - } - -} - -""") - def remove_from_wtable(wtable, val): wtable_out = [] while wtable: @@ -649,53 +536,3 @@ pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s); # normalizations and conversions module emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props) emit_conversions_module(rf, to_upper, to_lower, to_title) - - ### character width module - width_table = [] - for zwcat in ["Me", "Mn", "Cf"]: - width_table.extend(map(lambda (lo, hi): (lo, hi, 0, 0), gencats[zwcat])) - width_table.append((4448, 4607, 0, 0)) - - # get widths, except those that are explicitly marked zero-width above - ea_widths = load_east_asian_width(["W", "F", "A"], ["Me", "Mn", "Cf"]) - # these are doublewidth - for dwcat in ["W", "F"]: - width_table.extend(map(lambda (lo, hi): (lo, hi, 2, 2), ea_widths[dwcat])) - width_table.extend(map(lambda (lo, hi): (lo, hi, 1, 2), ea_widths["A"])) - - width_table.sort(key=lambda w: w[0]) - - # soft hyphen is not zero width in preformatted text; it's used to indicate - # a hyphen inserted to facilitate a linebreak. - width_table = remove_from_wtable(width_table, 173) - - # optimize the width table by collapsing adjacent entities when possible - width_table = optimize_width_table(width_table) - emit_charwidth_module(rf, width_table) - - ### grapheme cluster module - # from http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Break_Property_Values - grapheme_cats = load_properties("auxiliary/GraphemeBreakProperty.txt", []) - - # Control - # Note 1: - # This category also includes Cs (surrogate codepoints), but Rust's `char`s are - # Unicode Scalar Values only, and surrogates are thus invalid `char`s. - # Thus, we have to remove Cs from the Control category - # Note 2: - # 0x0a and 0x0d (CR and LF) are not in the Control category for Graphemes. - # However, the Graphemes iterator treats these as a special case, so they - # should be included in grapheme_cats["Control"] for our implementation. - grapheme_cats["Control"] = group_cat(list( - (set(ungroup_cat(grapheme_cats["Control"])) - | set(ungroup_cat(grapheme_cats["CR"])) - | set(ungroup_cat(grapheme_cats["LF"]))) - - set(ungroup_cat([surrogate_codepoints])))) - del(grapheme_cats["CR"]) - del(grapheme_cats["LF"]) - - grapheme_table = [] - for cat in grapheme_cats: - grapheme_table.extend([(x, y, cat) for (x, y) in grapheme_cats[cat]]) - grapheme_table.sort(key=lambda w: w[0]) - emit_grapheme_module(rf, grapheme_table, grapheme_cats.keys()) |
