diff options
| author | Florian Zeitz <florob@babelmonkeys.de> | 2013-08-07 20:48:10 +0200 |
|---|---|---|
| committer | Florian Zeitz <florob@babelmonkeys.de> | 2013-08-21 11:50:07 +0200 |
| commit | 83f4bee44f077c8f45eb2bd314aee7f2af8ee0dc (patch) | |
| tree | 613182897f6b20a1017c65cffe83ba35f6daffec /src/etc/unicode.py | |
| parent | d4d856b129b7af6e78f1a04d6e942997559d06f8 (diff) | |
| download | rust-83f4bee44f077c8f45eb2bd314aee7f2af8ee0dc.tar.gz rust-83f4bee44f077c8f45eb2bd314aee7f2af8ee0dc.zip | |
Add Unicode decomposition mappings to std::unicode
Diffstat (limited to 'src/etc/unicode.py')
| -rwxr-xr-x | src/etc/unicode.py | 130 |
1 files changed, 99 insertions, 31 deletions
```diff
diff --git a/src/etc/unicode.py b/src/etc/unicode.py
index afcbc0a9859..48c14c0cd71 100755
--- a/src/etc/unicode.py
+++ b/src/etc/unicode.py
@@ -178,50 +178,118 @@ def emit_property_module_old(f, mod, tbl):
     f.write(" }\n\n")
     f.write("}\n")
 
+def format_table_content(f, content, indent):
+    line = " "*indent
+    first = True
+    for chunk in content.split(","):
+        if len(line) + len(chunk) < 98:
+            if first:
+                line += chunk
+            else:
+                line += ", " + chunk
+            first = False
+        else:
+            f.write(line + ",\n")
+            line = " "*indent + chunk
+    f.write(line)
+
 def emit_decomp_module(f, canon, compat):
     canon_keys = canon.keys()
     canon_keys.sort()
 
     compat_keys = compat.keys()
     compat_keys.sort()
-    f.write("mod decompose {\n\n");
-    f.write(" export canonical, compatibility;\n\n")
-    f.write(" fn canonical(c: char, i: block(char)) "
-            + "{ d(c, i, false); }\n\n")
-    f.write(" fn compatibility(c: char, i: block(char)) "
+    f.write("pub mod decompose {\n");
+    f.write(" use option::Option;\n");
+    f.write(" use option::{Some, None};\n");
+    f.write(" use vec::ImmutableVector;\n");
+    f.write("""
+    fn bsearch_table(c: char, r: &'static [(char, &'static [char])]) -> Option<&'static [char]> {
+        use cmp::{Equal, Less, Greater};
+        match r.bsearch(|&(val, _)| {
+            if c == val { Equal }
+            else if val < c { Less }
+            else { Greater }
+        }) {
+            Some(idx) => {
+                let (_, result) = r[idx];
+                Some(result)
+            }
+            None => None
+        }
+    }\n\n
+""")
+    f.write(" // Canonical decompositions\n")
+    f.write(" static canonical_table : &'static [(char, &'static [char])] = &[\n")
+    data = ""
+    first = True
+    for char in canon_keys:
+        if not first:
+            data += ","
+        first = False
+        data += "(%s,&[" % escape_char(char)
+        first2 = True
+        for d in canon[char]:
+            if not first2:
+                data += ","
+            first2 = False
+            data += escape_char(d)
+        data += "])"
+    format_table_content(f, data, 8)
+    f.write("\n ];\n\n")
+    f.write(" // Compatibility decompositions\n")
+    f.write(" static compatibility_table : &'static [(char, &'static [char])] = &[\n")
+    data = ""
+    first = True
+    for char in compat_keys:
+        if not first:
+            data += ","
+        first = False
+        data += "(%s,&[" % escape_char(char)
+        first2 = True
+        for d in compat[char]:
+            if not first2:
+                data += ","
+            first2 = False
+            data += escape_char(d)
+        data += "])"
+    format_table_content(f, data, 8)
+    f.write("\n ];\n\n")
+    f.write(" pub fn canonical(c: char, i: &fn(char)) "
+            + "{ d(c, i, false); }\n\n")
+    f.write(" pub fn compatibility(c: char, i: &fn(char)) "
             +"{ d(c, i, true); }\n\n")
-    f.write(" fn d(c: char, i: block(char), k: bool) {\n")
+    f.write(" fn d(c: char, i: &fn(char), k: bool) {\n")
+    f.write(" use iterator::Iterator;\n");
 
-    f.write(" if c <= '\\x7f' { i(c); ret; }\n")
+    f.write(" if c <= '\\x7f' { i(c); return; }\n")
 
     # First check the canonical decompositions
-    f.write(" // Canonical decomposition\n")
-    f.write(" alt c {\n")
-    for char in canon_keys:
-        f.write(" %s {\n" % escape_char(char))
-        for d in canon[char]:
-            f.write(" d(%s, i, k);\n"
-                    % escape_char(d))
-        f.write(" }\n")
-
-    f.write(" _ { }\n")
-    f.write(" }\n\n")
+    f.write("""
+        match bsearch_table(c, canonical_table) {
+            Some(canon) => {
+                for x in canon.iter() {
+                    d(*x, |b| i(b), k);
+                }
+                return;
+            }
+            None => ()
+        }\n\n""")
 
     # Bottom out if we're not doing compat.
-    f.write(" if !k { i(c); ret; }\n\n ")
+    f.write(" if !k { i(c); return; }\n")
 
     # Then check the compatibility decompositions
-    f.write(" // Compatibility decomposition\n")
-    f.write(" alt c {\n")
-    for char in compat_keys:
-        f.write(" %s {\n" % escape_char(char))
-        for d in compat[char]:
-            f.write(" d(%s, i, k);\n"
-                    % escape_char(d))
-        f.write(" }\n")
-
-    f.write(" _ { }\n")
-    f.write(" }\n\n")
+    f.write("""
+        match bsearch_table(c, compatibility_table) {
+            Some(compat) => {
+                for x in compat.iter() {
+                    d(*x, |b| i(b), k);
+                }
+                return;
+            }
+            None => ()
+        }\n\n""")
 
     # Finally bottom out.
     f.write(" i(c);\n")
@@ -256,7 +324,7 @@ rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGH
 
 emit_property_module(rf, "general_category", gencats)
 
-#emit_decomp_module(rf, canon_decomp, compat_decomp)
+emit_decomp_module(rf, canon_decomp, compat_decomp)
 
 derived = load_derived_core_properties("DerivedCoreProperties.txt")
 emit_property_module(rf, "derived_property", derived)
```
