about summary refs log tree commit diff
path: root/src/etc
diff options
context:
space:
mode:
authorFlorian Zeitz <florob@babelmonkeys.de>2013-08-07 20:48:10 +0200
committerFlorian Zeitz <florob@babelmonkeys.de>2013-08-21 11:50:07 +0200
commit83f4bee44f077c8f45eb2bd314aee7f2af8ee0dc (patch)
tree613182897f6b20a1017c65cffe83ba35f6daffec /src/etc
parentd4d856b129b7af6e78f1a04d6e942997559d06f8 (diff)
downloadrust-83f4bee44f077c8f45eb2bd314aee7f2af8ee0dc.tar.gz
rust-83f4bee44f077c8f45eb2bd314aee7f2af8ee0dc.zip
Add Unicode decomposition mappings to std::unicode
Diffstat (limited to 'src/etc')
-rwxr-xr-xsrc/etc/unicode.py130
1 files changed, 99 insertions, 31 deletions
diff --git a/src/etc/unicode.py b/src/etc/unicode.py
index afcbc0a9859..48c14c0cd71 100755
--- a/src/etc/unicode.py
+++ b/src/etc/unicode.py
@@ -178,50 +178,118 @@ def emit_property_module_old(f, mod, tbl):
         f.write("    }\n\n")
     f.write("}\n")
 
+def format_table_content(f, content, indent):
+    line = " "*indent
+    first = True
+    for chunk in content.split(","):
+        if len(line) + len(chunk) < 98:
+            if first:
+                line += chunk
+            else:
+                line += ", " + chunk
+            first = False
+        else:
+            f.write(line + ",\n")
+            line = " "*indent + chunk
+    f.write(line)
+
 def emit_decomp_module(f, canon, compat):
     canon_keys = canon.keys()
     canon_keys.sort()
 
     compat_keys = compat.keys()
     compat_keys.sort()
-    f.write("mod decompose {\n\n");
-    f.write("    export canonical, compatibility;\n\n")
-    f.write("    fn canonical(c: char, i: block(char)) "
-            + "{ d(c, i, false); }\n\n")
-    f.write("    fn compatibility(c: char, i: block(char)) "
+    f.write("pub mod decompose {\n");
+    f.write("    use option::Option;\n");
+    f.write("    use option::{Some, None};\n");
+    f.write("    use vec::ImmutableVector;\n");
+    f.write("""
+    fn bsearch_table(c: char, r: &'static [(char, &'static [char])]) -> Option<&'static [char]> {
+        use cmp::{Equal, Less, Greater};
+        match r.bsearch(|&(val, _)| {
+            if c == val { Equal }
+            else if val < c { Less }
+            else { Greater }
+        }) {
+            Some(idx) => {
+                let (_, result) = r[idx];
+                Some(result)
+            }
+            None => None
+        }
+    }\n\n
+""")
+    f.write("    // Canonical decompositions\n")
+    f.write("    static canonical_table : &'static [(char, &'static [char])] = &[\n")
+    data = ""
+    first = True
+    for char in canon_keys:
+        if not first:
+            data += ","
+        first = False
+        data += "(%s,&[" % escape_char(char)
+        first2 = True
+        for d in canon[char]:
+            if not first2:
+                data += ","
+            first2 = False
+            data += escape_char(d)
+        data += "])"
+    format_table_content(f, data, 8)
+    f.write("\n    ];\n\n")
+    f.write("    // Compatibility decompositions\n")
+    f.write("    static compatibility_table : &'static [(char, &'static [char])] = &[\n")
+    data = ""
+    first = True
+    for char in compat_keys:
+        if not first:
+            data += ","
+        first = False
+        data += "(%s,&[" % escape_char(char)
+        first2 = True
+        for d in compat[char]:
+            if not first2:
+                data += ","
+            first2 = False
+            data += escape_char(d)
+        data += "])"
+    format_table_content(f, data, 8)
+    f.write("\n    ];\n\n")
+    f.write("    pub fn canonical(c: char, i: &fn(char)) "
+        + "{ d(c, i, false); }\n\n")
+    f.write("    pub fn compatibility(c: char, i: &fn(char)) "
             +"{ d(c, i, true); }\n\n")
-    f.write("    fn d(c: char, i: block(char), k: bool) {\n")
+    f.write("    fn d(c: char, i: &fn(char), k: bool) {\n")
+    f.write("        use iterator::Iterator;\n");
 
-    f.write("        if c <= '\\x7f' { i(c); ret; }\n")
+    f.write("        if c <= '\\x7f' { i(c); return; }\n")
 
     # First check the canonical decompositions
-    f.write("        // Canonical decomposition\n")
-    f.write("        alt c {\n")
-    for char in canon_keys:
-        f.write("          %s {\n" % escape_char(char))
-        for d in canon[char]:
-            f.write("            d(%s, i, k);\n"
-                    % escape_char(d))
-        f.write("          }\n")
-
-    f.write("          _ { }\n")
-    f.write("        }\n\n")
+    f.write("""
+        match bsearch_table(c, canonical_table) {
+            Some(canon) => {
+                for x in canon.iter() {
+                    d(*x, |b| i(b), k);
+                }
+                return;
+            }
+            None => ()
+        }\n\n""")
 
     # Bottom out if we're not doing compat.
-    f.write("        if !k { i(c); ret; }\n\n ")
+    f.write("        if !k { i(c); return; }\n")
 
     # Then check the compatibility decompositions
-    f.write("        // Compatibility decomposition\n")
-    f.write("        alt c {\n")
-    for char in compat_keys:
-        f.write("          %s {\n" % escape_char(char))
-        for d in compat[char]:
-            f.write("            d(%s, i, k);\n"
-                    % escape_char(d))
-        f.write("          }\n")
-
-    f.write("          _ { }\n")
-    f.write("        }\n\n")
+    f.write("""
+        match bsearch_table(c, compatibility_table) {
+            Some(compat) => {
+                for x in compat.iter() {
+                    d(*x, |b| i(b), k);
+                }
+                return;
+            }
+            None => ()
+        }\n\n""")
 
     # Finally bottom out.
     f.write("        i(c);\n")
@@ -256,7 +324,7 @@ rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGH
 
 emit_property_module(rf, "general_category", gencats)
 
-#emit_decomp_module(rf, canon_decomp, compat_decomp)
+emit_decomp_module(rf, canon_decomp, compat_decomp)
 
 derived = load_derived_core_properties("DerivedCoreProperties.txt")
 emit_property_module(rf, "derived_property", derived)