about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Florian Zeitz <florob@babelmonkeys.de>, 2014-05-12 19:56:41 +0200
committer: Alex Crichton <alex@alexcrichton.com>, 2014-05-13 17:24:07 -0700
commit: 74ad0236747469f9646916d1916dee2598076161 (patch)
tree: db2d8c1fe2d8461e4bcbfb7fe8acba782e66693f
parent: 21867fa1279c38189faccfb430c8bd6bffe0ef9e (diff)
download: rust-74ad0236747469f9646916d1916dee2598076161.tar.gz
          rust-74ad0236747469f9646916d1916dee2598076161.zip
std, core: Generate unicode.rs using unicode.py
-rwxr-xr-x  src/etc/unicode.py      131
-rw-r--r--  src/libcore/unicode.rs   11
-rw-r--r--  src/libstd/unicode.rs     4
3 files changed, 85 insertions, 61 deletions
diff --git a/src/etc/unicode.py b/src/etc/unicode.py
index d5c74e36734..e98c65ca50e 100755
--- a/src/etc/unicode.py
+++ b/src/etc/unicode.py
@@ -169,7 +169,7 @@ fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
         else if hi < c { Less }
         else { Greater }
     }) != None
-}\n\n
+}\n
 """);
 
 def emit_property_module(f, mod, tbl):
@@ -193,11 +193,11 @@ def emit_property_module(f, mod, tbl):
         f.write("    pub fn %s(c: char) -> bool {\n" % cat)
         f.write("        super::bsearch_range_table(c, %s_table)\n" % cat)
         f.write("    }\n\n")
-    f.write("}\n")
+    f.write("}\n\n")
 
 
 def emit_conversions_module(f, lowerupper, upperlower):
-    f.write("pub mod conversions {\n")
+    f.write("pub mod conversions {")
     f.write("""
     use cmp::{Equal, Less, Greater};
     use slice::ImmutableVector;
@@ -225,13 +225,14 @@ def emit_conversions_module(f, lowerupper, upperlower):
             else { Greater }
         })
     }
+
 """);
     emit_caseconversion_table(f, "LuLl", upperlower)
     emit_caseconversion_table(f, "LlLu", lowerupper)
     f.write("}\n")
 
 def emit_caseconversion_table(f, name, table):
-    f.write("   static %s_table : &'static [(char, char)] = &[\n" % name)
+    f.write("    static %s_table : &'static [(char, char)] = &[\n" % name)
     sorted_table = sorted(table.iteritems(), key=operator.itemgetter(0))
     ix = 0
     for key, value in sorted_table:
@@ -255,7 +256,7 @@ def format_table_content(f, content, indent):
             line = " "*indent + chunk
     f.write(line)
 
-def emit_decomp_module(f, canon, compat, combine):
+def emit_core_decomp_module(f, canon, compat):
     canon_keys = canon.keys()
     canon_keys.sort()
 
@@ -279,23 +280,6 @@ def emit_decomp_module(f, canon, compat, combine):
             }
             None => None
         }
-    }\n
-""")
-
-    f.write("""
-    fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
-        use cmp::{Equal, Less, Greater};
-        match r.bsearch(|&(lo, hi, _)| {
-            if lo <= c && c <= hi { Equal }
-            else if hi < c { Less }
-            else { Greater }
-        }) {
-            Some(idx) => {
-                let (_, _, result) = r[idx];
-                result
-            }
-            None => 0
-        }
     }\n\n
 """)
 
@@ -337,21 +321,10 @@ def emit_decomp_module(f, canon, compat, combine):
     format_table_content(f, data, 8)
     f.write("\n    ];\n\n")
 
-    f.write("    static combining_class_table : &'static [(char, char, u8)] = &[\n")
-    ix = 0
-    for pair in combine:
-        f.write(ch_prefix(ix))
-        f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2]))
-        ix += 1
-    f.write("\n    ];\n")
-
     f.write("    pub fn canonical(c: char, i: |char|) "
         + "{ d(c, i, false); }\n\n")
     f.write("    pub fn compatibility(c: char, i: |char|) "
             +"{ d(c, i, true); }\n\n")
-    f.write("    pub fn canonical_combining_class(c: char) -> u8 {\n"
-        + "        bsearch_range_value_table(c, combining_class_table)\n"
-        + "    }\n\n")
     f.write("    fn d(c: char, i: |char|, k: bool) {\n")
     f.write("        use iter::Iterator;\n");
 
@@ -389,17 +362,43 @@ def emit_decomp_module(f, canon, compat, combine):
     f.write("    }\n")
     f.write("}\n\n")
 
-r = "unicode.rs"
-for i in [r]:
-    if os.path.exists(i):
-        os.remove(i);
-rf = open(r, "w")
+def emit_std_decomp_module(f, combine):
+    f.write("pub mod decompose {\n");
+    f.write("    use option::{Some, None};\n");
+    f.write("    use slice::ImmutableVector;\n");
 
-(canon_decomp, compat_decomp, gencats,
- combines, lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
+    f.write("""
+    fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
+        use cmp::{Equal, Less, Greater};
+        match r.bsearch(|&(lo, hi, _)| {
+            if lo <= c && c <= hi { Equal }
+            else if hi < c { Less }
+            else { Greater }
+        }) {
+            Some(idx) => {
+                let (_, _, result) = r[idx];
+                result
+            }
+            None => 0
+        }
+    }\n\n
+""")
+
+    f.write("    static combining_class_table : &'static [(char, char, u8)] = &[\n")
+    ix = 0
+    for pair in combine:
+        f.write(ch_prefix(ix))
+        f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2]))
+        ix += 1
+    f.write("\n    ];\n\n")
 
-# Preamble
-rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
+    f.write("    pub fn canonical_combining_class(c: char) -> u8 {\n"
+        + "        bsearch_range_value_table(c, combining_class_table)\n"
+        + "    }\n")
+    f.write("}\n")
+
+
+preamble = '''// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -409,23 +408,45 @@ rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGH
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// The following code was generated by "src/etc/unicode.py"
+// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly
+
+#![allow(missing_doc, non_uppercase_statics)]
+
+'''
+
+(canon_decomp, compat_decomp, gencats,
+ combines, lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
+
+def gen_core_unicode():
+    r = "core_unicode.rs"
+    if os.path.exists(r):
+        os.remove(r);
+    with open(r, "w") as rf:
+        # Preamble
+        rf.write(preamble)
 
-#![allow(missing_doc)]
-#![allow(non_uppercase_statics)]
+        emit_bsearch_range_table(rf);
+        emit_property_module(rf, "general_category", gencats)
 
-''')
+        emit_core_decomp_module(rf, canon_decomp, compat_decomp)
 
-emit_bsearch_range_table(rf);
-emit_property_module(rf, "general_category", gencats)
+        derived = load_properties("DerivedCoreProperties.txt",
+                ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
 
-emit_decomp_module(rf, canon_decomp, compat_decomp, combines)
+        emit_property_module(rf, "derived_property", derived)
 
-derived = load_properties("DerivedCoreProperties.txt",
-        ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
+        props = load_properties("PropList.txt", ["White_Space"])
+        emit_property_module(rf, "property", props)
+        emit_conversions_module(rf, lowerupper, upperlower)
 
-emit_property_module(rf, "derived_property", derived)
+def gen_std_unicode():
+    r = "std_unicode.rs"
+    if os.path.exists(r):
+        os.remove(r);
+    with open(r, "w") as rf:
+        # Preamble
+        rf.write(preamble)
+        emit_std_decomp_module(rf, combines)
 
-props = load_properties("PropList.txt", ["White_Space"])
-emit_property_module(rf, "property", props)
-emit_conversions_module(rf, lowerupper, upperlower)
+gen_core_unicode()
+gen_std_unicode()
diff --git a/src/libcore/unicode.rs b/src/libcore/unicode.rs
index db016ad8807..b3298bde055 100644
--- a/src/libcore/unicode.rs
+++ b/src/libcore/unicode.rs
@@ -1,4 +1,4 @@
-// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -8,10 +8,11 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// The following code was generated by "src/etc/unicode.py"
+// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly
 
 #![allow(missing_doc, non_uppercase_statics)]
 
+
 fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
     use cmp::{Equal, Less, Greater};
     use slice::ImmutableVector;
@@ -102,6 +103,7 @@ pub mod general_category {
     }
 
 }
+
 pub mod decompose {
     use option::Option;
     use option::{Some, None};
@@ -123,7 +125,6 @@ pub mod decompose {
     }
 
 
-
     // Canonical decompositions
     static canonical_table : &'static [(char, &'static [char])] = &[
         ('\xc0', &['\x41', '\u0300']), ('\xc1', &['\x41', '\u0301']), ('\xc2', &['\x41', '\u0302']),
@@ -3968,6 +3969,7 @@ pub mod derived_property {
     pub fn XID_Start(c: char) -> bool {
         super::bsearch_range_table(c, XID_Start_table)
     }
+
 }
 
 pub mod property {
@@ -3983,6 +3985,7 @@ pub mod property {
     pub fn White_Space(c: char) -> bool {
         super::bsearch_range_table(c, White_Space_table)
     }
+
 }
 
 pub mod conversions {
@@ -4501,7 +4504,7 @@ pub mod conversions {
         ('\U00010426', '\U0001044e'), ('\U00010427', '\U0001044f')
     ];
 
-   static LlLu_table : &'static [(char, char)] = &[
+    static LlLu_table : &'static [(char, char)] = &[
         ('\x61', '\x41'), ('\x62', '\x42'),
         ('\x63', '\x43'), ('\x64', '\x44'),
         ('\x65', '\x45'), ('\x66', '\x46'),
diff --git a/src/libstd/unicode.rs b/src/libstd/unicode.rs
index be6e5d040a7..d534b30221b 100644
--- a/src/libstd/unicode.rs
+++ b/src/libstd/unicode.rs
@@ -1,4 +1,4 @@
-// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// The following code was generated by "src/etc/unicode.py"
+// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly
 
 #![allow(missing_doc, non_uppercase_statics)]