about summary refs log tree commit diff
diff options
context:
space:
mode:
author Florian Zeitz <florob@babelmonkeys.de> 2014-05-12 22:25:38 +0200
committer Alex Crichton <alex@alexcrichton.com> 2014-05-13 17:24:07 -0700
commit 8c54d5bf406fbfdbebd1a4553f430fca02b2c117 (patch)
tree 69caac0e477cd22152e15605f5c3699bdf406a95
parent 2f71b72a12f05d3efc6b507b12102ce52c6415dd (diff)
download rust-8c54d5bf406fbfdbebd1a4553f430fca02b2c117.tar.gz
download rust-8c54d5bf406fbfdbebd1a4553f430fca02b2c117.zip
core: Move Hangul decomposition into unicode.rs
-rwxr-xr-x src/etc/unicode.py 77
-rw-r--r-- src/libcore/char.rs 54
-rw-r--r-- src/libcore/unicode.rs 48
3 files changed, 110 insertions, 69 deletions
diff --git a/src/etc/unicode.py b/src/etc/unicode.py
index e98c65ca50e..f079ef73cd8 100755
--- a/src/etc/unicode.py
+++ b/src/etc/unicode.py
@@ -321,17 +321,24 @@ def emit_core_decomp_module(f, canon, compat):
     format_table_content(f, data, 8)
     f.write("\n    ];\n\n")
 
-    f.write("    pub fn canonical(c: char, i: |char|) "
-        + "{ d(c, i, false); }\n\n")
-    f.write("    pub fn compatibility(c: char, i: |char|) "
-            +"{ d(c, i, true); }\n\n")
-    f.write("    fn d(c: char, i: |char|, k: bool) {\n")
-    f.write("        use iter::Iterator;\n");
+    f.write("""
+    pub fn decompose_canonical(c: char, i: |char|) { d(c, i, false); }
 
-    f.write("        if c <= '\\x7f' { i(c); return; }\n")
+    pub fn decompose_compatible(c: char, i: |char|) { d(c, i, true); }
 
-    # First check the canonical decompositions
-    f.write("""
+    fn d(c: char, i: |char|, k: bool) {
+        use iter::Iterator;
+
+        // 7-bit ASCII never decomposes
+        if c <= '\\x7f' { i(c); return; }
+
+        // Perform decomposition for Hangul
+        if (c as u32) >= S_BASE && (c as u32) < (S_BASE + S_COUNT) {
+            decompose_hangul(c, i);
+            return;
+        }
+
+        // First check the canonical decompositions
         match bsearch_table(c, canonical_table) {
             Some(canon) => {
                 for x in canon.iter() {
@@ -340,13 +347,12 @@ def emit_core_decomp_module(f, canon, compat):
                 return;
             }
             None => ()
-        }\n\n""")
+        }
 
-    # Bottom out if we're not doing compat.
-    f.write("        if !k { i(c); return; }\n")
+        // Bottom out if we're not doing compat.
+        if !k { i(c); return; }
 
-    # Then check the compatibility decompositions
-    f.write("""
+        // Then check the compatibility decompositions
         match bsearch_table(c, compatibility_table) {
             Some(compat) => {
                 for x in compat.iter() {
@@ -355,12 +361,45 @@ def emit_core_decomp_module(f, canon, compat):
                 return;
             }
             None => ()
-        }\n\n""")
+        }
 
-    # Finally bottom out.
-    f.write("        i(c);\n")
-    f.write("    }\n")
-    f.write("}\n\n")
+        // Finally bottom out.
+        i(c);
+    }
+
+    // Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
+    static S_BASE: u32 = 0xAC00;
+    static L_BASE: u32 = 0x1100;
+    static V_BASE: u32 = 0x1161;
+    static T_BASE: u32 = 0x11A7;
+    static L_COUNT: u32 = 19;
+    static V_COUNT: u32 = 21;
+    static T_COUNT: u32 = 28;
+    static N_COUNT: u32 = (V_COUNT * T_COUNT);
+    static S_COUNT: u32 = (L_COUNT * N_COUNT);
+
+    // Decompose a precomposed Hangul syllable (caller checks the S_BASE range) into its jamo
+    fn decompose_hangul(s: char, f: |char|) {
+        use mem::transmute;
+
+        let si = s as u32 - S_BASE; // offset of `s` from the first precomposed syllable
+
+        let li = si / N_COUNT; // leading-consonant index
+        unsafe {
+            f(transmute(L_BASE + li));
+
+            let vi = (si % N_COUNT) / T_COUNT; // vowel index
+            f(transmute(V_BASE + vi));
+
+            let ti = si % T_COUNT; // trailing-consonant index; 0 means there is none
+            if ti > 0 {
+                f(transmute(T_BASE + ti));
+            }
+        }
+    }
+}
+
+""")
 
 def emit_std_decomp_module(f, combine):
     f.write("pub mod decompose {\n");
diff --git a/src/libcore/char.rs b/src/libcore/char.rs
index ca5e56f0649..71a2d75715b 100644
--- a/src/libcore/char.rs
+++ b/src/libcore/char.rs
@@ -27,7 +27,12 @@
 use mem::transmute;
 use option::{None, Option, Some};
 use iter::{Iterator, range_step};
-use unicode::{derived_property, property, general_category, decompose, conversions};
+use unicode::{derived_property, property, general_category, conversions};
+
+/// Returns the canonical decomposition of a character.
+pub use unicode::decompose::decompose_canonical;
+/// Returns the compatibility decomposition of a character.
+pub use unicode::decompose::decompose_compatible;
 
 #[cfg(not(test))] use cmp::{Eq, Ord, TotalEq, TotalOrd, Ordering};
 #[cfg(not(test))] use default::Default;
@@ -285,53 +290,6 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
     }
 }
 
-// Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
-static S_BASE: u32 = 0xAC00;
-static L_BASE: u32 = 0x1100;
-static V_BASE: u32 = 0x1161;
-static T_BASE: u32 = 0x11A7;
-static L_COUNT: u32 = 19;
-static V_COUNT: u32 = 21;
-static T_COUNT: u32 = 28;
-static N_COUNT: u32 = (V_COUNT * T_COUNT);
-static S_COUNT: u32 = (L_COUNT * N_COUNT);
-
-// Decompose a precomposed Hangul syllable
-fn decompose_hangul(s: char, f: |char|) {
-    let si = s as u32 - S_BASE;
-
-    let li = si / N_COUNT;
-    unsafe {
-        f(transmute(L_BASE + li));
-
-        let vi = (si % N_COUNT) / T_COUNT;
-        f(transmute(V_BASE + vi));
-
-        let ti = si % T_COUNT;
-        if ti > 0 {
-            f(transmute(T_BASE + ti));
-        }
-    }
-}
-
-/// Returns the canonical decomposition of a character
-pub fn decompose_canonical(c: char, f: |char|) {
-    if (c as u32) < S_BASE || (c as u32) >= (S_BASE + S_COUNT) {
-        decompose::canonical(c, f);
-    } else {
-        decompose_hangul(c, f);
-    }
-}
-
-/// Returns the compatibility decomposition of a character
-pub fn decompose_compatible(c: char, f: |char|) {
-    if (c as u32) < S_BASE || (c as u32) >= (S_BASE + S_COUNT) {
-        decompose::compatibility(c, f);
-    } else {
-        decompose_hangul(c, f);
-    }
-}
-
 ///
 /// Returns the hexadecimal Unicode escape of a `char`
 ///
diff --git a/src/libcore/unicode.rs b/src/libcore/unicode.rs
index b3298bde055..bffde2323bf 100644
--- a/src/libcore/unicode.rs
+++ b/src/libcore/unicode.rs
@@ -2121,14 +2121,24 @@ pub mod decompose {
         &['\u53ef'])
     ];
 
-    pub fn canonical(c: char, i: |char|) { d(c, i, false); }
 
-    pub fn compatibility(c: char, i: |char|) { d(c, i, true); }
+    pub fn decompose_canonical(c: char, i: |char|) { d(c, i, false); }
+
+    pub fn decompose_compatible(c: char, i: |char|) { d(c, i, true); }
 
     fn d(c: char, i: |char|, k: bool) {
         use iter::Iterator;
+
+        // 7-bit ASCII never decomposes
         if c <= '\x7f' { i(c); return; }
 
+        // Perform decomposition for Hangul
+        if (c as u32) >= S_BASE && (c as u32) < (S_BASE + S_COUNT) {
+            decompose_hangul(c, i);
+            return;
+        }
+
+        // First check the canonical decompositions
         match bsearch_table(c, canonical_table) {
             Some(canon) => {
                 for x in canon.iter() {
@@ -2139,8 +2149,10 @@ pub mod decompose {
             None => ()
         }
 
+        // Bottom out if we're not doing compat.
         if !k { i(c); return; }
 
+        // Then check the compatibility decompositions
         match bsearch_table(c, compatibility_table) {
             Some(compat) => {
                 for x in compat.iter() {
@@ -2151,8 +2163,40 @@ pub mod decompose {
             None => ()
         }
 
+        // Finally bottom out.
         i(c);
     }
+
+    // Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
+    static S_BASE: u32 = 0xAC00;
+    static L_BASE: u32 = 0x1100;
+    static V_BASE: u32 = 0x1161;
+    static T_BASE: u32 = 0x11A7;
+    static L_COUNT: u32 = 19;
+    static V_COUNT: u32 = 21;
+    static T_COUNT: u32 = 28;
+    static N_COUNT: u32 = (V_COUNT * T_COUNT);
+    static S_COUNT: u32 = (L_COUNT * N_COUNT);
+
+    // Decompose a precomposed Hangul syllable
+    fn decompose_hangul(s: char, f: |char|) {
+        use mem::transmute;
+
+        let si = s as u32 - S_BASE;
+
+        let li = si / N_COUNT;
+        unsafe {
+            f(transmute(L_BASE + li));
+
+            let vi = (si % N_COUNT) / T_COUNT;
+            f(transmute(V_BASE + vi));
+
+            let ti = si % T_COUNT;
+            if ti > 0 {
+                f(transmute(T_BASE + ti));
+            }
+        }
+    }
 }
 
 pub mod derived_property {