about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2014-08-04 17:06:19 +0000
committer bors <bors@rust-lang.org> 2014-08-04 17:06:19 +0000
commit efe1f7ee9efb5da5613f2cff4f9b810d2d5992d4 (patch)
tree 67297aa25589ccc189255bc45ed3ec371b7c2b36 /src
parent 31590bd34900403a18079bf4623cd35f9da0c100 (diff)
parent 7ece0abe64bf7c5bdd03e4cbecdb914f470eb846 (diff)
download rust-efe1f7ee9efb5da5613f2cff4f9b810d2d5992d4.tar.gz
rust-efe1f7ee9efb5da5613f2cff4f9b810d2d5992d4.zip
auto merge of #15986 : Florob/rust/nfKc-new, r=alexcrichton
This adds a new `Recompositions` iterator, which performs canonical composition on the output of the `Decompositions` iterator (which performs either canonical or compatibility decomposition). In effect, this implements Unicode normalization forms C and KC.
Diffstat (limited to 'src')
-rwxr-xr-x src/etc/unicode.py 35
-rw-r--r-- src/libcollections/str.rs 228
-rw-r--r-- src/libunicode/lib.rs 4
-rw-r--r-- src/libunicode/normalize.rs (renamed from src/libunicode/decompose.rs) 47
-rw-r--r-- src/libunicode/tables.rs 304
5 files changed, 581 insertions, 37 deletions
diff --git a/src/etc/unicode.py b/src/etc/unicode.py
index f1761c5719a..5424cd3b3ab 100755
--- a/src/etc/unicode.py
+++ b/src/etc/unicode.py
@@ -464,13 +464,26 @@ def emit_charwidth_module(f, width_table):
             pfun=lambda x: "(%s,%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2], x[3]))
     f.write("}\n\n")
 
-def emit_norm_module(f, canon, compat, combine):
+def emit_norm_module(f, canon, compat, combine, norm_props):
     canon_keys = canon.keys()
     canon_keys.sort()
 
     compat_keys = compat.keys()
     compat_keys.sort()
 
+    canon_comp = {}
+    comp_exclusions = norm_props["Full_Composition_Exclusion"]
+    for char in canon_keys:
+        if True in map(lambda (lo, hi): lo <= char <= hi, comp_exclusions):
+            continue
+        decomp = canon[char]
+        if len(decomp) == 2:
+            if not canon_comp.has_key(decomp[0]):
+                canon_comp[decomp[0]] = []
+            canon_comp[decomp[0]].append( (decomp[1], char) )
+    canon_comp_keys = canon_comp.keys()
+    canon_comp_keys.sort()
+
     f.write("pub mod normalization {\n")
 
     def mkdata_fun(table):
@@ -494,6 +507,22 @@ def emit_norm_module(f, canon, compat, combine):
     emit_table(f, "compatibility_table", compat_keys, "&'static [(char, &'static [char])]",
         pfun=mkdata_fun(compat))
 
+    def comp_pfun(char):
+        data = "(%s,&[" % escape_char(char)
+        canon_comp[char].sort(lambda x, y: x[0] - y[0])
+        first = True
+        for pair in canon_comp[char]:
+            if not first:
+                data += ","
+            first = False
+            data += "(%s,%s)" % (escape_char(pair[0]), escape_char(pair[1]))
+        data += "])"
+        return data
+
+    f.write("    // Canonical compositions\n")
+    emit_table(f, "composition_table", canon_comp_keys,
+        "&'static [(char, &'static [(char, char)])]", pfun=comp_pfun)
+
     f.write("""
     fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
         use core::option::{Some, None};
@@ -579,6 +608,8 @@ if __name__ == "__main__":
         scripts = load_properties("Scripts.txt", [])
         props = load_properties("PropList.txt",
                 ["White_Space", "Join_Control", "Noncharacter_Code_Point"])
+        norm_props = load_properties("DerivedNormalizationProps.txt",
+                     ["Full_Composition_Exclusion"])
 
         # grapheme cluster category from DerivedCoreProperties
         # the rest are defined below
@@ -612,7 +643,7 @@ if __name__ == "__main__":
         emit_regex_module(rf, allcats, perl_words)
 
         # normalizations and conversions module
-        emit_norm_module(rf, canon_decomp, compat_decomp, combines)
+        emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props)
         emit_conversions_module(rf, lowerupper, upperlower)
 
         ### character width module
diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index f013557f9a5..f5a274b7677 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -77,8 +77,9 @@ use core::cmp;
 use core::iter::AdditiveIterator;
 use core::mem;
 
-use {Collection, MutableSeq};
+use {Collection, Deque, MutableSeq};
 use hash;
+use ringbuf::RingBuf;
 use string::String;
 use unicode;
 use vec::Vec;
@@ -302,6 +303,106 @@ impl<'a> Iterator<char> for Decompositions<'a> {
     }
 }
 
+#[deriving(Clone)]
+enum RecompositionState {
+    Composing, // pulling characters from the decomposition and composing them
+    Purging,   // draining `buffer`; switches back to `Composing` once it is empty
+    Finished   // the decomposition is exhausted; draining what is left in `buffer`
+}
+
+/// External iterator over the characters of a string's recomposition.
+/// Use with the `std::iter` module.
+#[deriving(Clone)]
+pub struct Recompositions<'a> {
+    iter: Decompositions<'a>,  // source of the decomposed characters
+    state: RecompositionState, // phase that `next` is currently in
+    buffer: RingBuf<char>,     // characters that did not compose, kept in arrival order
+    composee: Option<char>,    // character that following characters are composed onto
+    last_ccc: Option<u8>       // combining class of the character buffered last
+}
+
+impl<'a> Iterator<char> for Recompositions<'a> {
+    #[inline]
+    fn next(&mut self) -> Option<char> { // next character of the recomposed string
+        loop { // re-dispatches on `state` until one of the arms returns
+            match self.state {
+                Composing => {
+                    for ch in self.iter {
+                        let ch_class = unicode::char::canonical_combining_class(ch);
+                        if self.composee.is_none() { // nothing to compose onto yet
+                            if ch_class != 0 {
+                                return Some(ch); // nonzero class: passed through unchanged
+                            }
+                            self.composee = Some(ch); // class 0: becomes the composee
+                            continue;
+                        }
+                        let k = self.composee.clone().unwrap(); // `is_some()` established above
+
+                        match self.last_ccc {
+                            None => { // no buffered character precedes `ch`
+                                match unicode::char::compose(k, ch) {
+                                    Some(r) => {
+                                        self.composee = Some(r); // `ch` merged into the composee
+                                        continue;
+                                    }
+                                    None => {
+                                        if ch_class == 0 {
+                                            self.composee = Some(ch); // `ch` is the next composee
+                                            return Some(k);
+                                        }
+                                        self.buffer.push(ch); // emitted later, after `k`
+                                        self.last_ccc = Some(ch_class);
+                                    }
+                                }
+                            }
+                            Some(l_class) => { // class of the character buffered last
+                                if l_class >= ch_class {
+                                    // `ch` is blocked from `composee`
+                                    if ch_class == 0 {
+                                        self.composee = Some(ch);
+                                        self.last_ccc = None;
+                                        self.state = Purging; // following calls drain `buffer`
+                                        return Some(k);
+                                    }
+                                    self.buffer.push(ch);
+                                    self.last_ccc = Some(ch_class);
+                                    continue;
+                                }
+                                match unicode::char::compose(k, ch) { // `ch` is not blocked
+                                    Some(r) => {
+                                        self.composee = Some(r);
+                                        continue;
+                                    }
+                                    None => {
+                                        self.buffer.push(ch);
+                                        self.last_ccc = Some(ch_class);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    self.state = Finished; // the decomposition iterator is exhausted
+                    if self.composee.is_some() {
+                        return self.composee.take(); // composee goes out before the buffer
+                    }
+                }
+                Purging => {
+                    match self.buffer.pop_front() {
+                        None => self.state = Composing, // buffer drained: resume composing
+                        s => return s
+                    }
+                }
+                Finished => {
+                    match self.buffer.pop_front() {
+                        None => return self.composee.take(), // buffer empty: composee, if any
+                        s => return s
+                    }
+                }
+            }
+        }
+    }
+}
+
 /// Replace all occurrences of one string with another
 ///
 /// # Arguments
@@ -744,6 +845,32 @@ pub trait StrAllocating: Str {
             kind: Compatible
         }
     }
+
+    /// An Iterator over the string in Unicode Normalization Form C
+    /// (canonical decomposition followed by canonical composition).
+    #[inline]
+    fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
+        Recompositions {
+            iter: self.nfd_chars(), // the NFD iterator supplies the input characters
+            state: Composing,
+            buffer: RingBuf::new(), // nothing buffered at the start
+            composee: None,
+            last_ccc: None
+        }
+    }
+
+    /// An Iterator over the string in Unicode Normalization Form KC
+    /// (compatibility decomposition followed by canonical composition).
+    #[inline]
+    fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
+        Recompositions {
+            iter: self.nfkd_chars(), // the NFKD iterator supplies the input characters
+            state: Composing,
+            buffer: RingBuf::new(),  // nothing buffered at the start
+            composee: None,
+            last_ccc: None
+        }
+    }
 }
 
 impl<'a> StrAllocating for &'a str {
@@ -1754,39 +1881,80 @@ mod tests {
 
     #[test]
     fn test_nfd_chars() {
-        assert_eq!("abc".nfd_chars().collect::<String>(), String::from_str("abc"));
-        assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<String>(),
-                   String::from_str("d\u0307\u01c4"));
-        assert_eq!("\u2026".nfd_chars().collect::<String>(), String::from_str("\u2026"));
-        assert_eq!("\u2126".nfd_chars().collect::<String>(), String::from_str("\u03a9"));
-        assert_eq!("\u1e0b\u0323".nfd_chars().collect::<String>(),
-                   String::from_str("d\u0323\u0307"));
-        assert_eq!("\u1e0d\u0307".nfd_chars().collect::<String>(),
-                   String::from_str("d\u0323\u0307"));
-        assert_eq!("a\u0301".nfd_chars().collect::<String>(), String::from_str("a\u0301"));
-        assert_eq!("\u0301a".nfd_chars().collect::<String>(), String::from_str("\u0301a"));
-        assert_eq!("\ud4db".nfd_chars().collect::<String>(),
-                   String::from_str("\u1111\u1171\u11b6"));
-        assert_eq!("\uac1c".nfd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
+        macro_rules! t {
+            ($input: expr, $expected: expr) => {
+                assert_eq!($input.nfd_chars().collect::<String>(), $expected.into_string());
+            }
+        }
+        t!("abc", "abc");
+        t!("\u1e0b\u01c4", "d\u0307\u01c4");
+        t!("\u2026", "\u2026");
+        t!("\u2126", "\u03a9");
+        t!("\u1e0b\u0323", "d\u0323\u0307");
+        t!("\u1e0d\u0307", "d\u0323\u0307");
+        t!("a\u0301", "a\u0301");
+        t!("\u0301a", "\u0301a");
+        t!("\ud4db", "\u1111\u1171\u11b6");
+        t!("\uac1c", "\u1100\u1162");
     }
 
     #[test]
     fn test_nfkd_chars() {
-        assert_eq!("abc".nfkd_chars().collect::<String>(), String::from_str("abc"));
-        assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<String>(),
-                   String::from_str("d\u0307DZ\u030c"));
-        assert_eq!("\u2026".nfkd_chars().collect::<String>(), String::from_str("..."));
-        assert_eq!("\u2126".nfkd_chars().collect::<String>(), String::from_str("\u03a9"));
-        assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<String>(),
-                   String::from_str("d\u0323\u0307"));
-        assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<String>(),
-                   String::from_str("d\u0323\u0307"));
-        assert_eq!("a\u0301".nfkd_chars().collect::<String>(), String::from_str("a\u0301"));
-        assert_eq!("\u0301a".nfkd_chars().collect::<String>(),
-                   String::from_str("\u0301a"));
-        assert_eq!("\ud4db".nfkd_chars().collect::<String>(),
-                   String::from_str("\u1111\u1171\u11b6"));
-        assert_eq!("\uac1c".nfkd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
+        macro_rules! t {
+            ($input: expr, $expected: expr) => {
+                assert_eq!($input.nfkd_chars().collect::<String>(), $expected.into_string());
+            }
+        }
+        t!("abc", "abc");
+        t!("\u1e0b\u01c4", "d\u0307DZ\u030c");
+        t!("\u2026", "...");
+        t!("\u2126", "\u03a9");
+        t!("\u1e0b\u0323", "d\u0323\u0307");
+        t!("\u1e0d\u0307", "d\u0323\u0307");
+        t!("a\u0301", "a\u0301");
+        t!("\u0301a", "\u0301a");
+        t!("\ud4db", "\u1111\u1171\u11b6");
+        t!("\uac1c", "\u1100\u1162");
+    }
+
+    #[test]
+    fn test_nfc_chars() {
+        macro_rules! t { // t!(input, expected): collected nfc_chars() of input must equal expected
+            ($input: expr, $expected: expr) => {
+                assert_eq!($input.nfc_chars().collect::<String>(), $expected.into_string());
+            }
+        }
+        t!("abc", "abc");
+        t!("\u1e0b\u01c4", "\u1e0b\u01c4");
+        t!("\u2026", "\u2026");
+        t!("\u2126", "\u03a9"); // not composed back to the original character
+        t!("\u1e0b\u0323", "\u1e0d\u0307");
+        t!("\u1e0d\u0307", "\u1e0d\u0307");
+        t!("a\u0301", "\xe1"); // the pair composes into one character
+        t!("\u0301a", "\u0301a"); // same two characters in the other order: unchanged
+        t!("\ud4db", "\ud4db");
+        t!("\uac1c", "\uac1c");
+        t!("a\u0300\u0305\u0315\u05aeb", "\xe0\u05ae\u0305\u0315b"); // reordered, then composed
+    }
+
+    #[test]
+    fn test_nfkc_chars() {
+        macro_rules! t { // t!(input, expected): collected nfkc_chars() of input must equal expected
+            ($input: expr, $expected: expr) => {
+                assert_eq!($input.nfkc_chars().collect::<String>(), $expected.into_string());
+            }
+        }
+        t!("abc", "abc");
+        t!("\u1e0b\u01c4", "\u1e0bD\u017d"); // second character is not restored to \u01c4
+        t!("\u2026", "..."); // stays as three separate characters
+        t!("\u2126", "\u03a9");
+        t!("\u1e0b\u0323", "\u1e0d\u0307");
+        t!("\u1e0d\u0307", "\u1e0d\u0307");
+        t!("a\u0301", "\xe1"); // the pair composes into one character
+        t!("\u0301a", "\u0301a"); // same two characters in the other order: unchanged
+        t!("\ud4db", "\ud4db");
+        t!("\uac1c", "\uac1c");
+        t!("a\u0300\u0305\u0315\u05aeb", "\xe0\u05ae\u0305\u0315b"); // reordered, then composed
     }
 
     #[test]
diff --git a/src/libunicode/lib.rs b/src/libunicode/lib.rs
index c923799e812..239966cfd47 100644
--- a/src/libunicode/lib.rs
+++ b/src/libunicode/lib.rs
@@ -35,7 +35,7 @@ extern crate core;
 // regex module
 pub use tables::regex;
 
-mod decompose;
+mod normalize;
 mod tables;
 mod u_char;
 mod u_str;
@@ -61,7 +61,7 @@ pub mod char {
     pub use core::char::{from_digit, escape_unicode, escape_default};
     pub use core::char::{len_utf8_bytes, Char};
 
-    pub use decompose::{decompose_canonical, decompose_compatible};
+    pub use normalize::{decompose_canonical, decompose_compatible, compose};
 
     pub use tables::normalization::canonical_combining_class;
 
diff --git a/src/libunicode/decompose.rs b/src/libunicode/normalize.rs
index 25e06bf7c13..df0be09aea1 100644
--- a/src/libunicode/decompose.rs
+++ b/src/libunicode/normalize.rs
@@ -13,12 +13,12 @@
   for Unicode characters.
   */
 
+use core::cmp::{Equal, Less, Greater};
 use core::option::{Option, Some, None};
 use core::slice::ImmutableVector;
-use tables::normalization::{canonical_table, compatibility_table};
+use tables::normalization::{canonical_table, compatibility_table, composition_table};
 
-fn bsearch_table(c: char, r: &'static [(char, &'static [char])]) -> Option<&'static [char]> {
-    use core::cmp::{Equal, Less, Greater};
+fn bsearch_table<T>(c: char, r: &'static [(char, &'static [T])]) -> Option<&'static [T]> {
     match r.bsearch(|&(val, _)| {
         if c == val { Equal }
         else if val < c { Less }
@@ -80,6 +80,27 @@ fn d(c: char, i: |char|, k: bool) {
     i(c);
 }
 
+/// Canonically composes `a` followed by `b` into a single character.
+///
+/// Hangul composition is tried first via `compose_hangul`; otherwise `a` is
+/// looked up in `composition_table` and `b` among its candidate pairs.
+/// Returns `None` when neither yields a composite.
+pub fn compose(a: char, b: char) -> Option<char> {
+    compose_hangul(a, b).or_else(|| {
+        bsearch_table(a, composition_table).and_then(|pairs| {
+            let found = pairs.bsearch(|&(second, _)| {
+                if b == second { Equal }
+                else if second < b { Less }
+                else { Greater }
+            });
+            found.map(|idx| {
+                let (_, composed) = pairs[idx];
+                composed
+            })
+        })
+    })
+}
+
 // Constants from Unicode 6.3.0 Section 3.12 Conjoining Jamo Behavior
 static S_BASE: u32 = 0xAC00;
 static L_BASE: u32 = 0x1100;
@@ -92,6 +113,7 @@ static N_COUNT: u32 = (V_COUNT * T_COUNT);
 static S_COUNT: u32 = (L_COUNT * N_COUNT);
 
 // Decompose a precomposed Hangul syllable
+#[inline(always)]
 fn decompose_hangul(s: char, f: |char|) {
     use core::mem::transmute;
 
@@ -110,3 +132,22 @@ fn decompose_hangul(s: char, f: |char|) {
         }
     }
 }
+
+// Compose a pair of Hangul Jamo: L + V gives an LV syllable, LV syllable + T gives an LVT one
+#[inline(always)]
+fn compose_hangul(a: char, b: char) -> Option<char> {
+    use core::mem::transmute;
+    let l = a as u32;
+    let v = b as u32;
+    // Compose an LPart and a VPart
+    if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
+        let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
+        return unsafe { Some(transmute(r)) }; // r is below S_BASE + S_COUNT by the bounds above
+    }
+    // Compose an LVPart and a TPart: `a` must not already end in a T, and `b` must be above T_BASE
+    if S_BASE <= l && l < (S_BASE + S_COUNT) && (l - S_BASE) % T_COUNT == 0
+        && T_BASE < v && v < (T_BASE + T_COUNT) { // v == T_BASE would return `a` and drop `b`
+        return unsafe { Some(transmute(l + (v - T_BASE))) }; // stays within the same LV group
+    }
+    None
+}
diff --git a/src/libunicode/tables.rs b/src/libunicode/tables.rs
index 650f93abd3e..4332fc596c9 100644
--- a/src/libunicode/tables.rs
+++ b/src/libunicode/tables.rs
@@ -5922,6 +5922,310 @@ pub mod normalization {
         &['\u53ef'])
     ];
 
+    // Canonical compositions
+    pub static composition_table: &'static [(char, &'static [(char, char)])] = &[
+        ('\x3c', &[('\u0338', '\u226e')]), ('\x3d', &[('\u0338', '\u2260')]), ('\x3e', &[('\u0338',
+        '\u226f')]), ('\x41', &[('\u0300', '\xc0'), ('\u0301', '\xc1'), ('\u0302', '\xc2'),
+        ('\u0303', '\xc3'), ('\u0304', '\u0100'), ('\u0306', '\u0102'), ('\u0307', '\u0226'),
+        ('\u0308', '\xc4'), ('\u0309', '\u1ea2'), ('\u030a', '\xc5'), ('\u030c', '\u01cd'),
+        ('\u030f', '\u0200'), ('\u0311', '\u0202'), ('\u0323', '\u1ea0'), ('\u0325', '\u1e00'),
+        ('\u0328', '\u0104')]), ('\x42', &[('\u0307', '\u1e02'), ('\u0323', '\u1e04'), ('\u0331',
+        '\u1e06')]), ('\x43', &[('\u0301', '\u0106'), ('\u0302', '\u0108'), ('\u0307', '\u010a'),
+        ('\u030c', '\u010c'), ('\u0327', '\xc7')]), ('\x44', &[('\u0307', '\u1e0a'), ('\u030c',
+        '\u010e'), ('\u0323', '\u1e0c'), ('\u0327', '\u1e10'), ('\u032d', '\u1e12'), ('\u0331',
+        '\u1e0e')]), ('\x45', &[('\u0300', '\xc8'), ('\u0301', '\xc9'), ('\u0302', '\xca'),
+        ('\u0303', '\u1ebc'), ('\u0304', '\u0112'), ('\u0306', '\u0114'), ('\u0307', '\u0116'),
+        ('\u0308', '\xcb'), ('\u0309', '\u1eba'), ('\u030c', '\u011a'), ('\u030f', '\u0204'),
+        ('\u0311', '\u0206'), ('\u0323', '\u1eb8'), ('\u0327', '\u0228'), ('\u0328', '\u0118'),
+        ('\u032d', '\u1e18'), ('\u0330', '\u1e1a')]), ('\x46', &[('\u0307', '\u1e1e')]), ('\x47',
+        &[('\u0301', '\u01f4'), ('\u0302', '\u011c'), ('\u0304', '\u1e20'), ('\u0306', '\u011e'),
+        ('\u0307', '\u0120'), ('\u030c', '\u01e6'), ('\u0327', '\u0122')]), ('\x48', &[('\u0302',
+        '\u0124'), ('\u0307', '\u1e22'), ('\u0308', '\u1e26'), ('\u030c', '\u021e'), ('\u0323',
+        '\u1e24'), ('\u0327', '\u1e28'), ('\u032e', '\u1e2a')]), ('\x49', &[('\u0300', '\xcc'),
+        ('\u0301', '\xcd'), ('\u0302', '\xce'), ('\u0303', '\u0128'), ('\u0304', '\u012a'),
+        ('\u0306', '\u012c'), ('\u0307', '\u0130'), ('\u0308', '\xcf'), ('\u0309', '\u1ec8'),
+        ('\u030c', '\u01cf'), ('\u030f', '\u0208'), ('\u0311', '\u020a'), ('\u0323', '\u1eca'),
+        ('\u0328', '\u012e'), ('\u0330', '\u1e2c')]), ('\x4a', &[('\u0302', '\u0134')]), ('\x4b',
+        &[('\u0301', '\u1e30'), ('\u030c', '\u01e8'), ('\u0323', '\u1e32'), ('\u0327', '\u0136'),
+        ('\u0331', '\u1e34')]), ('\x4c', &[('\u0301', '\u0139'), ('\u030c', '\u013d'), ('\u0323',
+        '\u1e36'), ('\u0327', '\u013b'), ('\u032d', '\u1e3c'), ('\u0331', '\u1e3a')]), ('\x4d',
+        &[('\u0301', '\u1e3e'), ('\u0307', '\u1e40'), ('\u0323', '\u1e42')]), ('\x4e', &[('\u0300',
+        '\u01f8'), ('\u0301', '\u0143'), ('\u0303', '\xd1'), ('\u0307', '\u1e44'), ('\u030c',
+        '\u0147'), ('\u0323', '\u1e46'), ('\u0327', '\u0145'), ('\u032d', '\u1e4a'), ('\u0331',
+        '\u1e48')]), ('\x4f', &[('\u0300', '\xd2'), ('\u0301', '\xd3'), ('\u0302', '\xd4'),
+        ('\u0303', '\xd5'), ('\u0304', '\u014c'), ('\u0306', '\u014e'), ('\u0307', '\u022e'),
+        ('\u0308', '\xd6'), ('\u0309', '\u1ece'), ('\u030b', '\u0150'), ('\u030c', '\u01d1'),
+        ('\u030f', '\u020c'), ('\u0311', '\u020e'), ('\u031b', '\u01a0'), ('\u0323', '\u1ecc'),
+        ('\u0328', '\u01ea')]), ('\x50', &[('\u0301', '\u1e54'), ('\u0307', '\u1e56')]), ('\x52',
+        &[('\u0301', '\u0154'), ('\u0307', '\u1e58'), ('\u030c', '\u0158'), ('\u030f', '\u0210'),
+        ('\u0311', '\u0212'), ('\u0323', '\u1e5a'), ('\u0327', '\u0156'), ('\u0331', '\u1e5e')]),
+        ('\x53', &[('\u0301', '\u015a'), ('\u0302', '\u015c'), ('\u0307', '\u1e60'), ('\u030c',
+        '\u0160'), ('\u0323', '\u1e62'), ('\u0326', '\u0218'), ('\u0327', '\u015e')]), ('\x54',
+        &[('\u0307', '\u1e6a'), ('\u030c', '\u0164'), ('\u0323', '\u1e6c'), ('\u0326', '\u021a'),
+        ('\u0327', '\u0162'), ('\u032d', '\u1e70'), ('\u0331', '\u1e6e')]), ('\x55', &[('\u0300',
+        '\xd9'), ('\u0301', '\xda'), ('\u0302', '\xdb'), ('\u0303', '\u0168'), ('\u0304', '\u016a'),
+        ('\u0306', '\u016c'), ('\u0308', '\xdc'), ('\u0309', '\u1ee6'), ('\u030a', '\u016e'),
+        ('\u030b', '\u0170'), ('\u030c', '\u01d3'), ('\u030f', '\u0214'), ('\u0311', '\u0216'),
+        ('\u031b', '\u01af'), ('\u0323', '\u1ee4'), ('\u0324', '\u1e72'), ('\u0328', '\u0172'),
+        ('\u032d', '\u1e76'), ('\u0330', '\u1e74')]), ('\x56', &[('\u0303', '\u1e7c'), ('\u0323',
+        '\u1e7e')]), ('\x57', &[('\u0300', '\u1e80'), ('\u0301', '\u1e82'), ('\u0302', '\u0174'),
+        ('\u0307', '\u1e86'), ('\u0308', '\u1e84'), ('\u0323', '\u1e88')]), ('\x58', &[('\u0307',
+        '\u1e8a'), ('\u0308', '\u1e8c')]), ('\x59', &[('\u0300', '\u1ef2'), ('\u0301', '\xdd'),
+        ('\u0302', '\u0176'), ('\u0303', '\u1ef8'), ('\u0304', '\u0232'), ('\u0307', '\u1e8e'),
+        ('\u0308', '\u0178'), ('\u0309', '\u1ef6'), ('\u0323', '\u1ef4')]), ('\x5a', &[('\u0301',
+        '\u0179'), ('\u0302', '\u1e90'), ('\u0307', '\u017b'), ('\u030c', '\u017d'), ('\u0323',
+        '\u1e92'), ('\u0331', '\u1e94')]), ('\x61', &[('\u0300', '\xe0'), ('\u0301', '\xe1'),
+        ('\u0302', '\xe2'), ('\u0303', '\xe3'), ('\u0304', '\u0101'), ('\u0306', '\u0103'),
+        ('\u0307', '\u0227'), ('\u0308', '\xe4'), ('\u0309', '\u1ea3'), ('\u030a', '\xe5'),
+        ('\u030c', '\u01ce'), ('\u030f', '\u0201'), ('\u0311', '\u0203'), ('\u0323', '\u1ea1'),
+        ('\u0325', '\u1e01'), ('\u0328', '\u0105')]), ('\x62', &[('\u0307', '\u1e03'), ('\u0323',
+        '\u1e05'), ('\u0331', '\u1e07')]), ('\x63', &[('\u0301', '\u0107'), ('\u0302', '\u0109'),
+        ('\u0307', '\u010b'), ('\u030c', '\u010d'), ('\u0327', '\xe7')]), ('\x64', &[('\u0307',
+        '\u1e0b'), ('\u030c', '\u010f'), ('\u0323', '\u1e0d'), ('\u0327', '\u1e11'), ('\u032d',
+        '\u1e13'), ('\u0331', '\u1e0f')]), ('\x65', &[('\u0300', '\xe8'), ('\u0301', '\xe9'),
+        ('\u0302', '\xea'), ('\u0303', '\u1ebd'), ('\u0304', '\u0113'), ('\u0306', '\u0115'),
+        ('\u0307', '\u0117'), ('\u0308', '\xeb'), ('\u0309', '\u1ebb'), ('\u030c', '\u011b'),
+        ('\u030f', '\u0205'), ('\u0311', '\u0207'), ('\u0323', '\u1eb9'), ('\u0327', '\u0229'),
+        ('\u0328', '\u0119'), ('\u032d', '\u1e19'), ('\u0330', '\u1e1b')]), ('\x66', &[('\u0307',
+        '\u1e1f')]), ('\x67', &[('\u0301', '\u01f5'), ('\u0302', '\u011d'), ('\u0304', '\u1e21'),
+        ('\u0306', '\u011f'), ('\u0307', '\u0121'), ('\u030c', '\u01e7'), ('\u0327', '\u0123')]),
+        ('\x68', &[('\u0302', '\u0125'), ('\u0307', '\u1e23'), ('\u0308', '\u1e27'), ('\u030c',
+        '\u021f'), ('\u0323', '\u1e25'), ('\u0327', '\u1e29'), ('\u032e', '\u1e2b'), ('\u0331',
+        '\u1e96')]), ('\x69', &[('\u0300', '\xec'), ('\u0301', '\xed'), ('\u0302', '\xee'),
+        ('\u0303', '\u0129'), ('\u0304', '\u012b'), ('\u0306', '\u012d'), ('\u0308', '\xef'),
+        ('\u0309', '\u1ec9'), ('\u030c', '\u01d0'), ('\u030f', '\u0209'), ('\u0311', '\u020b'),
+        ('\u0323', '\u1ecb'), ('\u0328', '\u012f'), ('\u0330', '\u1e2d')]), ('\x6a', &[('\u0302',
+        '\u0135'), ('\u030c', '\u01f0')]), ('\x6b', &[('\u0301', '\u1e31'), ('\u030c', '\u01e9'),
+        ('\u0323', '\u1e33'), ('\u0327', '\u0137'), ('\u0331', '\u1e35')]), ('\x6c', &[('\u0301',
+        '\u013a'), ('\u030c', '\u013e'), ('\u0323', '\u1e37'), ('\u0327', '\u013c'), ('\u032d',
+        '\u1e3d'), ('\u0331', '\u1e3b')]), ('\x6d', &[('\u0301', '\u1e3f'), ('\u0307', '\u1e41'),
+        ('\u0323', '\u1e43')]), ('\x6e', &[('\u0300', '\u01f9'), ('\u0301', '\u0144'), ('\u0303',
+        '\xf1'), ('\u0307', '\u1e45'), ('\u030c', '\u0148'), ('\u0323', '\u1e47'), ('\u0327',
+        '\u0146'), ('\u032d', '\u1e4b'), ('\u0331', '\u1e49')]), ('\x6f', &[('\u0300', '\xf2'),
+        ('\u0301', '\xf3'), ('\u0302', '\xf4'), ('\u0303', '\xf5'), ('\u0304', '\u014d'), ('\u0306',
+        '\u014f'), ('\u0307', '\u022f'), ('\u0308', '\xf6'), ('\u0309', '\u1ecf'), ('\u030b',
+        '\u0151'), ('\u030c', '\u01d2'), ('\u030f', '\u020d'), ('\u0311', '\u020f'), ('\u031b',
+        '\u01a1'), ('\u0323', '\u1ecd'), ('\u0328', '\u01eb')]), ('\x70', &[('\u0301', '\u1e55'),
+        ('\u0307', '\u1e57')]), ('\x72', &[('\u0301', '\u0155'), ('\u0307', '\u1e59'), ('\u030c',
+        '\u0159'), ('\u030f', '\u0211'), ('\u0311', '\u0213'), ('\u0323', '\u1e5b'), ('\u0327',
+        '\u0157'), ('\u0331', '\u1e5f')]), ('\x73', &[('\u0301', '\u015b'), ('\u0302', '\u015d'),
+        ('\u0307', '\u1e61'), ('\u030c', '\u0161'), ('\u0323', '\u1e63'), ('\u0326', '\u0219'),
+        ('\u0327', '\u015f')]), ('\x74', &[('\u0307', '\u1e6b'), ('\u0308', '\u1e97'), ('\u030c',
+        '\u0165'), ('\u0323', '\u1e6d'), ('\u0326', '\u021b'), ('\u0327', '\u0163'), ('\u032d',
+        '\u1e71'), ('\u0331', '\u1e6f')]), ('\x75', &[('\u0300', '\xf9'), ('\u0301', '\xfa'),
+        ('\u0302', '\xfb'), ('\u0303', '\u0169'), ('\u0304', '\u016b'), ('\u0306', '\u016d'),
+        ('\u0308', '\xfc'), ('\u0309', '\u1ee7'), ('\u030a', '\u016f'), ('\u030b', '\u0171'),
+        ('\u030c', '\u01d4'), ('\u030f', '\u0215'), ('\u0311', '\u0217'), ('\u031b', '\u01b0'),
+        ('\u0323', '\u1ee5'), ('\u0324', '\u1e73'), ('\u0328', '\u0173'), ('\u032d', '\u1e77'),
+        ('\u0330', '\u1e75')]), ('\x76', &[('\u0303', '\u1e7d'), ('\u0323', '\u1e7f')]), ('\x77',
+        &[('\u0300', '\u1e81'), ('\u0301', '\u1e83'), ('\u0302', '\u0175'), ('\u0307', '\u1e87'),
+        ('\u0308', '\u1e85'), ('\u030a', '\u1e98'), ('\u0323', '\u1e89')]), ('\x78', &[('\u0307',
+        '\u1e8b'), ('\u0308', '\u1e8d')]), ('\x79', &[('\u0300', '\u1ef3'), ('\u0301', '\xfd'),
+        ('\u0302', '\u0177'), ('\u0303', '\u1ef9'), ('\u0304', '\u0233'), ('\u0307', '\u1e8f'),
+        ('\u0308', '\xff'), ('\u0309', '\u1ef7'), ('\u030a', '\u1e99'), ('\u0323', '\u1ef5')]),
+        ('\x7a', &[('\u0301', '\u017a'), ('\u0302', '\u1e91'), ('\u0307', '\u017c'), ('\u030c',
+        '\u017e'), ('\u0323', '\u1e93'), ('\u0331', '\u1e95')]), ('\xa8', &[('\u0300', '\u1fed'),
+        ('\u0301', '\u0385'), ('\u0342', '\u1fc1')]), ('\xc2', &[('\u0300', '\u1ea6'), ('\u0301',
+        '\u1ea4'), ('\u0303', '\u1eaa'), ('\u0309', '\u1ea8')]), ('\xc4', &[('\u0304', '\u01de')]),
+        ('\xc5', &[('\u0301', '\u01fa')]), ('\xc6', &[('\u0301', '\u01fc'), ('\u0304', '\u01e2')]),
+        ('\xc7', &[('\u0301', '\u1e08')]), ('\xca', &[('\u0300', '\u1ec0'), ('\u0301', '\u1ebe'),
+        ('\u0303', '\u1ec4'), ('\u0309', '\u1ec2')]), ('\xcf', &[('\u0301', '\u1e2e')]), ('\xd4',
+        &[('\u0300', '\u1ed2'), ('\u0301', '\u1ed0'), ('\u0303', '\u1ed6'), ('\u0309', '\u1ed4')]),
+        ('\xd5', &[('\u0301', '\u1e4c'), ('\u0304', '\u022c'), ('\u0308', '\u1e4e')]), ('\xd6',
+        &[('\u0304', '\u022a')]), ('\xd8', &[('\u0301', '\u01fe')]), ('\xdc', &[('\u0300',
+        '\u01db'), ('\u0301', '\u01d7'), ('\u0304', '\u01d5'), ('\u030c', '\u01d9')]), ('\xe2',
+        &[('\u0300', '\u1ea7'), ('\u0301', '\u1ea5'), ('\u0303', '\u1eab'), ('\u0309', '\u1ea9')]),
+        ('\xe4', &[('\u0304', '\u01df')]), ('\xe5', &[('\u0301', '\u01fb')]), ('\xe6', &[('\u0301',
+        '\u01fd'), ('\u0304', '\u01e3')]), ('\xe7', &[('\u0301', '\u1e09')]), ('\xea', &[('\u0300',
+        '\u1ec1'), ('\u0301', '\u1ebf'), ('\u0303', '\u1ec5'), ('\u0309', '\u1ec3')]), ('\xef',
+        &[('\u0301', '\u1e2f')]), ('\xf4', &[('\u0300', '\u1ed3'), ('\u0301', '\u1ed1'), ('\u0303',
+        '\u1ed7'), ('\u0309', '\u1ed5')]), ('\xf5', &[('\u0301', '\u1e4d'), ('\u0304', '\u022d'),
+        ('\u0308', '\u1e4f')]), ('\xf6', &[('\u0304', '\u022b')]), ('\xf8', &[('\u0301',
+        '\u01ff')]), ('\xfc', &[('\u0300', '\u01dc'), ('\u0301', '\u01d8'), ('\u0304', '\u01d6'),
+        ('\u030c', '\u01da')]), ('\u0102', &[('\u0300', '\u1eb0'), ('\u0301', '\u1eae'), ('\u0303',
+        '\u1eb4'), ('\u0309', '\u1eb2')]), ('\u0103', &[('\u0300', '\u1eb1'), ('\u0301', '\u1eaf'),
+        ('\u0303', '\u1eb5'), ('\u0309', '\u1eb3')]), ('\u0112', &[('\u0300', '\u1e14'), ('\u0301',
+        '\u1e16')]), ('\u0113', &[('\u0300', '\u1e15'), ('\u0301', '\u1e17')]), ('\u014c',
+        &[('\u0300', '\u1e50'), ('\u0301', '\u1e52')]), ('\u014d', &[('\u0300', '\u1e51'),
+        ('\u0301', '\u1e53')]), ('\u015a', &[('\u0307', '\u1e64')]), ('\u015b', &[('\u0307',
+        '\u1e65')]), ('\u0160', &[('\u0307', '\u1e66')]), ('\u0161', &[('\u0307', '\u1e67')]),
+        ('\u0168', &[('\u0301', '\u1e78')]), ('\u0169', &[('\u0301', '\u1e79')]), ('\u016a',
+        &[('\u0308', '\u1e7a')]), ('\u016b', &[('\u0308', '\u1e7b')]), ('\u017f', &[('\u0307',
+        '\u1e9b')]), ('\u01a0', &[('\u0300', '\u1edc'), ('\u0301', '\u1eda'), ('\u0303', '\u1ee0'),
+        ('\u0309', '\u1ede'), ('\u0323', '\u1ee2')]), ('\u01a1', &[('\u0300', '\u1edd'), ('\u0301',
+        '\u1edb'), ('\u0303', '\u1ee1'), ('\u0309', '\u1edf'), ('\u0323', '\u1ee3')]), ('\u01af',
+        &[('\u0300', '\u1eea'), ('\u0301', '\u1ee8'), ('\u0303', '\u1eee'), ('\u0309', '\u1eec'),
+        ('\u0323', '\u1ef0')]), ('\u01b0', &[('\u0300', '\u1eeb'), ('\u0301', '\u1ee9'), ('\u0303',
+        '\u1eef'), ('\u0309', '\u1eed'), ('\u0323', '\u1ef1')]), ('\u01b7', &[('\u030c',
+        '\u01ee')]), ('\u01ea', &[('\u0304', '\u01ec')]), ('\u01eb', &[('\u0304', '\u01ed')]),
+        ('\u0226', &[('\u0304', '\u01e0')]), ('\u0227', &[('\u0304', '\u01e1')]), ('\u0228',
+        &[('\u0306', '\u1e1c')]), ('\u0229', &[('\u0306', '\u1e1d')]), ('\u022e', &[('\u0304',
+        '\u0230')]), ('\u022f', &[('\u0304', '\u0231')]), ('\u0292', &[('\u030c', '\u01ef')]),
+        ('\u0391', &[('\u0300', '\u1fba'), ('\u0301', '\u0386'), ('\u0304', '\u1fb9'), ('\u0306',
+        '\u1fb8'), ('\u0313', '\u1f08'), ('\u0314', '\u1f09'), ('\u0345', '\u1fbc')]), ('\u0395',
+        &[('\u0300', '\u1fc8'), ('\u0301', '\u0388'), ('\u0313', '\u1f18'), ('\u0314', '\u1f19')]),
+        ('\u0397', &[('\u0300', '\u1fca'), ('\u0301', '\u0389'), ('\u0313', '\u1f28'), ('\u0314',
+        '\u1f29'), ('\u0345', '\u1fcc')]), ('\u0399', &[('\u0300', '\u1fda'), ('\u0301', '\u038a'),
+        ('\u0304', '\u1fd9'), ('\u0306', '\u1fd8'), ('\u0308', '\u03aa'), ('\u0313', '\u1f38'),
+        ('\u0314', '\u1f39')]), ('\u039f', &[('\u0300', '\u1ff8'), ('\u0301', '\u038c'), ('\u0313',
+        '\u1f48'), ('\u0314', '\u1f49')]), ('\u03a1', &[('\u0314', '\u1fec')]), ('\u03a5',
+        &[('\u0300', '\u1fea'), ('\u0301', '\u038e'), ('\u0304', '\u1fe9'), ('\u0306', '\u1fe8'),
+        ('\u0308', '\u03ab'), ('\u0314', '\u1f59')]), ('\u03a9', &[('\u0300', '\u1ffa'), ('\u0301',
+        '\u038f'), ('\u0313', '\u1f68'), ('\u0314', '\u1f69'), ('\u0345', '\u1ffc')]), ('\u03ac',
+        &[('\u0345', '\u1fb4')]), ('\u03ae', &[('\u0345', '\u1fc4')]), ('\u03b1', &[('\u0300',
+        '\u1f70'), ('\u0301', '\u03ac'), ('\u0304', '\u1fb1'), ('\u0306', '\u1fb0'), ('\u0313',
+        '\u1f00'), ('\u0314', '\u1f01'), ('\u0342', '\u1fb6'), ('\u0345', '\u1fb3')]), ('\u03b5',
+        &[('\u0300', '\u1f72'), ('\u0301', '\u03ad'), ('\u0313', '\u1f10'), ('\u0314', '\u1f11')]),
+        ('\u03b7', &[('\u0300', '\u1f74'), ('\u0301', '\u03ae'), ('\u0313', '\u1f20'), ('\u0314',
+        '\u1f21'), ('\u0342', '\u1fc6'), ('\u0345', '\u1fc3')]), ('\u03b9', &[('\u0300', '\u1f76'),
+        ('\u0301', '\u03af'), ('\u0304', '\u1fd1'), ('\u0306', '\u1fd0'), ('\u0308', '\u03ca'),
+        ('\u0313', '\u1f30'), ('\u0314', '\u1f31'), ('\u0342', '\u1fd6')]), ('\u03bf', &[('\u0300',
+        '\u1f78'), ('\u0301', '\u03cc'), ('\u0313', '\u1f40'), ('\u0314', '\u1f41')]), ('\u03c1',
+        &[('\u0313', '\u1fe4'), ('\u0314', '\u1fe5')]), ('\u03c5', &[('\u0300', '\u1f7a'),
+        ('\u0301', '\u03cd'), ('\u0304', '\u1fe1'), ('\u0306', '\u1fe0'), ('\u0308', '\u03cb'),
+        ('\u0313', '\u1f50'), ('\u0314', '\u1f51'), ('\u0342', '\u1fe6')]), ('\u03c9', &[('\u0300',
+        '\u1f7c'), ('\u0301', '\u03ce'), ('\u0313', '\u1f60'), ('\u0314', '\u1f61'), ('\u0342',
+        '\u1ff6'), ('\u0345', '\u1ff3')]), ('\u03ca', &[('\u0300', '\u1fd2'), ('\u0301', '\u0390'),
+        ('\u0342', '\u1fd7')]), ('\u03cb', &[('\u0300', '\u1fe2'), ('\u0301', '\u03b0'), ('\u0342',
+        '\u1fe7')]), ('\u03ce', &[('\u0345', '\u1ff4')]), ('\u03d2', &[('\u0301', '\u03d3'),
+        ('\u0308', '\u03d4')]), ('\u0406', &[('\u0308', '\u0407')]), ('\u0410', &[('\u0306',
+        '\u04d0'), ('\u0308', '\u04d2')]), ('\u0413', &[('\u0301', '\u0403')]), ('\u0415',
+        &[('\u0300', '\u0400'), ('\u0306', '\u04d6'), ('\u0308', '\u0401')]), ('\u0416',
+        &[('\u0306', '\u04c1'), ('\u0308', '\u04dc')]), ('\u0417', &[('\u0308', '\u04de')]),
+        ('\u0418', &[('\u0300', '\u040d'), ('\u0304', '\u04e2'), ('\u0306', '\u0419'), ('\u0308',
+        '\u04e4')]), ('\u041a', &[('\u0301', '\u040c')]), ('\u041e', &[('\u0308', '\u04e6')]),
+        ('\u0423', &[('\u0304', '\u04ee'), ('\u0306', '\u040e'), ('\u0308', '\u04f0'), ('\u030b',
+        '\u04f2')]), ('\u0427', &[('\u0308', '\u04f4')]), ('\u042b', &[('\u0308', '\u04f8')]),
+        ('\u042d', &[('\u0308', '\u04ec')]), ('\u0430', &[('\u0306', '\u04d1'), ('\u0308',
+        '\u04d3')]), ('\u0433', &[('\u0301', '\u0453')]), ('\u0435', &[('\u0300', '\u0450'),
+        ('\u0306', '\u04d7'), ('\u0308', '\u0451')]), ('\u0436', &[('\u0306', '\u04c2'), ('\u0308',
+        '\u04dd')]), ('\u0437', &[('\u0308', '\u04df')]), ('\u0438', &[('\u0300', '\u045d'),
+        ('\u0304', '\u04e3'), ('\u0306', '\u0439'), ('\u0308', '\u04e5')]), ('\u043a', &[('\u0301',
+        '\u045c')]), ('\u043e', &[('\u0308', '\u04e7')]), ('\u0443', &[('\u0304', '\u04ef'),
+        ('\u0306', '\u045e'), ('\u0308', '\u04f1'), ('\u030b', '\u04f3')]), ('\u0447', &[('\u0308',
+        '\u04f5')]), ('\u044b', &[('\u0308', '\u04f9')]), ('\u044d', &[('\u0308', '\u04ed')]),
+        ('\u0456', &[('\u0308', '\u0457')]), ('\u0474', &[('\u030f', '\u0476')]), ('\u0475',
+        &[('\u030f', '\u0477')]), ('\u04d8', &[('\u0308', '\u04da')]), ('\u04d9', &[('\u0308',
+        '\u04db')]), ('\u04e8', &[('\u0308', '\u04ea')]), ('\u04e9', &[('\u0308', '\u04eb')]),
+        ('\u0627', &[('\u0653', '\u0622'), ('\u0654', '\u0623'), ('\u0655', '\u0625')]), ('\u0648',
+        &[('\u0654', '\u0624')]), ('\u064a', &[('\u0654', '\u0626')]), ('\u06c1', &[('\u0654',
+        '\u06c2')]), ('\u06d2', &[('\u0654', '\u06d3')]), ('\u06d5', &[('\u0654', '\u06c0')]),
+        ('\u0928', &[('\u093c', '\u0929')]), ('\u0930', &[('\u093c', '\u0931')]), ('\u0933',
+        &[('\u093c', '\u0934')]), ('\u09c7', &[('\u09be', '\u09cb'), ('\u09d7', '\u09cc')]),
+        ('\u0b47', &[('\u0b3e', '\u0b4b'), ('\u0b56', '\u0b48'), ('\u0b57', '\u0b4c')]), ('\u0b92',
+        &[('\u0bd7', '\u0b94')]), ('\u0bc6', &[('\u0bbe', '\u0bca'), ('\u0bd7', '\u0bcc')]),
+        ('\u0bc7', &[('\u0bbe', '\u0bcb')]), ('\u0c46', &[('\u0c56', '\u0c48')]), ('\u0cbf',
+        &[('\u0cd5', '\u0cc0')]), ('\u0cc6', &[('\u0cc2', '\u0cca'), ('\u0cd5', '\u0cc7'),
+        ('\u0cd6', '\u0cc8')]), ('\u0cca', &[('\u0cd5', '\u0ccb')]), ('\u0d46', &[('\u0d3e',
+        '\u0d4a'), ('\u0d57', '\u0d4c')]), ('\u0d47', &[('\u0d3e', '\u0d4b')]), ('\u0dd9',
+        &[('\u0dca', '\u0dda'), ('\u0dcf', '\u0ddc'), ('\u0ddf', '\u0dde')]), ('\u0ddc',
+        &[('\u0dca', '\u0ddd')]), ('\u1025', &[('\u102e', '\u1026')]), ('\u1b05', &[('\u1b35',
+        '\u1b06')]), ('\u1b07', &[('\u1b35', '\u1b08')]), ('\u1b09', &[('\u1b35', '\u1b0a')]),
+        ('\u1b0b', &[('\u1b35', '\u1b0c')]), ('\u1b0d', &[('\u1b35', '\u1b0e')]), ('\u1b11',
+        &[('\u1b35', '\u1b12')]), ('\u1b3a', &[('\u1b35', '\u1b3b')]), ('\u1b3c', &[('\u1b35',
+        '\u1b3d')]), ('\u1b3e', &[('\u1b35', '\u1b40')]), ('\u1b3f', &[('\u1b35', '\u1b41')]),
+        ('\u1b42', &[('\u1b35', '\u1b43')]), ('\u1e36', &[('\u0304', '\u1e38')]), ('\u1e37',
+        &[('\u0304', '\u1e39')]), ('\u1e5a', &[('\u0304', '\u1e5c')]), ('\u1e5b', &[('\u0304',
+        '\u1e5d')]), ('\u1e62', &[('\u0307', '\u1e68')]), ('\u1e63', &[('\u0307', '\u1e69')]),
+        ('\u1ea0', &[('\u0302', '\u1eac'), ('\u0306', '\u1eb6')]), ('\u1ea1', &[('\u0302',
+        '\u1ead'), ('\u0306', '\u1eb7')]), ('\u1eb8', &[('\u0302', '\u1ec6')]), ('\u1eb9',
+        &[('\u0302', '\u1ec7')]), ('\u1ecc', &[('\u0302', '\u1ed8')]), ('\u1ecd', &[('\u0302',
+        '\u1ed9')]), ('\u1f00', &[('\u0300', '\u1f02'), ('\u0301', '\u1f04'), ('\u0342', '\u1f06'),
+        ('\u0345', '\u1f80')]), ('\u1f01', &[('\u0300', '\u1f03'), ('\u0301', '\u1f05'), ('\u0342',
+        '\u1f07'), ('\u0345', '\u1f81')]), ('\u1f02', &[('\u0345', '\u1f82')]), ('\u1f03',
+        &[('\u0345', '\u1f83')]), ('\u1f04', &[('\u0345', '\u1f84')]), ('\u1f05', &[('\u0345',
+        '\u1f85')]), ('\u1f06', &[('\u0345', '\u1f86')]), ('\u1f07', &[('\u0345', '\u1f87')]),
+        ('\u1f08', &[('\u0300', '\u1f0a'), ('\u0301', '\u1f0c'), ('\u0342', '\u1f0e'), ('\u0345',
+        '\u1f88')]), ('\u1f09', &[('\u0300', '\u1f0b'), ('\u0301', '\u1f0d'), ('\u0342', '\u1f0f'),
+        ('\u0345', '\u1f89')]), ('\u1f0a', &[('\u0345', '\u1f8a')]), ('\u1f0b', &[('\u0345',
+        '\u1f8b')]), ('\u1f0c', &[('\u0345', '\u1f8c')]), ('\u1f0d', &[('\u0345', '\u1f8d')]),
+        ('\u1f0e', &[('\u0345', '\u1f8e')]), ('\u1f0f', &[('\u0345', '\u1f8f')]), ('\u1f10',
+        &[('\u0300', '\u1f12'), ('\u0301', '\u1f14')]), ('\u1f11', &[('\u0300', '\u1f13'),
+        ('\u0301', '\u1f15')]), ('\u1f18', &[('\u0300', '\u1f1a'), ('\u0301', '\u1f1c')]),
+        ('\u1f19', &[('\u0300', '\u1f1b'), ('\u0301', '\u1f1d')]), ('\u1f20', &[('\u0300',
+        '\u1f22'), ('\u0301', '\u1f24'), ('\u0342', '\u1f26'), ('\u0345', '\u1f90')]), ('\u1f21',
+        &[('\u0300', '\u1f23'), ('\u0301', '\u1f25'), ('\u0342', '\u1f27'), ('\u0345', '\u1f91')]),
+        ('\u1f22', &[('\u0345', '\u1f92')]), ('\u1f23', &[('\u0345', '\u1f93')]), ('\u1f24',
+        &[('\u0345', '\u1f94')]), ('\u1f25', &[('\u0345', '\u1f95')]), ('\u1f26', &[('\u0345',
+        '\u1f96')]), ('\u1f27', &[('\u0345', '\u1f97')]), ('\u1f28', &[('\u0300', '\u1f2a'),
+        ('\u0301', '\u1f2c'), ('\u0342', '\u1f2e'), ('\u0345', '\u1f98')]), ('\u1f29', &[('\u0300',
+        '\u1f2b'), ('\u0301', '\u1f2d'), ('\u0342', '\u1f2f'), ('\u0345', '\u1f99')]), ('\u1f2a',
+        &[('\u0345', '\u1f9a')]), ('\u1f2b', &[('\u0345', '\u1f9b')]), ('\u1f2c', &[('\u0345',
+        '\u1f9c')]), ('\u1f2d', &[('\u0345', '\u1f9d')]), ('\u1f2e', &[('\u0345', '\u1f9e')]),
+        ('\u1f2f', &[('\u0345', '\u1f9f')]), ('\u1f30', &[('\u0300', '\u1f32'), ('\u0301',
+        '\u1f34'), ('\u0342', '\u1f36')]), ('\u1f31', &[('\u0300', '\u1f33'), ('\u0301', '\u1f35'),
+        ('\u0342', '\u1f37')]), ('\u1f38', &[('\u0300', '\u1f3a'), ('\u0301', '\u1f3c'), ('\u0342',
+        '\u1f3e')]), ('\u1f39', &[('\u0300', '\u1f3b'), ('\u0301', '\u1f3d'), ('\u0342',
+        '\u1f3f')]), ('\u1f40', &[('\u0300', '\u1f42'), ('\u0301', '\u1f44')]), ('\u1f41',
+        &[('\u0300', '\u1f43'), ('\u0301', '\u1f45')]), ('\u1f48', &[('\u0300', '\u1f4a'),
+        ('\u0301', '\u1f4c')]), ('\u1f49', &[('\u0300', '\u1f4b'), ('\u0301', '\u1f4d')]),
+        ('\u1f50', &[('\u0300', '\u1f52'), ('\u0301', '\u1f54'), ('\u0342', '\u1f56')]), ('\u1f51',
+        &[('\u0300', '\u1f53'), ('\u0301', '\u1f55'), ('\u0342', '\u1f57')]), ('\u1f59',
+        &[('\u0300', '\u1f5b'), ('\u0301', '\u1f5d'), ('\u0342', '\u1f5f')]), ('\u1f60',
+        &[('\u0300', '\u1f62'), ('\u0301', '\u1f64'), ('\u0342', '\u1f66'), ('\u0345', '\u1fa0')]),
+        ('\u1f61', &[('\u0300', '\u1f63'), ('\u0301', '\u1f65'), ('\u0342', '\u1f67'), ('\u0345',
+        '\u1fa1')]), ('\u1f62', &[('\u0345', '\u1fa2')]), ('\u1f63', &[('\u0345', '\u1fa3')]),
+        ('\u1f64', &[('\u0345', '\u1fa4')]), ('\u1f65', &[('\u0345', '\u1fa5')]), ('\u1f66',
+        &[('\u0345', '\u1fa6')]), ('\u1f67', &[('\u0345', '\u1fa7')]), ('\u1f68', &[('\u0300',
+        '\u1f6a'), ('\u0301', '\u1f6c'), ('\u0342', '\u1f6e'), ('\u0345', '\u1fa8')]), ('\u1f69',
+        &[('\u0300', '\u1f6b'), ('\u0301', '\u1f6d'), ('\u0342', '\u1f6f'), ('\u0345', '\u1fa9')]),
+        ('\u1f6a', &[('\u0345', '\u1faa')]), ('\u1f6b', &[('\u0345', '\u1fab')]), ('\u1f6c',
+        &[('\u0345', '\u1fac')]), ('\u1f6d', &[('\u0345', '\u1fad')]), ('\u1f6e', &[('\u0345',
+        '\u1fae')]), ('\u1f6f', &[('\u0345', '\u1faf')]), ('\u1f70', &[('\u0345', '\u1fb2')]),
+        ('\u1f74', &[('\u0345', '\u1fc2')]), ('\u1f7c', &[('\u0345', '\u1ff2')]), ('\u1fb6',
+        &[('\u0345', '\u1fb7')]), ('\u1fbf', &[('\u0300', '\u1fcd'), ('\u0301', '\u1fce'),
+        ('\u0342', '\u1fcf')]), ('\u1fc6', &[('\u0345', '\u1fc7')]), ('\u1ff6', &[('\u0345',
+        '\u1ff7')]), ('\u1ffe', &[('\u0300', '\u1fdd'), ('\u0301', '\u1fde'), ('\u0342',
+        '\u1fdf')]), ('\u2190', &[('\u0338', '\u219a')]), ('\u2192', &[('\u0338', '\u219b')]),
+        ('\u2194', &[('\u0338', '\u21ae')]), ('\u21d0', &[('\u0338', '\u21cd')]), ('\u21d2',
+        &[('\u0338', '\u21cf')]), ('\u21d4', &[('\u0338', '\u21ce')]), ('\u2203', &[('\u0338',
+        '\u2204')]), ('\u2208', &[('\u0338', '\u2209')]), ('\u220b', &[('\u0338', '\u220c')]),
+        ('\u2223', &[('\u0338', '\u2224')]), ('\u2225', &[('\u0338', '\u2226')]), ('\u223c',
+        &[('\u0338', '\u2241')]), ('\u2243', &[('\u0338', '\u2244')]), ('\u2245', &[('\u0338',
+        '\u2247')]), ('\u2248', &[('\u0338', '\u2249')]), ('\u224d', &[('\u0338', '\u226d')]),
+        ('\u2261', &[('\u0338', '\u2262')]), ('\u2264', &[('\u0338', '\u2270')]), ('\u2265',
+        &[('\u0338', '\u2271')]), ('\u2272', &[('\u0338', '\u2274')]), ('\u2273', &[('\u0338',
+        '\u2275')]), ('\u2276', &[('\u0338', '\u2278')]), ('\u2277', &[('\u0338', '\u2279')]),
+        ('\u227a', &[('\u0338', '\u2280')]), ('\u227b', &[('\u0338', '\u2281')]), ('\u227c',
+        &[('\u0338', '\u22e0')]), ('\u227d', &[('\u0338', '\u22e1')]), ('\u2282', &[('\u0338',
+        '\u2284')]), ('\u2283', &[('\u0338', '\u2285')]), ('\u2286', &[('\u0338', '\u2288')]),
+        ('\u2287', &[('\u0338', '\u2289')]), ('\u2291', &[('\u0338', '\u22e2')]), ('\u2292',
+        &[('\u0338', '\u22e3')]), ('\u22a2', &[('\u0338', '\u22ac')]), ('\u22a8', &[('\u0338',
+        '\u22ad')]), ('\u22a9', &[('\u0338', '\u22ae')]), ('\u22ab', &[('\u0338', '\u22af')]),
+        ('\u22b2', &[('\u0338', '\u22ea')]), ('\u22b3', &[('\u0338', '\u22eb')]), ('\u22b4',
+        &[('\u0338', '\u22ec')]), ('\u22b5', &[('\u0338', '\u22ed')]), ('\u3046', &[('\u3099',
+        '\u3094')]), ('\u304b', &[('\u3099', '\u304c')]), ('\u304d', &[('\u3099', '\u304e')]),
+        ('\u304f', &[('\u3099', '\u3050')]), ('\u3051', &[('\u3099', '\u3052')]), ('\u3053',
+        &[('\u3099', '\u3054')]), ('\u3055', &[('\u3099', '\u3056')]), ('\u3057', &[('\u3099',
+        '\u3058')]), ('\u3059', &[('\u3099', '\u305a')]), ('\u305b', &[('\u3099', '\u305c')]),
+        ('\u305d', &[('\u3099', '\u305e')]), ('\u305f', &[('\u3099', '\u3060')]), ('\u3061',
+        &[('\u3099', '\u3062')]), ('\u3064', &[('\u3099', '\u3065')]), ('\u3066', &[('\u3099',
+        '\u3067')]), ('\u3068', &[('\u3099', '\u3069')]), ('\u306f', &[('\u3099', '\u3070'),
+        ('\u309a', '\u3071')]), ('\u3072', &[('\u3099', '\u3073'), ('\u309a', '\u3074')]),
+        ('\u3075', &[('\u3099', '\u3076'), ('\u309a', '\u3077')]), ('\u3078', &[('\u3099',
+        '\u3079'), ('\u309a', '\u307a')]), ('\u307b', &[('\u3099', '\u307c'), ('\u309a',
+        '\u307d')]), ('\u309d', &[('\u3099', '\u309e')]), ('\u30a6', &[('\u3099', '\u30f4')]),
+        ('\u30ab', &[('\u3099', '\u30ac')]), ('\u30ad', &[('\u3099', '\u30ae')]), ('\u30af',
+        &[('\u3099', '\u30b0')]), ('\u30b1', &[('\u3099', '\u30b2')]), ('\u30b3', &[('\u3099',
+        '\u30b4')]), ('\u30b5', &[('\u3099', '\u30b6')]), ('\u30b7', &[('\u3099', '\u30b8')]),
+        ('\u30b9', &[('\u3099', '\u30ba')]), ('\u30bb', &[('\u3099', '\u30bc')]), ('\u30bd',
+        &[('\u3099', '\u30be')]), ('\u30bf', &[('\u3099', '\u30c0')]), ('\u30c1', &[('\u3099',
+        '\u30c2')]), ('\u30c4', &[('\u3099', '\u30c5')]), ('\u30c6', &[('\u3099', '\u30c7')]),
+        ('\u30c8', &[('\u3099', '\u30c9')]), ('\u30cf', &[('\u3099', '\u30d0'), ('\u309a',
+        '\u30d1')]), ('\u30d2', &[('\u3099', '\u30d3'), ('\u309a', '\u30d4')]), ('\u30d5',
+        &[('\u3099', '\u30d6'), ('\u309a', '\u30d7')]), ('\u30d8', &[('\u3099', '\u30d9'),
+        ('\u309a', '\u30da')]), ('\u30db', &[('\u3099', '\u30dc'), ('\u309a', '\u30dd')]),
+        ('\u30ef', &[('\u3099', '\u30f7')]), ('\u30f0', &[('\u3099', '\u30f8')]), ('\u30f1',
+        &[('\u3099', '\u30f9')]), ('\u30f2', &[('\u3099', '\u30fa')]), ('\u30fd', &[('\u3099',
+        '\u30fe')]), ('\U00011099', &[('\U000110ba', '\U0001109a')]), ('\U0001109b',
+        &[('\U000110ba', '\U0001109c')]), ('\U000110a5', &[('\U000110ba', '\U000110ab')]),
+        ('\U00011131', &[('\U00011127', '\U0001112e')]), ('\U00011132', &[('\U00011127',
+        '\U0001112f')]), ('\U00011347', &[('\U0001133e', '\U0001134b'), ('\U00011357',
+        '\U0001134c')]), ('\U000114b9', &[('\U000114b0', '\U000114bc'), ('\U000114ba',
+        '\U000114bb'), ('\U000114bd', '\U000114be')]), ('\U000115b8', &[('\U000115af',
+        '\U000115ba')]), ('\U000115b9', &[('\U000115af', '\U000115bb')])
+    ];
+
 
     fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
         use core::option::{Some, None};