diff options
Diffstat (limited to 'src/libunicode')
| -rw-r--r-- | src/libunicode/normalize.rs | 18 |
| -rw-r--r-- | src/libunicode/tables.rs | 205 |
2 files changed, 112 insertions, 111 deletions
diff --git a/src/libunicode/normalize.rs b/src/libunicode/normalize.rs index a60e95c3827..76a9476d1fc 100644 --- a/src/libunicode/normalize.rs +++ b/src/libunicode/normalize.rs @@ -100,15 +100,15 @@ pub fn compose(a: char, b: char) -> Option<char> { } // Constants from Unicode 6.3.0 Section 3.12 Conjoining Jamo Behavior -static S_BASE: u32 = 0xAC00; -static L_BASE: u32 = 0x1100; -static V_BASE: u32 = 0x1161; -static T_BASE: u32 = 0x11A7; -static L_COUNT: u32 = 19; -static V_COUNT: u32 = 21; -static T_COUNT: u32 = 28; -static N_COUNT: u32 = (V_COUNT * T_COUNT); -static S_COUNT: u32 = (L_COUNT * N_COUNT); +const S_BASE: u32 = 0xAC00; +const L_BASE: u32 = 0x1100; +const V_BASE: u32 = 0x1161; +const T_BASE: u32 = 0x11A7; +const L_COUNT: u32 = 19; +const V_COUNT: u32 = 21; +const T_COUNT: u32 = 28; +const N_COUNT: u32 = (V_COUNT * T_COUNT); +const S_COUNT: u32 = (L_COUNT * N_COUNT); // Decompose a precomposed Hangul syllable #[inline(always)] diff --git a/src/libunicode/tables.rs b/src/libunicode/tables.rs index 135b267262c..e359883295f 100644 --- a/src/libunicode/tables.rs +++ b/src/libunicode/tables.rs @@ -3636,108 +3636,109 @@ pub mod property { } pub mod regex { - pub static UNICODE_CLASSES: &'static [(&'static str, &'static [(char, char)])] = &[ - ("Alphabetic", super::derived_property::Alphabetic_table), ("Arabic", - super::script::Arabic_table), ("Armenian", super::script::Armenian_table), ("Avestan", - super::script::Avestan_table), ("Balinese", super::script::Balinese_table), ("Bamum", - super::script::Bamum_table), ("Bassa_Vah", super::script::Bassa_Vah_table), ("Batak", - super::script::Batak_table), ("Bengali", super::script::Bengali_table), ("Bopomofo", - super::script::Bopomofo_table), ("Brahmi", super::script::Brahmi_table), ("Braille", - super::script::Braille_table), ("Buginese", super::script::Buginese_table), ("Buhid", - super::script::Buhid_table), ("C", super::general_category::C_table), - ("Canadian_Aboriginal", 
super::script::Canadian_Aboriginal_table), ("Carian", - super::script::Carian_table), ("Caucasian_Albanian", - super::script::Caucasian_Albanian_table), ("Cc", super::general_category::Cc_table), ("Cf", - super::general_category::Cf_table), ("Chakma", super::script::Chakma_table), ("Cham", - super::script::Cham_table), ("Cherokee", super::script::Cherokee_table), ("Cn", - super::general_category::Cn_table), ("Co", super::general_category::Co_table), ("Common", - super::script::Common_table), ("Coptic", super::script::Coptic_table), ("Cuneiform", - super::script::Cuneiform_table), ("Cypriot", super::script::Cypriot_table), ("Cyrillic", - super::script::Cyrillic_table), ("Default_Ignorable_Code_Point", - super::derived_property::Default_Ignorable_Code_Point_table), ("Deseret", - super::script::Deseret_table), ("Devanagari", super::script::Devanagari_table), ("Duployan", - super::script::Duployan_table), ("Egyptian_Hieroglyphs", - super::script::Egyptian_Hieroglyphs_table), ("Elbasan", super::script::Elbasan_table), - ("Ethiopic", super::script::Ethiopic_table), ("Georgian", super::script::Georgian_table), - ("Glagolitic", super::script::Glagolitic_table), ("Gothic", super::script::Gothic_table), - ("Grantha", super::script::Grantha_table), ("Greek", super::script::Greek_table), - ("Gujarati", super::script::Gujarati_table), ("Gurmukhi", super::script::Gurmukhi_table), - ("Han", super::script::Han_table), ("Hangul", super::script::Hangul_table), ("Hanunoo", - super::script::Hanunoo_table), ("Hebrew", super::script::Hebrew_table), ("Hiragana", - super::script::Hiragana_table), ("Imperial_Aramaic", super::script::Imperial_Aramaic_table), - ("Inherited", super::script::Inherited_table), ("Inscriptional_Pahlavi", - super::script::Inscriptional_Pahlavi_table), ("Inscriptional_Parthian", - super::script::Inscriptional_Parthian_table), ("Javanese", super::script::Javanese_table), - ("Join_Control", super::property::Join_Control_table), ("Kaithi", - 
super::script::Kaithi_table), ("Kannada", super::script::Kannada_table), ("Katakana", - super::script::Katakana_table), ("Kayah_Li", super::script::Kayah_Li_table), ("Kharoshthi", - super::script::Kharoshthi_table), ("Khmer", super::script::Khmer_table), ("Khojki", - super::script::Khojki_table), ("Khudawadi", super::script::Khudawadi_table), ("L", - super::general_category::L_table), ("LC", super::general_category::LC_table), ("Lao", - super::script::Lao_table), ("Latin", super::script::Latin_table), ("Lepcha", - super::script::Lepcha_table), ("Limbu", super::script::Limbu_table), ("Linear_A", - super::script::Linear_A_table), ("Linear_B", super::script::Linear_B_table), ("Lisu", - super::script::Lisu_table), ("Ll", super::general_category::Ll_table), ("Lm", - super::general_category::Lm_table), ("Lo", super::general_category::Lo_table), ("Lowercase", - super::derived_property::Lowercase_table), ("Lt", super::general_category::Lt_table), ("Lu", - super::general_category::Lu_table), ("Lycian", super::script::Lycian_table), ("Lydian", - super::script::Lydian_table), ("M", super::general_category::M_table), ("Mahajani", - super::script::Mahajani_table), ("Malayalam", super::script::Malayalam_table), ("Mandaic", - super::script::Mandaic_table), ("Manichaean", super::script::Manichaean_table), ("Mc", - super::general_category::Mc_table), ("Me", super::general_category::Me_table), - ("Meetei_Mayek", super::script::Meetei_Mayek_table), ("Mende_Kikakui", - super::script::Mende_Kikakui_table), ("Meroitic_Cursive", - super::script::Meroitic_Cursive_table), ("Meroitic_Hieroglyphs", - super::script::Meroitic_Hieroglyphs_table), ("Miao", super::script::Miao_table), ("Mn", - super::general_category::Mn_table), ("Modi", super::script::Modi_table), ("Mongolian", - super::script::Mongolian_table), ("Mro", super::script::Mro_table), ("Myanmar", - super::script::Myanmar_table), ("N", super::general_category::N_table), ("Nabataean", - super::script::Nabataean_table), ("Nd", 
super::general_category::Nd_table), ("New_Tai_Lue", - super::script::New_Tai_Lue_table), ("Nko", super::script::Nko_table), ("Nl", - super::general_category::Nl_table), ("No", super::general_category::No_table), - ("Noncharacter_Code_Point", super::property::Noncharacter_Code_Point_table), ("Ogham", - super::script::Ogham_table), ("Ol_Chiki", super::script::Ol_Chiki_table), ("Old_Italic", - super::script::Old_Italic_table), ("Old_North_Arabian", - super::script::Old_North_Arabian_table), ("Old_Permic", super::script::Old_Permic_table), - ("Old_Persian", super::script::Old_Persian_table), ("Old_South_Arabian", - super::script::Old_South_Arabian_table), ("Old_Turkic", super::script::Old_Turkic_table), - ("Oriya", super::script::Oriya_table), ("Osmanya", super::script::Osmanya_table), ("P", - super::general_category::P_table), ("Pahawh_Hmong", super::script::Pahawh_Hmong_table), - ("Palmyrene", super::script::Palmyrene_table), ("Pau_Cin_Hau", - super::script::Pau_Cin_Hau_table), ("Pc", super::general_category::Pc_table), ("Pd", - super::general_category::Pd_table), ("Pe", super::general_category::Pe_table), ("Pf", - super::general_category::Pf_table), ("Phags_Pa", super::script::Phags_Pa_table), - ("Phoenician", super::script::Phoenician_table), ("Pi", super::general_category::Pi_table), - ("Po", super::general_category::Po_table), ("Ps", super::general_category::Ps_table), - ("Psalter_Pahlavi", super::script::Psalter_Pahlavi_table), ("Rejang", - super::script::Rejang_table), ("Runic", super::script::Runic_table), ("S", - super::general_category::S_table), ("Samaritan", super::script::Samaritan_table), - ("Saurashtra", super::script::Saurashtra_table), ("Sc", super::general_category::Sc_table), - ("Sharada", super::script::Sharada_table), ("Shavian", super::script::Shavian_table), - ("Siddham", super::script::Siddham_table), ("Sinhala", super::script::Sinhala_table), ("Sk", - super::general_category::Sk_table), ("Sm", super::general_category::Sm_table), ("So", - 
super::general_category::So_table), ("Sora_Sompeng", super::script::Sora_Sompeng_table), - ("Sundanese", super::script::Sundanese_table), ("Syloti_Nagri", - super::script::Syloti_Nagri_table), ("Syriac", super::script::Syriac_table), ("Tagalog", - super::script::Tagalog_table), ("Tagbanwa", super::script::Tagbanwa_table), ("Tai_Le", - super::script::Tai_Le_table), ("Tai_Tham", super::script::Tai_Tham_table), ("Tai_Viet", - super::script::Tai_Viet_table), ("Takri", super::script::Takri_table), ("Tamil", - super::script::Tamil_table), ("Telugu", super::script::Telugu_table), ("Thaana", - super::script::Thaana_table), ("Thai", super::script::Thai_table), ("Tibetan", - super::script::Tibetan_table), ("Tifinagh", super::script::Tifinagh_table), ("Tirhuta", - super::script::Tirhuta_table), ("Ugaritic", super::script::Ugaritic_table), ("Uppercase", - super::derived_property::Uppercase_table), ("Vai", super::script::Vai_table), - ("Warang_Citi", super::script::Warang_Citi_table), ("White_Space", - super::property::White_Space_table), ("XID_Continue", - super::derived_property::XID_Continue_table), ("XID_Start", - super::derived_property::XID_Start_table), ("Yi", super::script::Yi_table), ("Z", - super::general_category::Z_table), ("Zl", super::general_category::Zl_table), ("Zp", - super::general_category::Zp_table), ("Zs", super::general_category::Zs_table) - ]; - - pub static PERLD: &'static [(char, char)] = super::general_category::Nd_table; - - pub static PERLS: &'static [(char, char)] = super::property::White_Space_table; + pub static UNICODE_CLASSES: &'static [(&'static str, &'static &'static [(char, char)])] = &[ + ("Alphabetic", &super::derived_property::Alphabetic_table), ("Arabic", + &super::script::Arabic_table), ("Armenian", &super::script::Armenian_table), ("Avestan", + &super::script::Avestan_table), ("Balinese", &super::script::Balinese_table), ("Bamum", + &super::script::Bamum_table), ("Bassa_Vah", &super::script::Bassa_Vah_table), ("Batak", + 
&super::script::Batak_table), ("Bengali", &super::script::Bengali_table), ("Bopomofo", + &super::script::Bopomofo_table), ("Brahmi", &super::script::Brahmi_table), ("Braille", + &super::script::Braille_table), ("Buginese", &super::script::Buginese_table), ("Buhid", + &super::script::Buhid_table), ("C", &super::general_category::C_table), + ("Canadian_Aboriginal", &super::script::Canadian_Aboriginal_table), ("Carian", + &super::script::Carian_table), ("Caucasian_Albanian", + &super::script::Caucasian_Albanian_table), ("Cc", &super::general_category::Cc_table), + ("Cf", &super::general_category::Cf_table), ("Chakma", &super::script::Chakma_table), + ("Cham", &super::script::Cham_table), ("Cherokee", &super::script::Cherokee_table), ("Cn", + &super::general_category::Cn_table), ("Co", &super::general_category::Co_table), ("Common", + &super::script::Common_table), ("Coptic", &super::script::Coptic_table), ("Cuneiform", + &super::script::Cuneiform_table), ("Cypriot", &super::script::Cypriot_table), ("Cyrillic", + &super::script::Cyrillic_table), ("Default_Ignorable_Code_Point", + &super::derived_property::Default_Ignorable_Code_Point_table), ("Deseret", + &super::script::Deseret_table), ("Devanagari", &super::script::Devanagari_table), + ("Duployan", &super::script::Duployan_table), ("Egyptian_Hieroglyphs", + &super::script::Egyptian_Hieroglyphs_table), ("Elbasan", &super::script::Elbasan_table), + ("Ethiopic", &super::script::Ethiopic_table), ("Georgian", &super::script::Georgian_table), + ("Glagolitic", &super::script::Glagolitic_table), ("Gothic", &super::script::Gothic_table), + ("Grantha", &super::script::Grantha_table), ("Greek", &super::script::Greek_table), + ("Gujarati", &super::script::Gujarati_table), ("Gurmukhi", &super::script::Gurmukhi_table), + ("Han", &super::script::Han_table), ("Hangul", &super::script::Hangul_table), ("Hanunoo", + &super::script::Hanunoo_table), ("Hebrew", &super::script::Hebrew_table), ("Hiragana", + &super::script::Hiragana_table), 
("Imperial_Aramaic", + &super::script::Imperial_Aramaic_table), ("Inherited", &super::script::Inherited_table), + ("Inscriptional_Pahlavi", &super::script::Inscriptional_Pahlavi_table), + ("Inscriptional_Parthian", &super::script::Inscriptional_Parthian_table), ("Javanese", + &super::script::Javanese_table), ("Join_Control", &super::property::Join_Control_table), + ("Kaithi", &super::script::Kaithi_table), ("Kannada", &super::script::Kannada_table), + ("Katakana", &super::script::Katakana_table), ("Kayah_Li", &super::script::Kayah_Li_table), + ("Kharoshthi", &super::script::Kharoshthi_table), ("Khmer", &super::script::Khmer_table), + ("Khojki", &super::script::Khojki_table), ("Khudawadi", &super::script::Khudawadi_table), + ("L", &super::general_category::L_table), ("LC", &super::general_category::LC_table), + ("Lao", &super::script::Lao_table), ("Latin", &super::script::Latin_table), ("Lepcha", + &super::script::Lepcha_table), ("Limbu", &super::script::Limbu_table), ("Linear_A", + &super::script::Linear_A_table), ("Linear_B", &super::script::Linear_B_table), ("Lisu", + &super::script::Lisu_table), ("Ll", &super::general_category::Ll_table), ("Lm", + &super::general_category::Lm_table), ("Lo", &super::general_category::Lo_table), + ("Lowercase", &super::derived_property::Lowercase_table), ("Lt", + &super::general_category::Lt_table), ("Lu", &super::general_category::Lu_table), ("Lycian", + &super::script::Lycian_table), ("Lydian", &super::script::Lydian_table), ("M", + &super::general_category::M_table), ("Mahajani", &super::script::Mahajani_table), + ("Malayalam", &super::script::Malayalam_table), ("Mandaic", &super::script::Mandaic_table), + ("Manichaean", &super::script::Manichaean_table), ("Mc", + &super::general_category::Mc_table), ("Me", &super::general_category::Me_table), + ("Meetei_Mayek", &super::script::Meetei_Mayek_table), ("Mende_Kikakui", + &super::script::Mende_Kikakui_table), ("Meroitic_Cursive", + &super::script::Meroitic_Cursive_table), 
("Meroitic_Hieroglyphs", + &super::script::Meroitic_Hieroglyphs_table), ("Miao", &super::script::Miao_table), ("Mn", + &super::general_category::Mn_table), ("Modi", &super::script::Modi_table), ("Mongolian", + &super::script::Mongolian_table), ("Mro", &super::script::Mro_table), ("Myanmar", + &super::script::Myanmar_table), ("N", &super::general_category::N_table), ("Nabataean", + &super::script::Nabataean_table), ("Nd", &super::general_category::Nd_table), + ("New_Tai_Lue", &super::script::New_Tai_Lue_table), ("Nko", &super::script::Nko_table), + ("Nl", &super::general_category::Nl_table), ("No", &super::general_category::No_table), + ("Noncharacter_Code_Point", &super::property::Noncharacter_Code_Point_table), ("Ogham", + &super::script::Ogham_table), ("Ol_Chiki", &super::script::Ol_Chiki_table), ("Old_Italic", + &super::script::Old_Italic_table), ("Old_North_Arabian", + &super::script::Old_North_Arabian_table), ("Old_Permic", &super::script::Old_Permic_table), + ("Old_Persian", &super::script::Old_Persian_table), ("Old_South_Arabian", + &super::script::Old_South_Arabian_table), ("Old_Turkic", &super::script::Old_Turkic_table), + ("Oriya", &super::script::Oriya_table), ("Osmanya", &super::script::Osmanya_table), ("P", + &super::general_category::P_table), ("Pahawh_Hmong", &super::script::Pahawh_Hmong_table), + ("Palmyrene", &super::script::Palmyrene_table), ("Pau_Cin_Hau", + &super::script::Pau_Cin_Hau_table), ("Pc", &super::general_category::Pc_table), ("Pd", + &super::general_category::Pd_table), ("Pe", &super::general_category::Pe_table), ("Pf", + &super::general_category::Pf_table), ("Phags_Pa", &super::script::Phags_Pa_table), + ("Phoenician", &super::script::Phoenician_table), ("Pi", + &super::general_category::Pi_table), ("Po", &super::general_category::Po_table), ("Ps", + &super::general_category::Ps_table), ("Psalter_Pahlavi", + &super::script::Psalter_Pahlavi_table), ("Rejang", &super::script::Rejang_table), ("Runic", + &super::script::Runic_table), 
("S", &super::general_category::S_table), ("Samaritan", + &super::script::Samaritan_table), ("Saurashtra", &super::script::Saurashtra_table), ("Sc", + &super::general_category::Sc_table), ("Sharada", &super::script::Sharada_table), ("Shavian", + &super::script::Shavian_table), ("Siddham", &super::script::Siddham_table), ("Sinhala", + &super::script::Sinhala_table), ("Sk", &super::general_category::Sk_table), ("Sm", + &super::general_category::Sm_table), ("So", &super::general_category::So_table), + ("Sora_Sompeng", &super::script::Sora_Sompeng_table), ("Sundanese", + &super::script::Sundanese_table), ("Syloti_Nagri", &super::script::Syloti_Nagri_table), + ("Syriac", &super::script::Syriac_table), ("Tagalog", &super::script::Tagalog_table), + ("Tagbanwa", &super::script::Tagbanwa_table), ("Tai_Le", &super::script::Tai_Le_table), + ("Tai_Tham", &super::script::Tai_Tham_table), ("Tai_Viet", &super::script::Tai_Viet_table), + ("Takri", &super::script::Takri_table), ("Tamil", &super::script::Tamil_table), ("Telugu", + &super::script::Telugu_table), ("Thaana", &super::script::Thaana_table), ("Thai", + &super::script::Thai_table), ("Tibetan", &super::script::Tibetan_table), ("Tifinagh", + &super::script::Tifinagh_table), ("Tirhuta", &super::script::Tirhuta_table), ("Ugaritic", + &super::script::Ugaritic_table), ("Uppercase", &super::derived_property::Uppercase_table), + ("Vai", &super::script::Vai_table), ("Warang_Citi", &super::script::Warang_Citi_table), + ("White_Space", &super::property::White_Space_table), ("XID_Continue", + &super::derived_property::XID_Continue_table), ("XID_Start", + &super::derived_property::XID_Start_table), ("Yi", &super::script::Yi_table), ("Z", + &super::general_category::Z_table), ("Zl", &super::general_category::Zl_table), ("Zp", + &super::general_category::Zp_table), ("Zs", &super::general_category::Zs_table) + ]; + + pub static PERLD: &'static &'static [(char, char)] = &super::general_category::Nd_table; + + pub static PERLS: &'static 
&'static [(char, char)] = &super::property::White_Space_table; pub static PERLW: &'static [(char, char)] = &[ ('\x30', '\x39'), ('\x41', '\x5a'), ('\x5f', '\x5f'), ('\x61', '\x7a'), ('\xaa', '\xaa'), |
