about summary refs log tree commit diff
path: root/src/libsyntax/parse/lexer
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2016-05-05 08:50:23 -0700
committer bors <bors@rust-lang.org> 2016-05-05 08:50:23 -0700
commit 413bafdabf5c5716fd823ac9a6232c48e2ba2902 (patch)
tree d7880d95b69b72a0fe39659b46f7a6fc485b83d5 /src/libsyntax/parse/lexer
parent 3f65afa6946ee4a6b1d619aee69e1f638edfb662 (diff)
parent 496081c5c7bdf3afc2e444c166ee875b4f9041e5 (diff)
download rust-413bafdabf5c5716fd823ac9a6232c48e2ba2902.tar.gz
rust-413bafdabf5c5716fd823ac9a6232c48e2ba2902.zip
Auto merge of #33128 - xen0n:more-confusing-unicode-chars, r=nagisa
Add more aliases for Unicode confusable chars

Building upon #29837, this PR:

* added aliases for space characters,
* distinguished square brackets from parens, and
* added common CJK punctuation characters as aliases.

This will especially help CJK users who may have forgotten to switch off IME when coding.
Diffstat (limited to 'src/libsyntax/parse/lexer')
-rw-r--r-- src/libsyntax/parse/lexer/unicode_chars.rs 59
1 file changed, 53 insertions, 6 deletions
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
index 1d32dd49731..d337c78bee8 100644
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -16,6 +16,22 @@ use errors::DiagnosticBuilder;
 use super::StringReader;
 
 const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
+    (' ', "No-Break Space", ' '),
+    (' ', "Ogham Space Mark", ' '),
+    (' ', "En Quad", ' '),
+    (' ', "Em Quad", ' '),
+    (' ', "En Space", ' '),
+    (' ', "Em Space", ' '),
+    (' ', "Three-Per-Em Space", ' '),
+    (' ', "Four-Per-Em Space", ' '),
+    (' ', "Six-Per-Em Space", ' '),
+    (' ', "Figure Space", ' '),
+    (' ', "Punctuation Space", ' '),
+    (' ', "Thin Space", ' '),
+    (' ', "Hair Space", ' '),
+    (' ', "Narrow No-Break Space", ' '),
+    (' ', "Medium Mathematical Space", ' '),
+    (' ', "Ideographic Space", ' '),
     ('ߺ', "Nko Lajanyalan", '_'),
     ('﹍', "Dashed Low Line", '_'),
     ('﹎', "Centreline Low Line", '_'),
@@ -24,14 +40,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('‑', "Non-Breaking Hyphen", '-'),
     ('‒', "Figure Dash", '-'),
     ('–', "En Dash", '-'),
+    ('—', "Em Dash", '-'),
     ('﹘', "Small Em Dash", '-'),
     ('⁃', "Hyphen Bullet", '-'),
     ('˗', "Modifier Letter Minus Sign", '-'),
     ('−', "Minus Sign", '-'),
+    ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
     ('٫', "Arabic Decimal Separator", ','),
     ('‚', "Single Low-9 Quotation Mark", ','),
     ('ꓹ', "Lisu Letter Tone Na Po", ','),
+    (',', "Fullwidth Comma", ','),
     (';', "Greek Question Mark", ';'),
+    (';', "Fullwidth Semicolon", ';'),
     ('ः', "Devanagari Sign Visarga", ':'),
     ('ઃ', "Gujarati Sign Visarga", ':'),
     (':', "Fullwidth Colon", ':'),
@@ -53,6 +73,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('ʔ', "Latin Letter Glottal Stop", '?'),
     ('ॽ', "Devanagari Letter Glottal Stop", '?'),
     ('Ꭾ', "Cherokee Letter He", '?'),
+    ('?', "Fullwidth Question Mark", '?'),
     ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
     ('․', "One Dot Leader", '.'),
     ('۔', "Arabic Full Stop", '.'),
@@ -60,9 +81,12 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('܂', "Syriac Sublinear Full Stop", '.'),
     ('꘎', "Vai Full Stop", '.'),
     ('𐩐', "Kharoshthi Punctuation Dot", '.'),
+    ('·', "Middle Dot", '.'),
     ('٠', "Arabic-Indic Digit Zero", '.'),
     ('۰', "Extended Arabic-Indic Digit Zero", '.'),
     ('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
+    ('。', "Ideographic Full Stop", '.'),
+    ('・', "Katakana Middle Dot", '.'),
     ('՝', "Armenian Comma", '\''),
     (''', "Fullwidth Apostrophe", '\''),
     ('‘', "Left Single Quotation Mark", '\''),
@@ -108,16 +132,30 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
     ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
     ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
-    ('[', "Fullwidth Left Square Bracket", '('),
     ('❨', "Medium Left Parenthesis Ornament", '('),
-    ('❲', "Light Left Tortoise Shell Bracket Ornament", '('),
-    ('〔', "Left Tortoise Shell Bracket", '('),
     ('﴾', "Ornate Left Parenthesis", '('),
-    (']', "Fullwidth Right Square Bracket", ')'),
+    ('(', "Fullwidth Left Parenthesis", '('),
     ('❩', "Medium Right Parenthesis Ornament", ')'),
-    ('❳', "Light Right Tortoise Shell Bracket Ornament", ')'),
-    ('〕', "Right Tortoise Shell Bracket", ')'),
     ('﴿', "Ornate Right Parenthesis", ')'),
+    (')', "Fullwidth Right Parenthesis", ')'),
+    ('[', "Fullwidth Left Square Bracket", '['),
+    ('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
+    ('「', "Left Corner Bracket", '['),
+    ('『', "Left White Corner Bracket", '['),
+    ('【', "Left Black Lenticular Bracket", '['),
+    ('〔', "Left Tortoise Shell Bracket", '['),
+    ('〖', "Left White Lenticular Bracket", '['),
+    ('〘', "Left White Tortoise Shell Bracket", '['),
+    ('〚', "Left White Square Bracket", '['),
+    (']', "Fullwidth Right Square Bracket", ']'),
+    ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
+    ('」', "Right Corner Bracket", ']'),
+    ('』', "Right White Corner Bracket", ']'),
+    ('】', "Right Black Lenticular Bracket", ']'),
+    ('〕', "Right Tortoise Shell Bracket", ']'),
+    ('〗', "Right White Lenticular Bracket", ']'),
+    ('〙', "Right White Tortoise Shell Bracket", ']'),
+    ('〛', "Right White Square Bracket", ']'),
     ('❴', "Medium Left Curly Bracket Ornament", '{'),
     ('❵', "Medium Right Curly Bracket Ornament", '}'),
     ('⁎', "Low Asterisk", '*'),
@@ -140,6 +178,8 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('⟍', "Mathematical Falling Diagonal", '\\'),
     ('⧵', "Reverse Solidus Operator", '\\'),
     ('⧹', "Big Reverse Solidus", '\\'),
+    ('、', "Ideographic Comma", '\\'),
+    ('ヽ', "Katakana Iteration Mark", '\\'),
     ('㇔', "Cjk Stroke D", '\\'),
     ('丶', "Cjk Unified Ideograph-4E36", '\\'),
     ('⼂', "Kangxi Radical Dot", '\\'),
@@ -148,15 +188,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
     ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
     ('˂', "Modifier Letter Left Arrowhead", '<'),
+    ('〈', "Left Angle Bracket", '<'),
+    ('《', "Left Double Angle Bracket", '<'),
     ('꓿', "Lisu Punctuation Full Stop", '='),
     ('›', "Single Right-Pointing Angle Quotation Mark", '>'),
     ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
     ('˃', "Modifier Letter Right Arrowhead", '>'),
+    ('〉', "Right Angle Bracket", '>'),
+    ('》', "Right Double Angle Bracket", '>'),
     ('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'),
     ('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
     ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), ];
 
 const ASCII_ARRAY: &'static [(char, &'static str)] = &[
+    (' ', "Space"),
     ('_', "Underscore"),
     ('-', "Minus/Hyphen"),
     (',', "Comma"),
@@ -169,6 +214,8 @@ const ASCII_ARRAY: &'static [(char, &'static str)] = &[
     ('"', "Quotation Mark"),
     ('(', "Left Parenthesis"),
     (')', "Right Parenthesis"),
+    ('[', "Left Square Bracket"),
+    (']', "Right Square Bracket"),
     ('{', "Left Curly Brace"),
     ('}', "Right Curly Brace"),
     ('*', "Asterisk"),