about summary refs log tree commit diff
path: root/src/libsyntax/parse/lexer
diff options
context:
space:
mode:
authorMalo Jaffré <jaffre.malo@gmail.com>2017-08-06 17:36:50 +0200
committerMalo Jaffré <jaffre.malo@gmail.com>2017-08-06 17:36:50 +0200
commit4e2ddcb879d225e7d22fbf4af0536c06203b8d94 (patch)
tree4a482812cacf26d18d3b90fe0be3b5c45477b162 /src/libsyntax/parse/lexer
parenta9c24fd579cfa08852dca94214caae4b7e6b91c1 (diff)
downloadrust-4e2ddcb879d225e7d22fbf4af0536c06203b8d94.tar.gz
rust-4e2ddcb879d225e7d22fbf4af0536c06203b8d94.zip
Update the list of confusable characters
Also reorder and space the list to make it clearer for future updates
and to come closer to the original list.

Thanks @est31 for the instructions.

Fixes #43629.
r? @est31
Diffstat (limited to 'src/libsyntax/parse/lexer')
-rw-r--r--src/libsyntax/parse/lexer/unicode_chars.rs144
1 files changed, 125 insertions, 19 deletions
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
index 83a164bdb96..cc38021b7aa 100644
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -1,4 +1,4 @@
-// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -9,15 +9,16 @@
 // except according to those terms.
 
 // Characters and their corresponding confusables were collected from
-// http://www.unicode.org/Public/security/revision-06/confusables.txt
+// http://www.unicode.org/Public/security/10.0.0/confusables.txt
 
 use syntax_pos::{Span, NO_EXPANSION};
 use errors::DiagnosticBuilder;
 use super::StringReader;
 
 const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
-    (' ', "No-Break Space", ' '),
-    (' ', "Ogham Space Mark", ' '),
+    ('\u{2028}', "Line Separator", ' '),
+    ('\u{2029}', "Paragraph Separator", ' '),
+    (' ', "Ogham Space Mark", ' '),
     (' ', "En Quad", ' '),
     (' ', "Em Quad", ' '),
     (' ', "En Space", ' '),
@@ -25,39 +26,63 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     (' ', "Three-Per-Em Space", ' '),
     (' ', "Four-Per-Em Space", ' '),
     (' ', "Six-Per-Em Space", ' '),
-    (' ', "Figure Space", ' '),
     (' ', "Punctuation Space", ' '),
     (' ', "Thin Space", ' '),
     (' ', "Hair Space", ' '),
-    (' ', "Narrow No-Break Space", ' '),
     (' ', "Medium Mathematical Space", ' '),
+    (' ', "No-Break Space", ' '),
+    (' ', "Figure Space", ' '),
+    (' ', "Narrow No-Break Space", ' '),
     (' ', "Ideographic Space", ' '),
+
     ('ߺ', "Nko Lajanyalan", '_'),
     ('﹍', "Dashed Low Line", '_'),
     ('﹎', "Centreline Low Line", '_'),
     ('﹏', "Wavy Low Line", '_'),
+    ('＿', "Fullwidth Low Line", '_'),
+
     ('‐', "Hyphen", '-'),
     ('‑', "Non-Breaking Hyphen", '-'),
     ('‒', "Figure Dash", '-'),
     ('–', "En Dash", '-'),
     ('—', "Em Dash", '-'),
     ('﹘', "Small Em Dash", '-'),
+    ('۔', "Arabic Full Stop", '-'),
     ('⁃', "Hyphen Bullet", '-'),
     ('˗', "Modifier Letter Minus Sign", '-'),
     ('−', "Minus Sign", '-'),
+    ('➖', "Heavy Minus Sign", '-'),
+    ('Ⲻ', "Coptic Letter Dialect-P Ni", '-'),
     ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
+    ('-', "Fullwidth Hyphen-Minus", '-'),
+    ('―', "Horizontal Bar", '-'),
+    ('─', "Box Drawings Light Horizontal", '-'),
+    ('━', "Box Drawings Heavy Horizontal", '-'),
+    ('㇐', "CJK Stroke H", '-'),
+    ('ꟷ', "Latin Epigraphic Letter Sideways I", '-'),
+    ('ᅳ', "Hangul Jungseong Eu", '-'),
+    ('ㅡ', "Hangul Letter Eu", '-'),
+    ('一', "CJK Unified Ideograph-4E00", '-'),
+    ('⼀', "Kangxi Radical One", '-'),
+
+    ('؍', "Arabic Date Separator", ','),
     ('٫', "Arabic Decimal Separator", ','),
     ('‚', "Single Low-9 Quotation Mark", ','),
+    ('¸', "Cedilla", ','),
     ('ꓹ', "Lisu Letter Tone Na Po", ','),
     (',', "Fullwidth Comma", ','),
+
     (';', "Greek Question Mark", ';'),
     (';', "Fullwidth Semicolon", ';'),
+    ('︔', "Presentation Form For Vertical Semicolon", ';'),
+
     ('ः', "Devanagari Sign Visarga", ':'),
     ('ઃ', "Gujarati Sign Visarga", ':'),
     (':', "Fullwidth Colon", ':'),
     ('։', "Armenian Full Stop", ':'),
     ('܃', "Syriac Supralinear Colon", ':'),
     ('܄', "Syriac Sublinear Colon", ':'),
+    ('᛬', "Runic Multiple Punctuation", ':'),
     ('︰', "Presentation Form For Vertical Two Dot Leader", ':'),
     ('᠃', "Mongolian Full Stop", ':'),
     ('᠉', "Mongolian Manchu Full Stop", ':'),
@@ -68,25 +93,48 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('∶', "Ratio", ':'),
     ('ː', "Modifier Letter Triangular Colon", ':'),
     ('ꓽ', "Lisu Letter Tone Mya Jeu", ':'),
+    ('︓', "Presentation Form For Vertical Colon", ':'),
+
     ('!', "Fullwidth Exclamation Mark", '!'),
     ('ǃ', "Latin Letter Retroflex Click", '!'),
+    ('ⵑ', "Tifinagh Letter Tuareg Yang", '!'),
+    ('︕', "Presentation Form For Vertical Exclamation Mark", '!'),
+
     ('ʔ', "Latin Letter Glottal Stop", '?'),
+    ('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
     ('ॽ', "Devanagari Letter Glottal Stop", '?'),
     ('Ꭾ', "Cherokee Letter He", '?'),
+    ('ꛫ', "Bamum Letter Ntuu", '?'),
     ('?', "Fullwidth Question Mark", '?'),
+    ('︖', "Presentation Form For Vertical Question Mark", '?'),
+
     ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
     ('․', "One Dot Leader", '.'),
-    ('۔', "Arabic Full Stop", '.'),
     ('܁', "Syriac Supralinear Full Stop", '.'),
     ('܂', "Syriac Sublinear Full Stop", '.'),
     ('꘎', "Vai Full Stop", '.'),
     ('𐩐', "Kharoshthi Punctuation Dot", '.'),
-    ('·', "Middle Dot", '.'),
     ('٠', "Arabic-Indic Digit Zero", '.'),
     ('۰', "Extended Arabic-Indic Digit Zero", '.'),
     ('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
-    ('。', "Ideographic Full Stop", '.'),
+    ('·', "Middle Dot", '.'),
     ('・', "Katakana Middle Dot", '.'),
+    ('・', "Halfwidth Katakana Middle Dot", '.'),
+    ('᛫', "Runic Single Punctuation", '.'),
+    ('·', "Greek Ano Teleia", '.'),
+    ('⸱', "Word Separator Middle Dot", '.'),
+    ('𐄁', "Aegean Word Separator Dot", '.'),
+    ('•', "Bullet", '.'),
+    ('‧', "Hyphenation Point", '.'),
+    ('∙', "Bullet Operator", '.'),
+    ('⋅', "Dot Operator", '.'),
+    ('ꞏ', "Latin Letter Sinological Dot", '.'),
+    ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
+    ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
+    ('.', "Fullwidth Full Stop", '.'),
+    ('。', "Ideographic Full Stop", '.'),
+    ('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'),
+
     ('՝', "Armenian Comma", '\''),
     ('＇', "Fullwidth Apostrophe", '\''),
     ('‘', "Left Single Quotation Mark", '\''),
@@ -96,8 +144,10 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('‵', "Reversed Prime", '\''),
     ('՚', "Armenian Apostrophe", '\''),
     ('׳', "Hebrew Punctuation Geresh", '\''),
+    ('`', "Grave Accent", '\''),
     ('`', "Greek Varia", '\''),
     ('`', "Fullwidth Grave Accent", '\''),
+    ('´', "Acute Accent", '\''),
     ('΄', "Greek Tonos", '\''),
     ('´', "Greek Oxia", '\''),
     ('᾽', "Greek Koronis", '\''),
@@ -105,6 +155,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('῾', "Greek Dasia", '\''),
     ('ʹ', "Modifier Letter Prime", '\''),
     ('ʹ', "Greek Numeral Sign", '\''),
+    ('ˈ', "Modifier Letter Vertical Line", '\''),
     ('ˊ', "Modifier Letter Acute Accent", '\''),
     ('ˋ', "Modifier Letter Grave Accent", '\''),
     ('˴', "Modifier Letter Middle Grave Accent", '\''),
@@ -116,6 +167,12 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('י', "Hebrew Letter Yod", '\''),
     ('ߴ', "Nko High Tone Apostrophe", '\''),
     ('ߵ', "Nko Low Tone Apostrophe", '\''),
+    ('ᑊ', "Canadian Syllabics West-Cree P", '\''),
+    ('ᛌ', "Runic Letter Short-Twig-Sol S", '\''),
+    ('𖽑', "Miao Sign Aspiration", '\''),
+    ('𖽒', "Miao Sign Reformed Voicing", '\''),
+
+    ('᳓', "Vedic Sign Nihshvasa", '"'),
     ('"', "Fullwidth Quotation Mark", '"'),
     ('“', "Left Double Quotation Mark", '"'),
     ('”', "Right Double Quotation Mark", '"'),
@@ -132,12 +189,15 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
     ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
     ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
+
+    ('(', "Fullwidth Left Parenthesis", '('),
     ('❨', "Medium Left Parenthesis Ornament", '('),
     ('﴾', "Ornate Left Parenthesis", '('),
-    ('(', "Fullwidth Left Parenthesis", '('),
+
+    (')', "Fullwidth Right Parenthesis", ')'),
     ('❩', "Medium Right Parenthesis Ornament", ')'),
     ('﴿', "Ornate Right Parenthesis", ')'),
-    (')', "Fullwidth Right Parenthesis", ')'),
+
     ('[', "Fullwidth Left Square Bracket", '['),
     ('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
     ('「', "Left Corner Bracket", '['),
@@ -147,6 +207,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('〖', "Left White Lenticular Bracket", '['),
     ('〘', "Left White Tortoise Shell Bracket", '['),
     ('〚', "Left White Square Bracket", '['),
+
     (']', "Fullwidth Right Square Bracket", ']'),
     ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
     ('」', "Right Corner Bracket", ']'),
@@ -156,11 +217,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('〗', "Right White Lenticular Bracket", ']'),
     ('〙', "Right White Tortoise Shell Bracket", ']'),
     ('〛', "Right White Square Bracket", ']'),
+
     ('❴', "Medium Left Curly Bracket Ornament", '{'),
+    ('𝄔', "Musical Symbol Brace", '{'),
+    ('{', "Fullwidth Left Curly Bracket", '{'),
+
     ('❵', "Medium Right Curly Bracket Ornament", '}'),
+    ('}', "Fullwidth Right Curly Bracket", '}'),
+
     ('⁎', "Low Asterisk", '*'),
     ('٭', "Arabic Five Pointed Star", '*'),
     ('∗', "Asterisk Operator", '*'),
+    ('𐌟', "Old Italic Letter Ess", '*'),
+    ('*', "Fullwidth Asterisk", '*'),
+
     ('᜵', "Philippine Single Punctuation", '/'),
     ('⁁', "Caret Insertion Point", '/'),
     ('∕', "Division Slash", '/'),
@@ -168,37 +238,73 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
     ('⟋', "Mathematical Rising Diagonal", '/'),
     ('⧸', "Big Solidus", '/'),
-    ('㇓', "Cjk Stroke Sp", '/'),
+    ('𝈺', "Greek Instrumental Notation Symbol-47", '/'),
+    ('㇓', "CJK Stroke Sp", '/'),
     ('〳', "Vertical Kana Repeat Mark Upper Half", '/'),
-    ('丿', "Cjk Unified Ideograph-4E3F", '/'),
+    ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'),
+    ('ノ', "Katakana Letter No", '/'),
+    ('丿', "CJK Unified Ideograph-4E3F", '/'),
     ('⼃', "Kangxi Radical Slash", '/'),
+    ('/', "Fullwidth Solidus", '/'),
+
     ('＼', "Fullwidth Reverse Solidus", '\\'),
     ('﹨', "Small Reverse Solidus", '\\'),
     ('∖', "Set Minus", '\\'),
     ('⟍', "Mathematical Falling Diagonal", '\\'),
     ('⧵', "Reverse Solidus Operator", '\\'),
     ('⧹', "Big Reverse Solidus", '\\'),
+    ('𝈏', "Greek Vocal Notation Symbol-16", '\\'),
+    ('𝈻', "Greek Instrumental Symbol-48", '\\'),
+    ('㇔', "CJK Stroke D", '\\'),
+    ('丶', "CJK Unified Ideograph-4E36", '\\'),
+    ('⼂', "Kangxi Radical Dot", '\\'),
     ('、', "Ideographic Comma", '\\'),
     ('ヽ', "Katakana Iteration Mark", '\\'),
-    ('㇔', "Cjk Stroke D", '\\'),
-    ('丶', "Cjk Unified Ideograph-4E36", '\\'),
-    ('⼂', "Kangxi Radical Dot", '\\'),
+
     ('ꝸ', "Latin Small Letter Um", '&'),
+    ('&', "Fullwidth Ampersand", '&'),
+
+    ('᛭', "Runic Cross Punctuation", '+'),
+    ('➕', "Heavy Plus Sign", '+'),
+    ('𐊛', "Lycian Letter H", '+'),
     ('﬩', "Hebrew Letter Alternative Plus Sign", '+'),
+    ('+', "Fullwidth Plus Sign", '+'),
+
     ('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
     ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
     ('˂', "Modifier Letter Left Arrowhead", '<'),
+    ('𝈶', "Greek Instrumental Symbol-40", '<'),
+    ('ᐸ', "Canadian Syllabics Pa", '<'),
+    ('ᚲ', "Runic Letter Kauna", '<'),
+    ('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'),
+    ('⟨', "Mathematical Left Angle Bracket", '<'),
+    ('〈', "Left-Pointing Angle Bracket", '<'),
     ('〈', "Left Angle Bracket", '<'),
+    ('㇛', "CJK Stroke Pd", '<'),
+    ('く', "Hiragana Letter Ku", '<'),
+    ('𡿨', "CJK Unified Ideograph-21FE8", '<'),
     ('《', "Left Double Angle Bracket", '<'),
+    ('<', "Fullwidth Less-Than Sign", '<'),
+
+    ('᐀', "Canadian Syllabics Hyphen", '='),
+    ('⹀', "Double Hyphen", '='),
+    ('゠', "Katakana-Hiragana Double Hyphen", '='),
     ('꓿', "Lisu Punctuation Full Stop", '='),
+    ('=', "Fullwidth Equals Sign", '='),
+
     ('›', "Single Right-Pointing Angle Quotation Mark", '>'),
     ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
     ('˃', "Modifier Letter Right Arrowhead", '>'),
+    ('𝈷', "Greek Instrumental Symbol-42", '>'),
+    ('ᐳ', "Canadian Syllabics Po", '>'),
+    ('𖼿', "Miao Letter Archaic Zza", '>'),
+    ('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'),
+    ('⟩', "Mathematical Right Angle Bracket", '>'),
+    ('〉', "Right-Pointing Angle Bracket", '>'),
     ('〉', "Right Angle Bracket", '>'),
     ('》', "Right Double Angle Bracket", '>'),
-    ('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'),
-    ('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
-    ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), ];
+    ('>', "Fullwidth Greater-Than Sign", '>'), ];
+
 
 const ASCII_ARRAY: &'static [(char, &'static str)] = &[
     (' ', "Space"),