Auto merge of #107105 - matthiaskrgr:rollup-rkz9t7r, r=matthiaskrgr

Rollup of 8 pull requests

Successful merges:

 - #106783 (Recover labels written as identifiers)
 - #106973 (Don't treat closures from other crates as local)
 - #106979 (Document how to get the type of a default associated type)
 - #107053 (signal update string representation for haiku.)
 - #107058 (Recognise double-equals homoglyph)
 - #107067 (Custom MIR: Support storage statements)
 - #107076 (Added const-generic ui test case for issue #106419)
 - #107091 (Fix broken format strings in `infer.ftl`)

Failed merges:

r? `@ghost`
`@rustbot` modify labels: rollup
author: bors <bors@rust-lang.org> 2023-01-20 12:58:13 +0000
committer: bors <bors@rust-lang.org> 2023-01-20 12:58:13 +0000
commit: 04a41f889f563b2384c63c990b5423d201d62ebd (patch)
tree: 9d9e675e350653f151e0e956cc568d863ef6cd7d /compiler/rustc_parse/src
parent: 56ee85274e5a3a4dda92f3bf73d1664c74ff9c15 (diff)
parent: c42fad8ff3b912719e5cbedc6756c6d1f27f8db3 (diff)
download: rust-04a41f889f563b2384c63c990b5423d201d62ebd.tar.gz
rust-04a41f889f563b2384c63c990b5423d201d62ebd.zip
2 files changed, 376 insertions, 327 deletions
diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs
index 65479b341d7..34d003ccfa7 100644
--- a/compiler/rustc_parse/src/lexer/unicode_chars.rs
+++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs
@@ -7,329 +7,331 @@ use rustc_errors::{Applicability, Diagnostic};
 use rustc_span::{symbol::kw, BytePos, Pos, Span};
 
 #[rustfmt::skip] // for line breaks
-pub(crate) const UNICODE_ARRAY: &[(char, &str, char)] = &[
-    (' ', "Line Separator", ' '),
-    (' ', "Paragraph Separator", ' '),
-    (' ', "Ogham Space mark", ' '),
-    (' ', "En Quad", ' '),
-    (' ', "Em Quad", ' '),
-    (' ', "En Space", ' '),
-    (' ', "Em Space", ' '),
-    (' ', "Three-Per-Em Space", ' '),
-    (' ', "Four-Per-Em Space", ' '),
-    (' ', "Six-Per-Em Space", ' '),
-    (' ', "Punctuation Space", ' '),
-    (' ', "Thin Space", ' '),
-    (' ', "Hair Space", ' '),
-    (' ', "Medium Mathematical Space", ' '),
-    (' ', "No-Break Space", ' '),
-    (' ', "Figure Space", ' '),
-    (' ', "Narrow No-Break Space", ' '),
-    ('　', "Ideographic Space", ' '),
-
-    ('ߺ', "Nko Lajanyalan", '_'),
-    ('﹍', "Dashed Low Line", '_'),
-    ('﹎', "Centreline Low Line", '_'),
-    ('﹏', "Wavy Low Line", '_'),
-    ('＿', "Fullwidth Low Line", '_'),
-
-    ('‐', "Hyphen", '-'),
-    ('‑', "Non-Breaking Hyphen", '-'),
-    ('‒', "Figure Dash", '-'),
-    ('–', "En Dash", '-'),
-    ('—', "Em Dash", '-'),
-    ('﹘', "Small Em Dash", '-'),
-    ('۔', "Arabic Full Stop", '-'),
-    ('⁃', "Hyphen Bullet", '-'),
-    ('˗', "Modifier Letter Minus Sign", '-'),
-    ('−', "Minus Sign", '-'),
-    ('➖', "Heavy Minus Sign", '-'),
-    ('Ⲻ', "Coptic Letter Dialect-P Ni", '-'),
-    ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
-    ('－', "Fullwidth Hyphen-Minus", '-'),
-    ('―', "Horizontal Bar", '-'),
-    ('─', "Box Drawings Light Horizontal", '-'),
-    ('━', "Box Drawings Heavy Horizontal", '-'),
-    ('㇐', "CJK Stroke H", '-'),
-    ('ꟷ', "Latin Epigraphic Letter Sideways I", '-'),
-    ('ᅳ', "Hangul Jungseong Eu", '-'),
-    ('ㅡ', "Hangul Letter Eu", '-'),
-    ('一', "CJK Unified Ideograph-4E00", '-'),
-    ('⼀', "Kangxi Radical One", '-'),
-
-    ('؍', "Arabic Date Separator", ','),
-    ('٫', "Arabic Decimal Separator", ','),
-    ('‚', "Single Low-9 Quotation Mark", ','),
-    ('¸', "Cedilla", ','),
-    ('ꓹ', "Lisu Letter Tone Na Po", ','),
-    ('，', "Fullwidth Comma", ','),
-
-    (';', "Greek Question Mark", ';'),
-    ('；', "Fullwidth Semicolon", ';'),
-    ('︔', "Presentation Form For Vertical Semicolon", ';'),
-
-    ('ः', "Devanagari Sign Visarga", ':'),
-    ('ઃ', "Gujarati Sign Visarga", ':'),
-    ('：', "Fullwidth Colon", ':'),
-    ('։', "Armenian Full Stop", ':'),
-    ('܃', "Syriac Supralinear Colon", ':'),
-    ('܄', "Syriac Sublinear Colon", ':'),
-    ('᛬', "Runic Multiple Punctuation", ':'),
-    ('︰', "Presentation Form For Vertical Two Dot Leader", ':'),
-    ('᠃', "Mongolian Full Stop", ':'),
-    ('᠉', "Mongolian Manchu Full Stop", ':'),
-    ('⁚', "Two Dot Punctuation", ':'),
-    ('׃', "Hebrew Punctuation Sof Pasuq", ':'),
-    ('˸', "Modifier Letter Raised Colon", ':'),
-    ('꞉', "Modifier Letter Colon", ':'),
-    ('∶', "Ratio", ':'),
-    ('ː', "Modifier Letter Triangular Colon", ':'),
-    ('ꓽ', "Lisu Letter Tone Mya Jeu", ':'),
-    ('︓', "Presentation Form For Vertical Colon", ':'),
-
-    ('！', "Fullwidth Exclamation Mark", '!'),
-    ('ǃ', "Latin Letter Retroflex Click", '!'),
-    ('ⵑ', "Tifinagh Letter Tuareg Yang", '!'),
-    ('︕', "Presentation Form For Vertical Exclamation Mark", '!'),
-
-    ('ʔ', "Latin Letter Glottal Stop", '?'),
-    ('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
-    ('ॽ', "Devanagari Letter Glottal Stop", '?'),
-    ('Ꭾ', "Cherokee Letter He", '?'),
-    ('ꛫ', "Bamum Letter Ntuu", '?'),
-    ('？', "Fullwidth Question Mark", '?'),
-    ('︖', "Presentation Form For Vertical Question Mark", '?'),
-
-    ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
-    ('․', "One Dot Leader", '.'),
-    ('܁', "Syriac Supralinear Full Stop", '.'),
-    ('܂', "Syriac Sublinear Full Stop", '.'),
-    ('꘎', "Vai Full Stop", '.'),
-    ('𐩐', "Kharoshthi Punctuation Dot", '.'),
-    ('٠', "Arabic-Indic Digit Zero", '.'),
-    ('۰', "Extended Arabic-Indic Digit Zero", '.'),
-    ('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
-    ('·', "Middle Dot", '.'),
-    ('・', "Katakana Middle Dot", '.'),
-    ('･', "Halfwidth Katakana Middle Dot", '.'),
-    ('᛫', "Runic Single Punctuation", '.'),
-    ('·', "Greek Ano Teleia", '.'),
-    ('⸱', "Word Separator Middle Dot", '.'),
-    ('𐄁', "Aegean Word Separator Dot", '.'),
-    ('•', "Bullet", '.'),
-    ('‧', "Hyphenation Point", '.'),
-    ('∙', "Bullet Operator", '.'),
-    ('⋅', "Dot Operator", '.'),
-    ('ꞏ', "Latin Letter Sinological Dot", '.'),
-    ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
-    ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
-    ('．', "Fullwidth Full Stop", '.'),
-    ('。', "Ideographic Full Stop", '.'),
-    ('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'),
-
-    ('՝', "Armenian Comma", '\''),
-    ('＇', "Fullwidth Apostrophe", '\''),
-    ('‘', "Left Single Quotation Mark", '\''),
-    ('’', "Right Single Quotation Mark", '\''),
-    ('‛', "Single High-Reversed-9 Quotation Mark", '\''),
-    ('′', "Prime", '\''),
-    ('‵', "Reversed Prime", '\''),
-    ('՚', "Armenian Apostrophe", '\''),
-    ('׳', "Hebrew Punctuation Geresh", '\''),
-    ('`', "Grave Accent", '\''),
-    ('`', "Greek Varia", '\''),
-    ('｀', "Fullwidth Grave Accent", '\''),
-    ('´', "Acute Accent", '\''),
-    ('΄', "Greek Tonos", '\''),
-    ('´', "Greek Oxia", '\''),
-    ('᾽', "Greek Koronis", '\''),
-    ('᾿', "Greek Psili", '\''),
-    ('῾', "Greek Dasia", '\''),
-    ('ʹ', "Modifier Letter Prime", '\''),
-    ('ʹ', "Greek Numeral Sign", '\''),
-    ('ˈ', "Modifier Letter Vertical Line", '\''),
-    ('ˊ', "Modifier Letter Acute Accent", '\''),
-    ('ˋ', "Modifier Letter Grave Accent", '\''),
-    ('˴', "Modifier Letter Middle Grave Accent", '\''),
-    ('ʻ', "Modifier Letter Turned Comma", '\''),
-    ('ʽ', "Modifier Letter Reversed Comma", '\''),
-    ('ʼ', "Modifier Letter Apostrophe", '\''),
-    ('ʾ', "Modifier Letter Right Half Ring", '\''),
-    ('ꞌ', "Latin Small Letter Saltillo", '\''),
-    ('י', "Hebrew Letter Yod", '\''),
-    ('ߴ', "Nko High Tone Apostrophe", '\''),
-    ('ߵ', "Nko Low Tone Apostrophe", '\''),
-    ('ᑊ', "Canadian Syllabics West-Cree P", '\''),
-    ('ᛌ', "Runic Letter Short-Twig-Sol S", '\''),
-    ('𖽑', "Miao Sign Aspiration", '\''),
-    ('𖽒', "Miao Sign Reformed Voicing", '\''),
-
-    ('᳓', "Vedic Sign Nihshvasa", '"'),
-    ('＂', "Fullwidth Quotation Mark", '"'),
-    ('“', "Left Double Quotation Mark", '"'),
-    ('”', "Right Double Quotation Mark", '"'),
-    ('‟', "Double High-Reversed-9 Quotation Mark", '"'),
-    ('″', "Double Prime", '"'),
-    ('‶', "Reversed Double Prime", '"'),
-    ('〃', "Ditto Mark", '"'),
-    ('״', "Hebrew Punctuation Gershayim", '"'),
-    ('˝', "Double Acute Accent", '"'),
-    ('ʺ', "Modifier Letter Double Prime", '"'),
-    ('˶', "Modifier Letter Middle Double Acute Accent", '"'),
-    ('˵', "Modifier Letter Middle Double Grave Accent", '"'),
-    ('ˮ', "Modifier Letter Double Apostrophe", '"'),
-    ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
-    ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
-    ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
-
-    ('（', "Fullwidth Left Parenthesis", '('),
-    ('❨', "Medium Left Parenthesis Ornament", '('),
-    ('﴾', "Ornate Left Parenthesis", '('),
-
-    ('）', "Fullwidth Right Parenthesis", ')'),
-    ('❩', "Medium Right Parenthesis Ornament", ')'),
-    ('﴿', "Ornate Right Parenthesis", ')'),
-
-    ('［', "Fullwidth Left Square Bracket", '['),
-    ('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
-    ('「', "Left Corner Bracket", '['),
-    ('『', "Left White Corner Bracket", '['),
-    ('【', "Left Black Lenticular Bracket", '['),
-    ('〔', "Left Tortoise Shell Bracket", '['),
-    ('〖', "Left White Lenticular Bracket", '['),
-    ('〘', "Left White Tortoise Shell Bracket", '['),
-    ('〚', "Left White Square Bracket", '['),
-
-    ('］', "Fullwidth Right Square Bracket", ']'),
-    ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
-    ('」', "Right Corner Bracket", ']'),
-    ('』', "Right White Corner Bracket", ']'),
-    ('】', "Right Black Lenticular Bracket", ']'),
-    ('〕', "Right Tortoise Shell Bracket", ']'),
-    ('〗', "Right White Lenticular Bracket", ']'),
-    ('〙', "Right White Tortoise Shell Bracket", ']'),
-    ('〛', "Right White Square Bracket", ']'),
-
-    ('❴', "Medium Left Curly Bracket Ornament", '{'),
-    ('𝄔', "Musical Symbol Brace", '{'),
-    ('｛', "Fullwidth Left Curly Bracket", '{'),
-
-    ('❵', "Medium Right Curly Bracket Ornament", '}'),
-    ('｝', "Fullwidth Right Curly Bracket", '}'),
-
-    ('⁎', "Low Asterisk", '*'),
-    ('٭', "Arabic Five Pointed Star", '*'),
-    ('∗', "Asterisk Operator", '*'),
-    ('𐌟', "Old Italic Letter Ess", '*'),
-    ('＊', "Fullwidth Asterisk", '*'),
-
-    ('᜵', "Philippine Single Punctuation", '/'),
-    ('⁁', "Caret Insertion Point", '/'),
-    ('∕', "Division Slash", '/'),
-    ('⁄', "Fraction Slash", '/'),
-    ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
-    ('⟋', "Mathematical Rising Diagonal", '/'),
-    ('⧸', "Big Solidus", '/'),
-    ('𝈺', "Greek Instrumental Notation Symbol-47", '/'),
-    ('㇓', "CJK Stroke Sp", '/'),
-    ('〳', "Vertical Kana Repeat Mark Upper Half", '/'),
-    ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'),
-    ('ノ', "Katakana Letter No", '/'),
-    ('丿', "CJK Unified Ideograph-4E3F", '/'),
-    ('⼃', "Kangxi Radical Slash", '/'),
-    ('／', "Fullwidth Solidus", '/'),
-
-    ('＼', "Fullwidth Reverse Solidus", '\\'),
-    ('﹨', "Small Reverse Solidus", '\\'),
-    ('∖', "Set Minus", '\\'),
-    ('⟍', "Mathematical Falling Diagonal", '\\'),
-    ('⧵', "Reverse Solidus Operator", '\\'),
-    ('⧹', "Big Reverse Solidus", '\\'),
-    ('⧹', "Greek Vocal Notation Symbol-16", '\\'),
-    ('⧹', "Greek Instrumental Symbol-48", '\\'),
-    ('㇔', "CJK Stroke D", '\\'),
-    ('丶', "CJK Unified Ideograph-4E36", '\\'),
-    ('⼂', "Kangxi Radical Dot", '\\'),
-    ('、', "Ideographic Comma", '\\'),
-    ('ヽ', "Katakana Iteration Mark", '\\'),
-
-    ('ꝸ', "Latin Small Letter Um", '&'),
-    ('＆', "Fullwidth Ampersand", '&'),
-
-    ('᛭', "Runic Cross Punctuation", '+'),
-    ('➕', "Heavy Plus Sign", '+'),
-    ('𐊛', "Lycian Letter H", '+'),
-    ('﬩', "Hebrew Letter Alternative Plus Sign", '+'),
-    ('＋', "Fullwidth Plus Sign", '+'),
-
-    ('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
-    ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
-    ('˂', "Modifier Letter Left Arrowhead", '<'),
-    ('𝈶', "Greek Instrumental Symbol-40", '<'),
-    ('ᐸ', "Canadian Syllabics Pa", '<'),
-    ('ᚲ', "Runic Letter Kauna", '<'),
-    ('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'),
-    ('⟨', "Mathematical Left Angle Bracket", '<'),
-    ('〈', "Left-Pointing Angle Bracket", '<'),
-    ('〈', "Left Angle Bracket", '<'),
-    ('㇛', "CJK Stroke Pd", '<'),
-    ('く', "Hiragana Letter Ku", '<'),
-    ('𡿨', "CJK Unified Ideograph-21FE8", '<'),
-    ('《', "Left Double Angle Bracket", '<'),
-    ('＜', "Fullwidth Less-Than Sign", '<'),
-
-    ('᐀', "Canadian Syllabics Hyphen", '='),
-    ('⹀', "Double Hyphen", '='),
-    ('゠', "Katakana-Hiragana Double Hyphen", '='),
-    ('꓿', "Lisu Punctuation Full Stop", '='),
-    ('＝', "Fullwidth Equals Sign", '='),
-
-    ('›', "Single Right-Pointing Angle Quotation Mark", '>'),
-    ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
-    ('˃', "Modifier Letter Right Arrowhead", '>'),
-    ('𝈷', "Greek Instrumental Symbol-42", '>'),
-    ('ᐳ', "Canadian Syllabics Po", '>'),
-    ('𖼿', "Miao Letter Archaic Zza", '>'),
-    ('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'),
-    ('⟩', "Mathematical Right Angle Bracket", '>'),
-    ('〉', "Right-Pointing Angle Bracket", '>'),
-    ('〉', "Right Angle Bracket", '>'),
-    ('》', "Right Double Angle Bracket", '>'),
-    ('＞', "Fullwidth Greater-Than Sign", '>'),
+pub(crate) const UNICODE_ARRAY: &[(char, &str, &str)] = &[
+    (' ', "Line Separator", " "),
+    (' ', "Paragraph Separator", " "),
+    (' ', "Ogham Space mark", " "),
+    (' ', "En Quad", " "),
+    (' ', "Em Quad", " "),
+    (' ', "En Space", " "),
+    (' ', "Em Space", " "),
+    (' ', "Three-Per-Em Space", " "),
+    (' ', "Four-Per-Em Space", " "),
+    (' ', "Six-Per-Em Space", " "),
+    (' ', "Punctuation Space", " "),
+    (' ', "Thin Space", " "),
+    (' ', "Hair Space", " "),
+    (' ', "Medium Mathematical Space", " "),
+    (' ', "No-Break Space", " "),
+    (' ', "Figure Space", " "),
+    (' ', "Narrow No-Break Space", " "),
+    ('　', "Ideographic Space", " "),
+
+    ('ߺ', "Nko Lajanyalan", "_"),
+    ('﹍', "Dashed Low Line", "_"),
+    ('﹎', "Centreline Low Line", "_"),
+    ('﹏', "Wavy Low Line", "_"),
+    ('＿', "Fullwidth Low Line", "_"),
+
+    ('‐', "Hyphen", "-"),
+    ('‑', "Non-Breaking Hyphen", "-"),
+    ('‒', "Figure Dash", "-"),
+    ('–', "En Dash", "-"),
+    ('—', "Em Dash", "-"),
+    ('﹘', "Small Em Dash", "-"),
+    ('۔', "Arabic Full Stop", "-"),
+    ('⁃', "Hyphen Bullet", "-"),
+    ('˗', "Modifier Letter Minus Sign", "-"),
+    ('−', "Minus Sign", "-"),
+    ('➖', "Heavy Minus Sign", "-"),
+    ('Ⲻ', "Coptic Letter Dialect-P Ni", "-"),
+    ('ー', "Katakana-Hiragana Prolonged Sound Mark", "-"),
+    ('－', "Fullwidth Hyphen-Minus", "-"),
+    ('―', "Horizontal Bar", "-"),
+    ('─', "Box Drawings Light Horizontal", "-"),
+    ('━', "Box Drawings Heavy Horizontal", "-"),
+    ('㇐', "CJK Stroke H", "-"),
+    ('ꟷ', "Latin Epigraphic Letter Sideways I", "-"),
+    ('ᅳ', "Hangul Jungseong Eu", "-"),
+    ('ㅡ', "Hangul Letter Eu", "-"),
+    ('一', "CJK Unified Ideograph-4E00", "-"),
+    ('⼀', "Kangxi Radical One", "-"),
+
+    ('؍', "Arabic Date Separator", ","),
+    ('٫', "Arabic Decimal Separator", ","),
+    ('‚', "Single Low-9 Quotation Mark", ","),
+    ('¸', "Cedilla", ","),
+    ('ꓹ', "Lisu Letter Tone Na Po", ","),
+    ('，', "Fullwidth Comma", ","),
+
+    (';', "Greek Question Mark", ";"),
+    ('；', "Fullwidth Semicolon", ";"),
+    ('︔', "Presentation Form For Vertical Semicolon", ";"),
+
+    ('ः', "Devanagari Sign Visarga", ":"),
+    ('ઃ', "Gujarati Sign Visarga", ":"),
+    ('：', "Fullwidth Colon", ":"),
+    ('։', "Armenian Full Stop", ":"),
+    ('܃', "Syriac Supralinear Colon", ":"),
+    ('܄', "Syriac Sublinear Colon", ":"),
+    ('᛬', "Runic Multiple Punctuation", ":"),
+    ('︰', "Presentation Form For Vertical Two Dot Leader", ":"),
+    ('᠃', "Mongolian Full Stop", ":"),
+    ('᠉', "Mongolian Manchu Full Stop", ":"),
+    ('⁚', "Two Dot Punctuation", ":"),
+    ('׃', "Hebrew Punctuation Sof Pasuq", ":"),
+    ('˸', "Modifier Letter Raised Colon", ":"),
+    ('꞉', "Modifier Letter Colon", ":"),
+    ('∶', "Ratio", ":"),
+    ('ː', "Modifier Letter Triangular Colon", ":"),
+    ('ꓽ', "Lisu Letter Tone Mya Jeu", ":"),
+    ('︓', "Presentation Form For Vertical Colon", ":"),
+
+    ('！', "Fullwidth Exclamation Mark", "!"),
+    ('ǃ', "Latin Letter Retroflex Click", "!"),
+    ('ⵑ', "Tifinagh Letter Tuareg Yang", "!"),
+    ('︕', "Presentation Form For Vertical Exclamation Mark", "!"),
+
+    ('ʔ', "Latin Letter Glottal Stop", "?"),
+    ('Ɂ', "Latin Capital Letter Glottal Stop", "?"),
+    ('ॽ', "Devanagari Letter Glottal Stop", "?"),
+    ('Ꭾ', "Cherokee Letter He", "?"),
+    ('ꛫ', "Bamum Letter Ntuu", "?"),
+    ('？', "Fullwidth Question Mark", "?"),
+    ('︖', "Presentation Form For Vertical Question Mark", "?"),
+
+    ('𝅭', "Musical Symbol Combining Augmentation Dot", "."),
+    ('․', "One Dot Leader", "."),
+    ('܁', "Syriac Supralinear Full Stop", "."),
+    ('܂', "Syriac Sublinear Full Stop", "."),
+    ('꘎', "Vai Full Stop", "."),
+    ('𐩐', "Kharoshthi Punctuation Dot", "."),
+    ('٠', "Arabic-Indic Digit Zero", "."),
+    ('۰', "Extended Arabic-Indic Digit Zero", "."),
+    ('ꓸ', "Lisu Letter Tone Mya Ti", "."),
+    ('·', "Middle Dot", "."),
+    ('・', "Katakana Middle Dot", "."),
+    ('･', "Halfwidth Katakana Middle Dot", "."),
+    ('᛫', "Runic Single Punctuation", "."),
+    ('·', "Greek Ano Teleia", "."),
+    ('⸱', "Word Separator Middle Dot", "."),
+    ('𐄁', "Aegean Word Separator Dot", "."),
+    ('•', "Bullet", "."),
+    ('‧', "Hyphenation Point", "."),
+    ('∙', "Bullet Operator", "."),
+    ('⋅', "Dot Operator", "."),
+    ('ꞏ', "Latin Letter Sinological Dot", "."),
+    ('ᐧ', "Canadian Syllabics Final Middle Dot", "."),
+    ('ᐧ', "Canadian Syllabics Final Middle Dot", "."),
+    ('．', "Fullwidth Full Stop", "."),
+    ('。', "Ideographic Full Stop", "."),
+    ('︒', "Presentation Form For Vertical Ideographic Full Stop", "."),
+
+    ('՝', "Armenian Comma", "\'"),
+    ('＇', "Fullwidth Apostrophe", "\'"),
+    ('‘', "Left Single Quotation Mark", "\'"),
+    ('’', "Right Single Quotation Mark", "\'"),
+    ('‛', "Single High-Reversed-9 Quotation Mark", "\'"),
+    ('′', "Prime", "\'"),
+    ('‵', "Reversed Prime", "\'"),
+    ('՚', "Armenian Apostrophe", "\'"),
+    ('׳', "Hebrew Punctuation Geresh", "\'"),
+    ('`', "Grave Accent", "\'"),
+    ('`', "Greek Varia", "\'"),
+    ('｀', "Fullwidth Grave Accent", "\'"),
+    ('´', "Acute Accent", "\'"),
+    ('΄', "Greek Tonos", "\'"),
+    ('´', "Greek Oxia", "\'"),
+    ('᾽', "Greek Koronis", "\'"),
+    ('᾿', "Greek Psili", "\'"),
+    ('῾', "Greek Dasia", "\'"),
+    ('ʹ', "Modifier Letter Prime", "\'"),
+    ('ʹ', "Greek Numeral Sign", "\'"),
+    ('ˈ', "Modifier Letter Vertical Line", "\'"),
+    ('ˊ', "Modifier Letter Acute Accent", "\'"),
+    ('ˋ', "Modifier Letter Grave Accent", "\'"),
+    ('˴', "Modifier Letter Middle Grave Accent", "\'"),
+    ('ʻ', "Modifier Letter Turned Comma", "\'"),
+    ('ʽ', "Modifier Letter Reversed Comma", "\'"),
+    ('ʼ', "Modifier Letter Apostrophe", "\'"),
+    ('ʾ', "Modifier Letter Right Half Ring", "\'"),
+    ('ꞌ', "Latin Small Letter Saltillo", "\'"),
+    ('י', "Hebrew Letter Yod", "\'"),
+    ('ߴ', "Nko High Tone Apostrophe", "\'"),
+    ('ߵ', "Nko Low Tone Apostrophe", "\'"),
+    ('ᑊ', "Canadian Syllabics West-Cree P", "\'"),
+    ('ᛌ', "Runic Letter Short-Twig-Sol S", "\'"),
+    ('𖽑', "Miao Sign Aspiration", "\'"),
+    ('𖽒', "Miao Sign Reformed Voicing", "\'"),
+
+    ('᳓', "Vedic Sign Nihshvasa", "\""),
+    ('＂', "Fullwidth Quotation Mark", "\""),
+    ('“', "Left Double Quotation Mark", "\""),
+    ('”', "Right Double Quotation Mark", "\""),
+    ('‟', "Double High-Reversed-9 Quotation Mark", "\""),
+    ('″', "Double Prime", "\""),
+    ('‶', "Reversed Double Prime", "\""),
+    ('〃', "Ditto Mark", "\""),
+    ('״', "Hebrew Punctuation Gershayim", "\""),
+    ('˝', "Double Acute Accent", "\""),
+    ('ʺ', "Modifier Letter Double Prime", "\""),
+    ('˶', "Modifier Letter Middle Double Acute Accent", "\""),
+    ('˵', "Modifier Letter Middle Double Grave Accent", "\""),
+    ('ˮ', "Modifier Letter Double Apostrophe", "\""),
+    ('ײ', "Hebrew Ligature Yiddish Double Yod", "\""),
+    ('❞', "Heavy Double Comma Quotation Mark Ornament", "\""),
+    ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", "\""),
+
+    ('（', "Fullwidth Left Parenthesis", "("),
+    ('❨', "Medium Left Parenthesis Ornament", "("),
+    ('﴾', "Ornate Left Parenthesis", "("),
+
+    ('）', "Fullwidth Right Parenthesis", ")"),
+    ('❩', "Medium Right Parenthesis Ornament", ")"),
+    ('﴿', "Ornate Right Parenthesis", ")"),
+
+    ('［', "Fullwidth Left Square Bracket", "["),
+    ('❲', "Light Left Tortoise Shell Bracket Ornament", "["),
+    ('「', "Left Corner Bracket", "["),
+    ('『', "Left White Corner Bracket", "["),
+    ('【', "Left Black Lenticular Bracket", "["),
+    ('〔', "Left Tortoise Shell Bracket", "["),
+    ('〖', "Left White Lenticular Bracket", "["),
+    ('〘', "Left White Tortoise Shell Bracket", "["),
+    ('〚', "Left White Square Bracket", "["),
+
+    ('］', "Fullwidth Right Square Bracket", "]"),
+    ('❳', "Light Right Tortoise Shell Bracket Ornament", "]"),
+    ('」', "Right Corner Bracket", "]"),
+    ('』', "Right White Corner Bracket", "]"),
+    ('】', "Right Black Lenticular Bracket", "]"),
+    ('〕', "Right Tortoise Shell Bracket", "]"),
+    ('〗', "Right White Lenticular Bracket", "]"),
+    ('〙', "Right White Tortoise Shell Bracket", "]"),
+    ('〛', "Right White Square Bracket", "]"),
+
+    ('❴', "Medium Left Curly Bracket Ornament", "{"),
+    ('𝄔', "Musical Symbol Brace", "{"),
+    ('｛', "Fullwidth Left Curly Bracket", "{"),
+
+    ('❵', "Medium Right Curly Bracket Ornament", "}"),
+    ('｝', "Fullwidth Right Curly Bracket", "}"),
+
+    ('⁎', "Low Asterisk", "*"),
+    ('٭', "Arabic Five Pointed Star", "*"),
+    ('∗', "Asterisk Operator", "*"),
+    ('𐌟', "Old Italic Letter Ess", "*"),
+    ('＊', "Fullwidth Asterisk", "*"),
+
+    ('᜵', "Philippine Single Punctuation", "/"),
+    ('⁁', "Caret Insertion Point", "/"),
+    ('∕', "Division Slash", "/"),
+    ('⁄', "Fraction Slash", "/"),
+    ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", "/"),
+    ('⟋', "Mathematical Rising Diagonal", "/"),
+    ('⧸', "Big Solidus", "/"),
+    ('𝈺', "Greek Instrumental Notation Symbol-47", "/"),
+    ('㇓', "CJK Stroke Sp", "/"),
+    ('〳', "Vertical Kana Repeat Mark Upper Half", "/"),
+    ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", "/"),
+    ('ノ', "Katakana Letter No", "/"),
+    ('丿', "CJK Unified Ideograph-4E3F", "/"),
+    ('⼃', "Kangxi Radical Slash", "/"),
+    ('／', "Fullwidth Solidus", "/"),
+
+    ('＼', "Fullwidth Reverse Solidus", "\\"),
+    ('﹨', "Small Reverse Solidus", "\\"),
+    ('∖', "Set Minus", "\\"),
+    ('⟍', "Mathematical Falling Diagonal", "\\"),
+    ('⧵', "Reverse Solidus Operator", "\\"),
+    ('⧹', "Big Reverse Solidus", "\\"),
+    ('⧹', "Greek Vocal Notation Symbol-16", "\\"),
+    ('⧹', "Greek Instrumental Symbol-48", "\\"),
+    ('㇔', "CJK Stroke D", "\\"),
+    ('丶', "CJK Unified Ideograph-4E36", "\\"),
+    ('⼂', "Kangxi Radical Dot", "\\"),
+    ('、', "Ideographic Comma", "\\"),
+    ('ヽ', "Katakana Iteration Mark", "\\"),
+
+    ('ꝸ', "Latin Small Letter Um", "&"),
+    ('＆', "Fullwidth Ampersand", "&"),
+
+    ('᛭', "Runic Cross Punctuation", "+"),
+    ('➕', "Heavy Plus Sign", "+"),
+    ('𐊛', "Lycian Letter H", "+"),
+    ('﬩', "Hebrew Letter Alternative Plus Sign", "+"),
+    ('＋', "Fullwidth Plus Sign", "+"),
+
+    ('‹', "Single Left-Pointing Angle Quotation Mark", "<"),
+    ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", "<"),
+    ('˂', "Modifier Letter Left Arrowhead", "<"),
+    ('𝈶', "Greek Instrumental Symbol-40", "<"),
+    ('ᐸ', "Canadian Syllabics Pa", "<"),
+    ('ᚲ', "Runic Letter Kauna", "<"),
+    ('❬', "Medium Left-Pointing Angle Bracket Ornament", "<"),
+    ('⟨', "Mathematical Left Angle Bracket", "<"),
+    ('〈', "Left-Pointing Angle Bracket", "<"),
+    ('〈', "Left Angle Bracket", "<"),
+    ('㇛', "CJK Stroke Pd", "<"),
+    ('く', "Hiragana Letter Ku", "<"),
+    ('𡿨', "CJK Unified Ideograph-21FE8", "<"),
+    ('《', "Left Double Angle Bracket", "<"),
+    ('＜', "Fullwidth Less-Than Sign", "<"),
+
+    ('᐀', "Canadian Syllabics Hyphen", "="),
+    ('⹀', "Double Hyphen", "="),
+    ('゠', "Katakana-Hiragana Double Hyphen", "="),
+    ('꓿', "Lisu Punctuation Full Stop", "="),
+    ('＝', "Fullwidth Equals Sign", "="),
+
+    ('›', "Single Right-Pointing Angle Quotation Mark", ">"),
+    ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", ">"),
+    ('˃', "Modifier Letter Right Arrowhead", ">"),
+    ('𝈷', "Greek Instrumental Symbol-42", ">"),
+    ('ᐳ', "Canadian Syllabics Po", ">"),
+    ('𖼿', "Miao Letter Archaic Zza", ">"),
+    ('❭', "Medium Right-Pointing Angle Bracket Ornament", ">"),
+    ('⟩', "Mathematical Right Angle Bracket", ">"),
+    ('〉', "Right-Pointing Angle Bracket", ">"),
+    ('〉', "Right Angle Bracket", ">"),
+    ('》', "Right Double Angle Bracket", ">"),
+    ('＞', "Fullwidth Greater-Than Sign", ">"),
+    ('⩵', "Two Consecutive Equals Signs", "==")
 ];
 
 // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of
 // keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`.
 // However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add
 // fancier error recovery to it, as there will be less overall work to do this way.
-const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
-    (' ', "Space", None),
-    ('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
-    ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
-    (',', "Comma", Some(token::Comma)),
-    (';', "Semicolon", Some(token::Semi)),
-    (':', "Colon", Some(token::Colon)),
-    ('!', "Exclamation Mark", Some(token::Not)),
-    ('?', "Question Mark", Some(token::Question)),
-    ('.', "Period", Some(token::Dot)),
-    ('(', "Left Parenthesis", Some(token::OpenDelim(Delimiter::Parenthesis))),
-    (')', "Right Parenthesis", Some(token::CloseDelim(Delimiter::Parenthesis))),
-    ('[', "Left Square Bracket", Some(token::OpenDelim(Delimiter::Bracket))),
-    (']', "Right Square Bracket", Some(token::CloseDelim(Delimiter::Bracket))),
-    ('{', "Left Curly Brace", Some(token::OpenDelim(Delimiter::Brace))),
-    ('}', "Right Curly Brace", Some(token::CloseDelim(Delimiter::Brace))),
-    ('*', "Asterisk", Some(token::BinOp(token::Star))),
-    ('/', "Slash", Some(token::BinOp(token::Slash))),
-    ('\\', "Backslash", None),
-    ('&', "Ampersand", Some(token::BinOp(token::And))),
-    ('+', "Plus Sign", Some(token::BinOp(token::Plus))),
-    ('<', "Less-Than Sign", Some(token::Lt)),
-    ('=', "Equals Sign", Some(token::Eq)),
-    ('>', "Greater-Than Sign", Some(token::Gt)),
+const ASCII_ARRAY: &[(&str, &str, Option<token::TokenKind>)] = &[
+    (" ", "Space", None),
+    ("_", "Underscore", Some(token::Ident(kw::Underscore, false))),
+    ("-", "Minus/Hyphen", Some(token::BinOp(token::Minus))),
+    (",", "Comma", Some(token::Comma)),
+    (";", "Semicolon", Some(token::Semi)),
+    (":", "Colon", Some(token::Colon)),
+    ("!", "Exclamation Mark", Some(token::Not)),
+    ("?", "Question Mark", Some(token::Question)),
+    (".", "Period", Some(token::Dot)),
+    ("(", "Left Parenthesis", Some(token::OpenDelim(Delimiter::Parenthesis))),
+    (")", "Right Parenthesis", Some(token::CloseDelim(Delimiter::Parenthesis))),
+    ("[", "Left Square Bracket", Some(token::OpenDelim(Delimiter::Bracket))),
+    ("]", "Right Square Bracket", Some(token::CloseDelim(Delimiter::Bracket))),
+    ("{", "Left Curly Brace", Some(token::OpenDelim(Delimiter::Brace))),
+    ("}", "Right Curly Brace", Some(token::CloseDelim(Delimiter::Brace))),
+    ("*", "Asterisk", Some(token::BinOp(token::Star))),
+    ("/", "Slash", Some(token::BinOp(token::Slash))),
+    ("\\", "Backslash", None),
+    ("&", "Ampersand", Some(token::BinOp(token::And))),
+    ("+", "Plus Sign", Some(token::BinOp(token::Plus))),
+    ("<", "Less-Than Sign", Some(token::Lt)),
+    ("=", "Equals Sign", Some(token::Eq)),
+    ("==", "Double Equals Sign", Some(token::EqEq)),
+    (">", "Greater-Than Sign", Some(token::Gt)),
     // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
     // spitting the correct token out.
-    ('\'', "Single Quote", None),
-    ('"', "Quotation Mark", None),
+    ("\'", "Single Quote", None),
+    ("\"", "Quotation Mark", None),
 ];
 
 pub(super) fn check_for_substitution<'a>(
@@ -339,11 +341,11 @@ pub(super) fn check_for_substitution<'a>(
     err: &mut Diagnostic,
     count: usize,
 ) -> Option<token::TokenKind> {
-    let &(_u_char, u_name, ascii_char) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
+    let &(_, u_name, ascii_str) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
 
     let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count));
 
-    let Some((_ascii_char, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) else {
+    let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
         let msg = format!("substitution character not found for '{}'", ch);
         reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
         return None;
@@ -354,7 +356,7 @@ pub(super) fn check_for_substitution<'a>(
         let msg = format!(
             "Unicode characters '“' (Left Double Quotation Mark) and \
              '”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
-            ascii_char, ascii_name
+            ascii_str, ascii_name
         );
         err.span_suggestion(
             Span::with_root_ctxt(
@@ -368,12 +370,12 @@ pub(super) fn check_for_substitution<'a>(
     } else {
         let msg = format!(
             "Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
-            ch, u_name, ascii_char, ascii_name
+            ch, u_name, ascii_str, ascii_name
         );
         err.span_suggestion(
             span,
             &msg,
-            ascii_char.to_string().repeat(count),
+            ascii_str.to_string().repeat(count),
             Applicability::MaybeIncorrect,
         );
     }
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index d58afcd4c9f..bf93a89f065 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -1353,9 +1353,6 @@ impl<'a> Parser<'a> {
                 err.span_label(sp, "while parsing this `loop` expression");
                 err
             })
-        } else if self.eat_keyword(kw::Continue) {
-            let kind = ExprKind::Continue(self.eat_label());
-            Ok(self.mk_expr(lo.to(self.prev_token.span), kind))
         } else if self.eat_keyword(kw::Match) {
             let match_sp = self.prev_token.span;
             self.parse_match_expr().map_err(|mut err| {
@@ -1379,6 +1376,8 @@ impl<'a> Parser<'a> {
             self.parse_try_block(lo)
         } else if self.eat_keyword(kw::Return) {
             self.parse_return_expr()
+        } else if self.eat_keyword(kw::Continue) {
+            self.parse_continue_expr(lo)
         } else if self.eat_keyword(kw::Break) {
             self.parse_break_expr()
         } else if self.eat_keyword(kw::Yield) {
@@ -1715,10 +1714,10 @@ impl<'a> Parser<'a> {
     fn parse_break_expr(&mut self) -> PResult<'a, P<Expr>> {
         let lo = self.prev_token.span;
         let mut label = self.eat_label();
-        let kind = if label.is_some() && self.token == token::Colon {
+        let kind = if self.token == token::Colon && let Some(label) = label.take() {
             // The value expression can be a labeled loop, see issue #86948, e.g.:
             // `loop { break 'label: loop { break 'label 42; }; }`
-            let lexpr = self.parse_labeled_expr(label.take().unwrap(), true)?;
+            let lexpr = self.parse_labeled_expr(label, true)?;
             self.sess.emit_err(LabeledLoopInBreak {
                 span: lexpr.span,
                 sub: WrapExpressionInParentheses {
@@ -1730,8 +1729,8 @@ impl<'a> Parser<'a> {
         } else if self.token != token::OpenDelim(Delimiter::Brace)
             || !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL)
         {
-            let expr = self.parse_expr_opt()?;
-            if let Some(expr) = &expr {
+            let mut expr = self.parse_expr_opt()?;
+            if let Some(expr) = &mut expr {
                 if label.is_some()
                     && matches!(
                         expr.kind,
@@ -1749,7 +1748,19 @@ impl<'a> Parser<'a> {
                         BuiltinLintDiagnostics::BreakWithLabelAndLoop(expr.span),
                     );
                 }
+
+                // Recover `break label aaaaa`
+                if self.may_recover()
+                    && let ExprKind::Path(None, p) = &expr.kind
+                    && let [segment] = &*p.segments
+                    && let &ast::PathSegment { ident, args: None, .. } = segment
+                    && let Some(next) = self.parse_expr_opt()?
+                {
+                    label = Some(self.recover_ident_into_label(ident));
+                    *expr = next;
+                }
             }
+
             expr
         } else {
             None
@@ -1758,6 +1769,23 @@ impl<'a> Parser<'a> {
         self.maybe_recover_from_bad_qpath(expr)
     }
 
+    /// Parse `"continue" label?` (the `continue` keyword was already eaten by the caller).
+    fn parse_continue_expr(&mut self, lo: Span) -> PResult<'a, P<Expr>> {
+        let mut label = self.eat_label();
+
+        // Recover `continue label` -> `continue 'label` (identifier where a label belongs)
+        if self.may_recover()
+            && label.is_none()
+            && let Some((ident, _)) = self.token.ident()
+        {
+            self.bump();
+            label = Some(self.recover_ident_into_label(ident));
+        }
+
+        let kind = ExprKind::Continue(label);
+        Ok(self.mk_expr(lo.to(self.prev_token.span), kind))
+    }
+
     /// Parse `"yield" expr?`.
     fn parse_yield_expr(&mut self) -> PResult<'a, P<Expr>> {
         let lo = self.prev_token.span;
@@ -3046,6 +3074,25 @@ impl<'a> Parser<'a> {
         false
     }
 
+    /// Recovers `ident` as the label `'ident` and emits "expected a label, found an identifier".
+    fn recover_ident_into_label(&mut self, ident: Ident) -> Label {
+        // Intern the tick-prefixed name (`label` -> `'label`), reusing the identifier's span,
+        // so that nameres doesn't complain about a non-existent label
+        let label = format!("'{}", ident.name);
+        let ident = Ident { name: Symbol::intern(&label), span: ident.span };
+
+        self.struct_span_err(ident.span, "expected a label, found an identifier")
+            .span_suggestion(
+                ident.span,
+                "labels start with a tick",
+                label,
+                Applicability::MachineApplicable,
+            )
+            .emit();
+
+        Label { ident }
+    }
+
     /// Parses `ident (COLON expr)?`.
     fn parse_expr_field(&mut self) -> PResult<'a, ExprField> {
         let attrs = self.parse_outer_attributes()?;
author	bors <bors@rust-lang.org>	2023-01-20 12:58:13 +0000
committer	bors <bors@rust-lang.org>	2023-01-20 12:58:13 +0000
commit	04a41f889f563b2384c63c990b5423d201d62ebd (patch)
tree	9d9e675e350653f151e0e956cc568d863ef6cd7d /compiler/rustc_parse/src
parent	56ee85274e5a3a4dda92f3bf73d1664c74ff9c15 (diff)
parent	c42fad8ff3b912719e5cbedc6756c6d1f27f8db3 (diff)
download	rust-04a41f889f563b2384c63c990b5423d201d62ebd.tar.gz rust-04a41f889f563b2384c63c990b5423d201d62ebd.zip