about summary refs log tree commit diff
path: root/src/libsyntax
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2019-07-26 16:57:54 +0000
committerbors <bors@rust-lang.org>2019-07-26 16:57:54 +0000
commitc43753f910aae000f8bcb0a502407ea332afc74b (patch)
treead563761b27efd2cbab0ade95c5139aaccb93494 /src/libsyntax
parent1a563362865e6051d4c350544131228e8eff5138 (diff)
parent232d27c306d76d2f973c88b0e0d1883aac8717f4 (diff)
downloadrust-c43753f910aae000f8bcb0a502407ea332afc74b.tar.gz
rust-c43753f910aae000f8bcb0a502407ea332afc74b.zip
Auto merge of #63015 - Centril:rollup-ydhpcas, r=Centril
Rollup of 22 pull requests

Successful merges:

 - #62084 (allow clippy::unreadable_literal in unicode tables)
 - #62120 (Add missing type links in documentation)
 - #62310 (Add missing doc links in boxed module)
 - #62421 (Introduce `as_deref` to Option)
 - #62583 (Implement Unpin for all raw pointers)
 - #62692 (rustc: precompute the largest Niche and store it in LayoutDetails.)
 - #62801 (Remove support for -Zlower-128bit-ops)
 - #62828 (Remove vector fadd/fmul reduction workarounds)
 - #62862 (code cleanup)
 - #62904 (Disable d32 on armv6 hf targets)
 - #62907 (Initialize the MSP430 AsmParser)
 - #62956 (Implement slow-path for FirstSets::first)
 - #62963 (Allow lexer to recover from some homoglyphs)
 - #62964 (clarify and unify some type test names)
 - #62970 (ci: gate toolstate repo pushes on the TOOLSTATE_PUBLISH envvar)
 - #62980 (std: Add more accessors for `Metadata` on Windows)
 - #62983 (Remove needless indirection through Rc)
 - #62985 (librustc_errors: Support ui-testing flag in annotate-snippet emitter)
 - #63002 (error_index_generator should output stdout/stderr when it panics.)
 - #63004 (Add test for issue-54062)
 - #63007 (ci: debug network failures while downloading awscli from PyPI)
 - #63009 (Remove redundant `mut` from variable declaration.)

Failed merges:

r? @ghost
Diffstat (limited to 'src/libsyntax')
-rw-r--r--src/libsyntax/ext/tt/macro_parser.rs8
-rw-r--r--src/libsyntax/ext/tt/macro_rules.rs57
-rw-r--r--src/libsyntax/ext/tt/transcribe.rs24
-rw-r--r--src/libsyntax/parse/lexer/mod.rs14
-rw-r--r--src/libsyntax/parse/lexer/unicode_chars.rs73
5 files changed, 94 insertions, 82 deletions
diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index ae1979540ff..dbf14daa30e 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -92,7 +92,6 @@ use rustc_data_structures::sync::Lrc;
 use std::collections::hash_map::Entry::{Occupied, Vacant};
 use std::mem;
 use std::ops::{Deref, DerefMut};
-use std::rc::Rc;
 
 // To avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body.
 
@@ -280,7 +279,7 @@ pub enum ParseResult<T> {
 
 /// A `ParseResult` where the `Success` variant contains a mapping of `Ident`s to `NamedMatch`es.
 /// This represents the mapping of metavars to the token trees they bind to.
-pub type NamedParseResult = ParseResult<FxHashMap<Ident, Rc<NamedMatch>>>;
+pub type NamedParseResult = ParseResult<FxHashMap<Ident, NamedMatch>>;
 
 /// Count how many metavars are named in the given matcher `ms`.
 pub fn count_names(ms: &[TokenTree]) -> usize {
@@ -373,7 +372,7 @@ fn nameize<I: Iterator<Item = NamedMatch>>(
         sess: &ParseSess,
         m: &TokenTree,
         res: &mut I,
-        ret_val: &mut FxHashMap<Ident, Rc<NamedMatch>>,
+        ret_val: &mut FxHashMap<Ident, NamedMatch>,
     ) -> Result<(), (syntax_pos::Span, String)> {
         match *m {
             TokenTree::Sequence(_, ref seq) => for next_m in &seq.tts {
@@ -390,8 +389,7 @@ fn nameize<I: Iterator<Item = NamedMatch>>(
             TokenTree::MetaVarDecl(sp, bind_name, _) => {
                 match ret_val.entry(bind_name) {
                     Vacant(spot) => {
-                        // FIXME(simulacrum): Don't construct Rc here
-                        spot.insert(Rc::new(res.next().unwrap()));
+                        spot.insert(res.next().unwrap());
                     }
                     Occupied(..) => {
                         return Err((sp, format!("duplicated bind name: {}", bind_name)))
diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs
index 4503cea0f10..817d8547e87 100644
--- a/src/libsyntax/ext/tt/macro_rules.rs
+++ b/src/libsyntax/ext/tt/macro_rules.rs
@@ -308,7 +308,7 @@ pub fn compile(
     let mut valid = true;
 
     // Extract the arguments:
-    let lhses = match *argument_map[&lhs_nm] {
+    let lhses = match argument_map[&lhs_nm] {
         MatchedSeq(ref s, _) => s
             .iter()
             .map(|m| {
@@ -335,7 +335,7 @@ pub fn compile(
         _ => sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs"),
     };
 
-    let rhses = match *argument_map[&rhs_nm] {
+    let rhses = match argument_map[&rhs_nm] {
         MatchedSeq(ref s, _) => s
             .iter()
             .map(|m| {
@@ -625,38 +625,37 @@ impl FirstSets {
                     return first;
                 }
                 TokenTree::Sequence(sp, ref seq_rep) => {
-                    match self.first.get(&sp.entire()) {
-                        Some(&Some(ref subfirst)) => {
-                            // If the sequence contents can be empty, then the first
-                            // token could be the separator token itself.
-
-                            if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
-                                first.add_one_maybe(TokenTree::Token(sep.clone()));
-                            }
-
-                            assert!(first.maybe_empty);
-                            first.add_all(subfirst);
-                            if subfirst.maybe_empty
-                                || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrMore
-                                || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrOne
-                            {
-                                // continue scanning for more first
-                                // tokens, but also make sure we
-                                // restore empty-tracking state
-                                first.maybe_empty = true;
-                                continue;
-                            } else {
-                                return first;
-                            }
-                        }
-
+                    let subfirst_owned;
+                    let subfirst = match self.first.get(&sp.entire()) {
+                        Some(&Some(ref subfirst)) => subfirst,
                         Some(&None) => {
-                            panic!("assume all sequences have (unique) spans for now");
+                            subfirst_owned = self.first(&seq_rep.tts[..]);
+                            &subfirst_owned
                         }
-
                         None => {
                             panic!("We missed a sequence during FirstSets construction");
                         }
+                    };
+
+                    // If the sequence contents can be empty, then the first
+                    // token could be the separator token itself.
+                    if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
+                        first.add_one_maybe(TokenTree::Token(sep.clone()));
+                    }
+
+                    assert!(first.maybe_empty);
+                    first.add_all(subfirst);
+                    if subfirst.maybe_empty
+                        || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrMore
+                        || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrOne
+                    {
+                        // Continue scanning for more first
+                        // tokens, but also make sure we
+                        // restore empty-tracking state.
+                        first.maybe_empty = true;
+                        continue;
+                    } else {
+                        return first;
                     }
                 }
             }
diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs
index fa93c5a904e..214e721fd15 100644
--- a/src/libsyntax/ext/tt/transcribe.rs
+++ b/src/libsyntax/ext/tt/transcribe.rs
@@ -12,7 +12,6 @@ use smallvec::{smallvec, SmallVec};
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
 use std::mem;
-use std::rc::Rc;
 
 /// An iterator over the token trees in a delimited token tree (`{ ... }`) or a sequence (`$(...)`).
 enum Frame {
@@ -65,9 +64,9 @@ impl Iterator for Frame {
 /// `transcribe` would return a `TokenStream` containing `println!("{}", stringify!(bar));`.
 ///
 /// Along the way, we do some additional error checking.
-pub fn transcribe(
+pub(super) fn transcribe(
     cx: &ExtCtxt<'_>,
-    interp: &FxHashMap<Ident, Rc<NamedMatch>>,
+    interp: &FxHashMap<Ident, NamedMatch>,
     src: Vec<quoted::TokenTree>,
 ) -> TokenStream {
     // Nothing for us to transcribe...
@@ -212,7 +211,7 @@ pub fn transcribe(
                 // Find the matched nonterminal from the macro invocation, and use it to replace
                 // the meta-var.
                 if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
-                    if let MatchedNonterminal(ref nt) = *cur_matched {
+                    if let MatchedNonterminal(ref nt) = cur_matched {
                         // FIXME #2887: why do we apply a mark when matching a token tree meta-var
                         // (e.g. `$x:tt`), but not when we are matching any other type of token
                         // tree?
@@ -273,18 +272,17 @@ pub fn transcribe(
 /// See the definition of `repeats` in the `transcribe` function. `repeats` is used to descend
 /// into the right place in nested matchers. If we attempt to descend too far, the macro writer has
 /// made a mistake, and we return `None`.
-fn lookup_cur_matched(
+fn lookup_cur_matched<'a>(
     ident: Ident,
-    interpolations: &FxHashMap<Ident, Rc<NamedMatch>>,
+    interpolations: &'a FxHashMap<Ident, NamedMatch>,
     repeats: &[(usize, usize)],
-) -> Option<Rc<NamedMatch>> {
+) -> Option<&'a NamedMatch> {
     interpolations.get(&ident).map(|matched| {
-        let mut matched = matched.clone();
+        let mut matched = matched;
         for &(idx, _) in repeats {
-            let m = matched.clone();
-            match *m {
+            match matched {
                 MatchedNonterminal(_) => break,
-                MatchedSeq(ref ads, _) => matched = Rc::new(ads[idx].clone()),
+                MatchedSeq(ref ads, _) => matched = ads.get(idx).unwrap(),
             }
         }
 
@@ -343,7 +341,7 @@ impl LockstepIterSize {
 /// multiple nested matcher sequences.
 fn lockstep_iter_size(
     tree: &quoted::TokenTree,
-    interpolations: &FxHashMap<Ident, Rc<NamedMatch>>,
+    interpolations: &FxHashMap<Ident, NamedMatch>,
     repeats: &[(usize, usize)],
 ) -> LockstepIterSize {
     use quoted::TokenTree;
@@ -360,7 +358,7 @@ fn lockstep_iter_size(
         }
         TokenTree::MetaVar(_, name) | TokenTree::MetaVarDecl(_, name, _) => {
             match lookup_cur_matched(name, interpolations, repeats) {
-                Some(matched) => match *matched {
+                Some(matched) => match matched {
                     MatchedNonterminal(_) => LockstepIterSize::Unconstrained,
                     MatchedSeq(ref ads, _) => LockstepIterSize::Constraint(ads.len(), name),
                 },
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index b97801a50d4..52f65e1b474 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -389,8 +389,18 @@ impl<'a> StringReader<'a> {
                                                           self.pos,
                                                           "unknown start of token",
                                                           c);
-                unicode_chars::check_for_substitution(self, start, c, &mut err);
-                return Err(err)
+                // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs,
+                // instead of keeping a table in `check_for_substitution`into the token. Ideally,
+                // this should be inside `rustc_lexer`. However, we should first remove compound
+                // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
+                // as there will be less overall work to do this way.
+                return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
+                    Some(token) => {
+                        err.emit();
+                        Ok(token)
+                    }
+                    None => Err(err),
+                }
             }
         };
         Ok(kind)
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
index b728a9e1988..eaa736c6a35 100644
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -3,7 +3,8 @@
 
 use super::StringReader;
 use errors::{Applicability, DiagnosticBuilder};
-use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
+use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw};
+use crate::parse::token;
 
 #[rustfmt::skip] // for line breaks
 const UNICODE_ARRAY: &[(char, &str, char)] = &[
@@ -297,32 +298,38 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[
     ('>', "Fullwidth Greater-Than Sign", '>'),
 ];
 
-const ASCII_ARRAY: &[(char, &str)] = &[
-    (' ', "Space"),
-    ('_', "Underscore"),
-    ('-', "Minus/Hyphen"),
-    (',', "Comma"),
-    (';', "Semicolon"),
-    (':', "Colon"),
-    ('!', "Exclamation Mark"),
-    ('?', "Question Mark"),
-    ('.', "Period"),
-    ('\'', "Single Quote"),
-    ('"', "Quotation Mark"),
-    ('(', "Left Parenthesis"),
-    (')', "Right Parenthesis"),
-    ('[', "Left Square Bracket"),
-    (']', "Right Square Bracket"),
-    ('{', "Left Curly Brace"),
-    ('}', "Right Curly Brace"),
-    ('*', "Asterisk"),
-    ('/', "Slash"),
-    ('\\', "Backslash"),
-    ('&', "Ampersand"),
-    ('+', "Plus Sign"),
-    ('<', "Less-Than Sign"),
-    ('=', "Equals Sign"),
-    ('>', "Greater-Than Sign"),
+// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of
+// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`.
+// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add
+// fancier error recovery to it, as there will be less overall work to do this way.
+const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
+    (' ', "Space", Some(token::Whitespace)),
+    ('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
+    ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
+    (',', "Comma", Some(token::Comma)),
+    (';', "Semicolon", Some(token::Semi)),
+    (':', "Colon", Some(token::Colon)),
+    ('!', "Exclamation Mark", Some(token::Not)),
+    ('?', "Question Mark", Some(token::Question)),
+    ('.', "Period", Some(token::Dot)),
+    ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))),
+    (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))),
+    ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))),
+    (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))),
+    ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))),
+    ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))),
+    ('*', "Asterisk", Some(token::BinOp(token::Star))),
+    ('/', "Slash", Some(token::BinOp(token::Slash))),
+    ('\\', "Backslash", None),
+    ('&', "Ampersand", Some(token::BinOp(token::And))),
+    ('+', "Plus Sign", Some(token::BinOp(token::Plus))),
+    ('<', "Less-Than Sign", Some(token::Lt)),
+    ('=', "Equals Sign", Some(token::Eq)),
+    ('>', "Greater-Than Sign", Some(token::Gt)),
+    // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
+    // spitting the correct token out.
+    ('\'', "Single Quote", None),
+    ('"', "Quotation Mark", None),
 ];
 
 crate fn check_for_substitution<'a>(
@@ -330,20 +337,20 @@ crate fn check_for_substitution<'a>(
     pos: BytePos,
     ch: char,
     err: &mut DiagnosticBuilder<'a>,
-) -> bool {
+) -> Option<token::TokenKind> {
     let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) {
         Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char),
-        None => return false,
+        None => return None,
     };
 
     let span = Span::new(pos, pos + Pos::from_usize(ch.len_utf8()), NO_EXPANSION);
 
-    let ascii_name = match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) {
-        Some((_ascii_char, ascii_name)) => ascii_name,
+    let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) {
+        Some((_ascii_char, ascii_name, token)) => (ascii_name, token),
         None => {
             let msg = format!("substitution character not found for '{}'", ch);
             reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
-            return false;
+            return None;
         }
     };
 
@@ -371,7 +378,7 @@ crate fn check_for_substitution<'a>(
         );
         err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect);
     }
-    true
+    token.clone()
 }
 
 /// Extract string if found at current position with given delimiters