move syntax::parse -> librustc_parse

also move MACRO_ARGUMENTS -> librustc_parse
author: Mazdak Farrokhzad <twingoow@gmail.com> 2019-10-15 22:48:13 +0200
committer: Mazdak Farrokhzad <twingoow@gmail.com> 2019-11-10 03:57:18 +0100
commit: 4ae2728fa8052915414127dce28245eb8f70842a (patch)
tree: 27cc54d90904091e4dc9bf7ae5fa3b41be4b6187 /src/libsyntax/parse/lexer
parent: be023ebe850261c6bb202a02a686827d821c3697 (diff)
download: rust-4ae2728fa8052915414127dce28245eb8f70842a.tar.gz
rust-4ae2728fa8052915414127dce28245eb8f70842a.zip
4 files changed, 0 insertions, 1530 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
deleted file mode 100644
index f2d5ff3440e..00000000000
--- a/src/libsyntax/parse/lexer/mod.rs
+++ /dev/null
@@ -1,643 +0,0 @@
-use crate::token::{self, Token, TokenKind};
-use crate::sess::ParseSess;
-use crate::symbol::{sym, Symbol};
-use crate::util::comments;
-
-use errors::{FatalError, DiagnosticBuilder};
-use syntax_pos::{BytePos, Pos, Span};
-use rustc_lexer::Base;
-use rustc_lexer::unescape;
-
-use std::char;
-use std::convert::TryInto;
-use rustc_data_structures::sync::Lrc;
-use log::debug;
-
-mod tokentrees;
-mod unicode_chars;
-mod unescape_error_reporting;
-use unescape_error_reporting::{emit_unescape_error, push_escaped_char};
-
-/// Description of one delimiter mismatch.
-///
-/// A `Vec<UnmatchedBrace>` is returned next to the token stream by
-/// `StringReader::into_token_trees` (see `tokentrees.rs`).
-// NOTE(review): the code that fills in these fields is not in this file; the
-// meaning of each span should be confirmed against the token-tree reader.
-#[derive(Clone, Debug)]
-pub struct UnmatchedBrace {
-    pub expected_delim: token::DelimToken,
-    pub found_delim: Option<token::DelimToken>,
-    pub found_span: Span,
-    pub unclosed_span: Option<Span>,
-    pub candidate_span: Option<Span>,
-}
-
-/// Lexer over the text of a single source file.
-///
-/// Each call to `next_token` slices `src` from the current position up to
-/// `end_src_index` and produces one `Token`.
-pub struct StringReader<'a> {
-    /// Parse session; diagnostics are reported through `sess.span_diagnostic`.
-    sess: &'a ParseSess,
-    /// Initial position, read-only.
-    start_pos: BytePos,
-    /// The absolute offset within the source_map of the current character.
-    // FIXME(#64197): `pub` is needed by tests for now.
-    pub pos: BytePos,
-    /// Stop reading src at this index (a byte index into `src`, i.e. relative
-    /// to `start_pos`).
-    end_src_index: usize,
-    /// Source text to tokenize.
-    src: Lrc<String>,
-    /// When `Some`, `mk_sp` returns this span instead of the real `lo..hi`
-    /// range for every token and diagnostic.
-    override_span: Option<Span>,
-}
-
-impl<'a> StringReader<'a> {
-    /// Creates a reader positioned at the start of `source_file` that will lex
-    /// the whole file.
-    ///
-    /// Reports a compiler bug through `sess.span_diagnostic` when the file has
-    /// no source text attached. `override_span` is stored unchanged.
-    pub fn new(
-        sess: &'a ParseSess,
-        source_file: Lrc<syntax_pos::SourceFile>,
-        override_span: Option<Span>,
-    ) -> Self {
-        let attached = source_file.src.as_ref();
-        if attached.is_none() {
-            let msg = format!("cannot lex `source_file` without source: {}",
-                              source_file.name);
-            sess.span_diagnostic.bug(&msg);
-        }
-
-        // Cloning the `Lrc` shares the text rather than copying it.
-        let src = attached.unwrap().clone();
-        let end_src_index = src.len();
-        let start_pos = source_file.start_pos;
-
-        StringReader {
-            sess,
-            start_pos,
-            pos: start_pos,
-            end_src_index,
-            src,
-            override_span,
-        }
-    }
-
-    /// Creates a reader that lexes only the text covered by `span`.
-    ///
-    /// The source file is looked up from `span.lo()`. When `span.hi()` resolves
-    /// to a different file the span is considered invalid and is shrunk to a
-    /// zero-length range at `span.lo()`, so the reader yields `Eof` right away.
-    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
-        let begin = sess.source_map().lookup_byte_offset(span.lo());
-        let end = sess.source_map().lookup_byte_offset(span.hi());
-
-        // Make the range zero-length if the span is invalid.
-        if begin.sf.start_pos != end.sf.start_pos {
-            span = span.shrink_to_lo();
-        }
-
-        let mut sr = StringReader::new(sess, begin.sf, None);
-
-        // Seek the lexer to the right byte range. `new` positions the reader at
-        // the start of the file, so both ends must be moved: adjusting only the
-        // end would lex everything from the top of the file up to `span.hi()`.
-        sr.pos = span.lo();
-        sr.end_src_index = sr.src_index(span.hi());
-
-        sr
-    }
-
-
-    /// Builds the span for `lo..hi`, unless an `override_span` is set, in which
-    /// case that span is returned instead.
-    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
-        match self.override_span {
-            Some(forced) => forced,
-            None => Span::with_root_ctxt(lo, hi),
-        }
-    }
-
-    /// Returns the next token, including trivia like whitespace or comments.
-    ///
-    /// Once the text up to `end_src_index` is exhausted, every call returns
-    /// `token::Eof` with an empty span at the current position.
-    pub fn next_token(&mut self) -> Token {
-        let lo = self.pos;
-        let rest: &str = &self.src[self.src_index(lo)..self.end_src_index];
-
-        if rest.is_empty() {
-            return Token::new(token::Eof, self.mk_sp(lo, lo));
-        }
-
-        // A shebang is only looked for at the very start of the file.
-        if lo == self.start_pos {
-            if let Some(shebang_len) = rustc_lexer::strip_shebang(rest) {
-                self.pos = lo + BytePos::from_usize(shebang_len);
-
-                // The symbol starts after the leading `#!`.
-                let sym = self.symbol_from(lo + BytePos::from_usize("#!".len()));
-                return Token::new(token::Shebang(sym), self.mk_sp(lo, self.pos));
-            }
-        }
-
-        let raw = rustc_lexer::first_token(rest);
-        self.pos = lo + BytePos::from_usize(raw.len);
-
-        debug!("try_next_token: {:?}({:?})", raw.kind, self.str_from(lo));
-
-        let kind = self.cook_lexer_token(raw.kind, lo);
-        Token::new(kind, self.mk_sp(lo, self.pos))
-    }
-
-    /// Report a fatal lexical error with a given span.
-    fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
-        self.sess.span_diagnostic.span_fatal(sp, m)
-    }
-
-    /// Report a lexical error with a given span.
-    fn err_span(&self, sp: Span, m: &str) {
-        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
-    }
-
-
-    /// Like `fatal_span`, for the half-open range [`from_pos`, `to_pos`).
-    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
-        let sp = self.mk_sp(from_pos, to_pos);
-        self.fatal_span(sp, m)
-    }
-
-    /// Like `err_span`, for the half-open range [`from_pos`, `to_pos`).
-    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
-        let sp = self.mk_sp(from_pos, to_pos);
-        self.err_span(sp, m)
-    }
-
-    /// Starts a fatal diagnostic for [`from_pos`, `to_pos`) without emitting it,
-    /// so the caller can attach labels or notes first.
-    fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str)
-        -> DiagnosticBuilder<'a>
-    {
-        let sp = self.mk_sp(from_pos, to_pos);
-        self.sess.span_diagnostic.struct_span_fatal(sp, m)
-    }
-
-    /// Starts a fatal diagnostic for [`from_pos`, `to_pos`) whose message is `m`
-    /// followed by `": "` and the escaped form of `c`. The diagnostic is
-    /// returned un-emitted.
-    fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
-        -> DiagnosticBuilder<'a>
-    {
-        let mut msg = String::from(m);
-        msg.push_str(": ");
-        push_escaped_char(&mut msg, c);
-
-        let sp = self.mk_sp(from_pos, to_pos);
-        self.sess.span_diagnostic.struct_span_fatal(sp, &msg)
-    }
-
-    /// Turns simple `rustc_lexer::TokenKind` enum into a rich
-    /// `libsyntax::TokenKind`. This turns strings into interned
-    /// symbols and runs additional validation.
-    ///
-    /// `start` is the position of the token's first byte; its end is the
-    /// reader's current position (`self.pos`). Problems found while cooking are
-    /// reported as diagnostics; an unterminated block comment raises a fatal error.
-    fn cook_lexer_token(
-        &self,
-        token: rustc_lexer::TokenKind,
-        start: BytePos,
-    ) -> TokenKind {
-        match token {
-            rustc_lexer::TokenKind::LineComment => {
-                let string = self.str_from(start);
-                // comments with only more "/"s are not doc comments
-                let tok = if comments::is_line_doc_comment(string) {
-                    self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment");
-                    token::DocComment(Symbol::intern(string))
-                } else {
-                    token::Comment
-                };
-
-                tok
-            }
-            rustc_lexer::TokenKind::BlockComment { terminated } => {
-                let string = self.str_from(start);
-                // block comments starting with "/**" or "/*!" are doc-comments
-                // but comments with only "*"s between two "/"s are not
-                let is_doc_comment = comments::is_block_doc_comment(string);
-
-                // An unterminated comment is fatal: nothing below runs in that case.
-                if !terminated {
-                    let msg = if is_doc_comment {
-                        "unterminated block doc-comment"
-                    } else {
-                        "unterminated block comment"
-                    };
-                    let last_bpos = self.pos;
-                    self.fatal_span_(start, last_bpos, msg).raise();
-                }
-
-                let tok = if is_doc_comment {
-                    self.forbid_bare_cr(start,
-                                        string,
-                                        "bare CR not allowed in block doc-comment");
-                    token::DocComment(Symbol::intern(string))
-                } else {
-                    token::Comment
-                };
-
-                tok
-            }
-            rustc_lexer::TokenKind::Whitespace => token::Whitespace,
-            rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
-                let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
-                let mut ident_start = start;
-                // For a raw identifier the symbol starts two bytes in, i.e. after
-                // the `r#` prefix.
-                if is_raw_ident {
-                    ident_start = ident_start + BytePos(2);
-                }
-                // FIXME: perform NFKC normalization here. (Issue #2253)
-                let sym = self.symbol_from(ident_start);
-                if is_raw_ident {
-                    // The span covers the whole token, prefix included.
-                    let span = self.mk_sp(start, self.pos);
-                    if !sym.can_be_raw() {
-                        self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
-                    }
-                    // Every raw identifier is recorded in the session, valid or not.
-                    self.sess.raw_identifier_spans.borrow_mut().push(span);
-                }
-                token::Ident(sym, is_raw_ident)
-            }
-            rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
-                // `suffix_start` arrives as an offset from the start of the token;
-                // turn it into an absolute position.
-                let suffix_start = start + BytePos(suffix_start as u32);
-                let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
-                // A suffix exists only if something follows the literal body.
-                let suffix = if suffix_start < self.pos {
-                    let string = self.str_from(suffix_start);
-                    // A lone `_` suffix is warned about and then dropped.
-                    if string == "_" {
-                        self.sess.span_diagnostic
-                            .struct_span_warn(self.mk_sp(suffix_start, self.pos),
-                                              "underscore literal suffix is not allowed")
-                            .warn("this was previously accepted by the compiler but is \
-                                   being phased out; it will become a hard error in \
-                                   a future release!")
-                            .note("for more information, see issue #42326 \
-                                   <https://github.com/rust-lang/rust/issues/42326>")
-                            .emit();
-                        None
-                    } else {
-                        Some(Symbol::intern(string))
-                    }
-                } else {
-                    None
-                };
-                token::Literal(token::Lit { kind, symbol, suffix })
-            }
-            rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
-                // Include the leading `'` in the real identifier, for macro
-                // expansion purposes. See #12512 for the gory details of why
-                // this is necessary.
-                let lifetime_name = self.str_from(start);
-                // Reported but not fatal: a `Lifetime` token is still produced.
-                if starts_with_number {
-                    self.err_span_(
-                        start,
-                        self.pos,
-                        "lifetimes cannot start with a number",
-                    );
-                }
-                let ident = Symbol::intern(lifetime_name);
-                token::Lifetime(ident)
-            }
-            rustc_lexer::TokenKind::Semi => token::Semi,
-            rustc_lexer::TokenKind::Comma => token::Comma,
-            rustc_lexer::TokenKind::Dot => token::Dot,
-            rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
-            rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
-            rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace),
-            rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace),
-            rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket),
-            rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket),
-            rustc_lexer::TokenKind::At => token::At,
-            rustc_lexer::TokenKind::Pound => token::Pound,
-            rustc_lexer::TokenKind::Tilde => token::Tilde,
-            rustc_lexer::TokenKind::Question => token::Question,
-            rustc_lexer::TokenKind::Colon => token::Colon,
-            rustc_lexer::TokenKind::Dollar => token::Dollar,
-            rustc_lexer::TokenKind::Eq => token::Eq,
-            rustc_lexer::TokenKind::Not => token::Not,
-            rustc_lexer::TokenKind::Lt => token::Lt,
-            rustc_lexer::TokenKind::Gt => token::Gt,
-            rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
-            rustc_lexer::TokenKind::And => token::BinOp(token::And),
-            rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
-            rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
-            rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
-            rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
-            rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
-            rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
-
-            rustc_lexer::TokenKind::Unknown => {
-                let c = self.str_from(start).chars().next().unwrap();
-                let mut err = self.struct_fatal_span_char(start,
-                                                          self.pos,
-                                                          "unknown start of token",
-                                                          c);
-                // FIXME: the lexer could be used to turn the ASCII version of unicode
-                // homoglyphs into the token, instead of keeping a table in
-                // `check_for_substitution`. Ideally, this should be inside `rustc_lexer`.
-                // However, we should first remove compound tokens like `<<` from
-                // `rustc_lexer`, and then add fancier error recovery to it, as there
-                // will be less overall work to do this way.
-                let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
-                    .unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
-                err.emit();
-                token
-            }
-        }
-    }
-
-    /// Validates a literal and returns its cooked kind together with the
-    /// interned text of its body.
-    ///
-    /// `start` is the literal's first byte and `suffix_start` the first byte
-    /// after its body. For character and string kinds the symbol covers only the
-    /// contents between the delimiters; for numbers it covers
-    /// `start..suffix_start`. Unterminated literals and malformed raw-string
-    /// delimiters raise a fatal error; other problems are reported and a token
-    /// is still produced.
-    fn cook_lexer_literal(
-        &self,
-        start: BytePos,
-        suffix_start: BytePos,
-        kind: rustc_lexer::LiteralKind
-    ) -> (token::LitKind, Symbol) {
-        match kind {
-            rustc_lexer::LiteralKind::Char { terminated } => {
-                if !terminated {
-                    self.fatal_span_(start, suffix_start, "unterminated character literal")
-                        .raise();
-                }
-                let lo = start + BytePos(1);
-                let hi = suffix_start - BytePos(1);
-                self.validate_char_escape(lo, hi);
-                (token::Char, self.symbol_from_to(lo, hi))
-            }
-            rustc_lexer::LiteralKind::Byte { terminated } => {
-                if !terminated {
-                    // The error span starts one byte into the literal.
-                    self.fatal_span_(start + BytePos(1), suffix_start,
-                                     "unterminated byte constant")
-                        .raise();
-                }
-                let lo = start + BytePos(2);
-                let hi = suffix_start - BytePos(1);
-                self.validate_byte_escape(lo, hi);
-                (token::Byte, self.symbol_from_to(lo, hi))
-            }
-            rustc_lexer::LiteralKind::Str { terminated } => {
-                if !terminated {
-                    self.fatal_span_(start, suffix_start, "unterminated double quote string")
-                        .raise();
-                }
-                let lo = start + BytePos(1);
-                let hi = suffix_start - BytePos(1);
-                self.validate_str_escape(lo, hi);
-                (token::Str, self.symbol_from_to(lo, hi))
-            }
-            rustc_lexer::LiteralKind::ByteStr { terminated } => {
-                if !terminated {
-                    // The error span starts one byte into the literal.
-                    self.fatal_span_(start + BytePos(1), suffix_start,
-                                     "unterminated double quote byte string")
-                        .raise();
-                }
-                let lo = start + BytePos(2);
-                let hi = suffix_start - BytePos(1);
-                self.validate_byte_str_escape(lo, hi);
-                (token::ByteStr, self.symbol_from_to(lo, hi))
-            }
-            rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => {
-                if !started {
-                    self.report_non_started_raw_string(start);
-                }
-                if !terminated {
-                    self.report_unterminated_raw_string(start, n_hashes);
-                }
-                let hashes: u16 = self.restrict_n_hashes(start, n_hashes);
-                let extra = u32::from(hashes);
-                // The `#`s are skipped on both sides of the contents.
-                let lo = start + BytePos(2 + extra);
-                let hi = suffix_start - BytePos(1 + extra);
-                self.validate_raw_str_escape(lo, hi);
-                (token::StrRaw(hashes), self.symbol_from_to(lo, hi))
-            }
-            rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => {
-                if !started {
-                    self.report_non_started_raw_string(start);
-                }
-                if !terminated {
-                    self.report_unterminated_raw_string(start, n_hashes);
-                }
-                let hashes: u16 = self.restrict_n_hashes(start, n_hashes);
-                let extra = u32::from(hashes);
-                // One byte more than the raw-string case at the front.
-                let lo = start + BytePos(3 + extra);
-                let hi = suffix_start - BytePos(1 + extra);
-                self.validate_raw_byte_str_escape(lo, hi);
-                (token::ByteStrRaw(hashes), self.symbol_from_to(lo, hi))
-            }
-            rustc_lexer::LiteralKind::Int { base, empty_int } => {
-                if !empty_int {
-                    self.validate_int_literal(base, start, suffix_start);
-                    (token::Integer, self.symbol_from_to(start, suffix_start))
-                } else {
-                    // Report the problem and substitute the literal `0`.
-                    self.err_span_(start, suffix_start, "no valid digits found for number");
-                    (token::Integer, sym::integer(0))
-                }
-            }
-            rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
-                if empty_exponent {
-                    let mut diag = self.struct_span_fatal(
-                        start, self.pos,
-                        "expected at least one digit in exponent"
-                    );
-                    diag.emit();
-                }
-
-                // Floats written in these bases are reported as errors.
-                let unsupported = match base {
-                    Base::Hexadecimal => Some("hexadecimal float literal is not supported"),
-                    Base::Octal => Some("octal float literal is not supported"),
-                    Base::Binary => Some("binary float literal is not supported"),
-                    _ => None,
-                };
-                if let Some(msg) = unsupported {
-                    self.err_span_(start, suffix_start, msg);
-                }
-
-                (token::Float, self.symbol_from_to(start, suffix_start))
-            }
-        }
-    }
-
-    /// Converts an absolute position into a byte index into `src` by
-    /// subtracting `start_pos`.
-    #[inline]
-    fn src_index(&self, pos: BytePos) -> usize {
-        let relative = pos - self.start_pos;
-        relative.to_usize()
-    }
-
-    /// Slice of the source text from `start` up to but excluding the current
-    /// position `self.pos`.
-    fn str_from(&self, start: BytePos) -> &str
-    {
-        let here = self.pos;
-        self.str_from_to(start, here)
-    }
-
-    /// Interns the source text between `start` and the current position.
-    fn symbol_from(&self, start: BytePos) -> Symbol {
-        debug!("taking an ident from {:?} to {:?}", start, self.pos);
-        let text = self.str_from(start);
-        Symbol::intern(text)
-    }
-
-    /// Interns the source text between `start` and the explicit endpoint `end`.
-    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
-        debug!("taking an ident from {:?} to {:?}", start, end);
-        let text = self.str_from_to(start, end);
-        Symbol::intern(text)
-    }
-
-    /// Returns the source text in the half-open range `start..end`, both given
-    /// as absolute positions.
-    fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
-    {
-        let lo = self.src_index(start);
-        let hi = self.src_index(end);
-        &self.src[lo..hi]
-    }
-
-    /// Reports `errmsg` once for every `\r` in `s`, each with a one-byte span.
-    ///
-    /// `start` is the absolute position of the first byte of `s`.
-    fn forbid_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) {
-        for (cr_at, _) in s.match_indices('\r') {
-            let after = cr_at + 1;
-            self.err_span_(start + BytePos(after as u32 - 1),
-                           start + BytePos(after as u32),
-                           errmsg);
-        }
-    }
-
-    fn report_non_started_raw_string(&self, start: BytePos) -> ! {
-        let bad_char = self.str_from(start).chars().last().unwrap();
-        self
-            .struct_fatal_span_char(
-                start,
-                self.pos,
-                "found invalid character; only `#` is allowed \
-                 in raw string delimitation",
-                bad_char,
-            )
-            .emit();
-        FatalError.raise()
-    }
-
-    fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! {
-        let mut err = self.struct_span_fatal(
-            start, start,
-            "unterminated raw string",
-        );
-        err.span_label(
-            self.mk_sp(start, start),
-            "unterminated raw string",
-        );
-
-        if n_hashes > 0 {
-            err.note(&format!("this raw string should be terminated with `\"{}`",
-                                "#".repeat(n_hashes as usize)));
-        }
-
-        err.emit();
-        FatalError.raise()
-    }
-
-    fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 {
-        match n_hashes.try_into() {
-            Ok(n_hashes) => n_hashes,
-            Err(_) => {
-                self.fatal_span_(start,
-                                 self.pos,
-                                 "too many `#` symbols: raw strings may be \
-                                  delimited by up to 65535 `#` symbols").raise();
-            }
-        }
-    }
-
-    /// Checks the contents of a character literal and reports an invalid
-    /// escape, if any, through `emit_unescape_error`.
-    fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) {
-        let contents = self.str_from_to(content_start, content_end);
-        match unescape::unescape_char(contents) {
-            Ok(_) => {}
-            Err((off, err)) => {
-                // The reported span is one byte wider than the contents on each side.
-                let quoted = self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
-                emit_unescape_error(
-                    &self.sess.span_diagnostic,
-                    contents,
-                    quoted,
-                    unescape::Mode::Char,
-                    0..off,
-                    err,
-                )
-            }
-        }
-    }
-
-    /// Checks the contents of a byte literal and reports an invalid escape,
-    /// if any, through `emit_unescape_error`.
-    fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) {
-        let contents = self.str_from_to(content_start, content_end);
-        match unescape::unescape_byte(contents) {
-            Ok(_) => {}
-            Err((off, err)) => {
-                // The reported span is one byte wider than the contents on each side.
-                let quoted = self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
-                emit_unescape_error(
-                    &self.sess.span_diagnostic,
-                    contents,
-                    quoted,
-                    unescape::Mode::Byte,
-                    0..off,
-                    err,
-                )
-            }
-        }
-    }
-
-    /// Walks the contents of a string literal and reports every invalid
-    /// escape through `emit_unescape_error`.
-    fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) {
-        let contents = self.str_from_to(content_start, content_end);
-        let handler = &self.sess.span_diagnostic;
-        unescape::unescape_str(contents, &mut |range, unescaped| match unescaped {
-            Ok(_) => {}
-            Err(err) => {
-                // The reported span is one byte wider than the contents on each side.
-                let quoted = self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
-                emit_unescape_error(handler, contents, quoted, unescape::Mode::Str, range, err)
-            }
-        })
-    }
-
-    fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
-        let lit = self.str_from_to(content_start, content_end);
-        unescape::unescape_raw_str(lit, &mut |range, c| {
-            if let Err(err) = c {
-                emit_unescape_error(
-                    &self.sess.span_diagnostic,
-                    lit,
-                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
-                    unescape::Mode::Str,
-                    range,
-                    err,
-                )
-            }
-        })
-    }
-
-    fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
-        let lit = self.str_from_to(content_start, content_end);
-        unescape::unescape_raw_byte_str(lit, &mut |range, c| {
-            if let Err(err) = c {
-                emit_unescape_error(
-                    &self.sess.span_diagnostic,
-                    lit,
-                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
-                    unescape::Mode::ByteStr,
-                    range,
-                    err,
-                )
-            }
-        })
-    }
-
-    /// Walks the contents of a byte string literal and reports every invalid
-    /// escape through `emit_unescape_error`.
-    fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
-        let contents = self.str_from_to(content_start, content_end);
-        let handler = &self.sess.span_diagnostic;
-        unescape::unescape_byte_str(contents, &mut |range, unescaped| match unescaped {
-            Ok(_) => {}
-            Err(err) => {
-                // The reported span is one byte wider than the contents on each side.
-                let quoted = self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
-                emit_unescape_error(handler, contents, quoted, unescape::Mode::ByteStr, range, err)
-            }
-        })
-    }
-
-    /// Reports every character of a binary or octal integer literal that is
-    /// neither `_` nor a digit of that base. Other bases are not checked here.
-    ///
-    /// `content_start` points at the literal's first byte; the first two bytes
-    /// are skipped before the digits are examined.
-    fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) {
-        let radix = match base {
-            Base::Binary => 2,
-            Base::Octal => 8,
-            _ => return,
-        };
-        let digits = self.str_from_to(content_start + BytePos(2), content_end);
-        for (offset, ch) in digits.char_indices() {
-            if ch == '_' || ch.to_digit(radix).is_some() {
-                continue;
-            }
-            let offset = offset as u32;
-            let lo = content_start + BytePos(2 + offset);
-            let hi = content_start + BytePos(2 + offset + ch.len_utf8() as u32);
-            self.err_span_(lo, hi,
-                           &format!("invalid digit for a base {} literal", radix));
-        }
-    }
-}
diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs
deleted file mode 100644
index 2b056434d4d..00000000000
--- a/src/libsyntax/parse/lexer/tokentrees.rs
+++ /dev/null
@@ -1,280 +0,0 @@
-use rustc_data_structures::fx::FxHashMap;
-use syntax_pos::Span;
-
-use super::{StringReader, UnmatchedBrace};
-
-use crate::print::pprust::token_to_string;
-use crate::token::{self, Token};
-use crate::tokenstream::{DelimSpan, IsJoint::{self, *}, TokenStream, TokenTree, TreeAndJoint};
-
-use errors::PResult;
-
-impl<'a> StringReader<'a> {
-    crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
-        let mut tt_reader = TokenTreesReader {
-            string_reader: self,
-            token: Token::dummy(),
-            joint_to_prev: Joint,
-            open_braces: Vec::new(),
-            unmatched_braces: Vec::new(),
-            matching_delim_spans: Vec::new(),
-            last_unclosed_found_span: None,
-            last_delim_empty_block_spans: FxHashMap::default()
-        };
-        let res = tt_reader.parse_all_token_trees();
-        (res, tt_reader.unmatched_braces)
-    }
-}
-
-struct TokenTreesReader<'a> {
-    string_reader: StringReader<'a>,
-    token: Token,
-    joint_to_prev: IsJoint,
-    /// Stack of open delimiters and their spans. Used for error message.
-    open_braces: Vec<(token::DelimToken, Span)>,
-    unmatched_braces: Vec<UnmatchedBrace>,
-    /// The type and spans for all braces
-    ///
-    /// Used only for error recovery when arriving to EOF with mismatched braces.
-    matching_delim_spans: Vec<(token::DelimToken, Span, Span)>,
-    last_unclosed_found_span: Option<Span>,
-    last_delim_empty_block_spans: FxHashMap<token::DelimToken, Span>
-}
-
-impl<'a> TokenTreesReader<'a> {
-    // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
-    fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> {
-        let mut buf = TokenStreamBuilder::default();
-
-        self.real_token();
-        while self.token != token::Eof {
-            buf.push(self.parse_token_tree()?);
-        }
-
-        Ok(buf.into_token_stream())
-    }
-
-    // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
-    fn parse_token_trees_until_close_delim(&mut self) -> TokenStream {
-        let mut buf = TokenStreamBuilder::default();
-        loop {
-            if let token::CloseDelim(..) = self.token.kind {
-                return buf.into_token_stream();
-            }
-
-            match self.parse_token_tree() {
-                Ok(tree) => buf.push(tree),
-                Err(mut e) => {
-                    e.emit();
-                    return buf.into_token_stream();
-                }
-            }
-        }
-    }
-
-    fn parse_token_tree(&mut self) -> PResult<'a, TreeAndJoint> {
-        let sm = self.string_reader.sess.source_map();
-        match self.token.kind {
-            token::Eof => {
-                let msg = "this file contains an un-closed delimiter";
-                let mut err = self.string_reader.sess.span_diagnostic
-                    .struct_span_err(self.token.span, msg);
-                for &(_, sp) in &self.open_braces {
-                    err.span_label(sp, "un-closed delimiter");
-                    self.unmatched_braces.push(UnmatchedBrace {
-                        expected_delim: token::DelimToken::Brace,
-                        found_delim: None,
-                        found_span: self.token.span,
-                        unclosed_span: Some(sp),
-                        candidate_span: None,
-                    });
-                }
-
-                if let Some((delim, _)) = self.open_braces.last() {
-                    if let Some((_, open_sp, close_sp)) = self.matching_delim_spans.iter()
-                        .filter(|(d, open_sp, close_sp)| {
-                            if let Some(close_padding) = sm.span_to_margin(*close_sp) {
-                                if let Some(open_padding) = sm.span_to_margin(*open_sp) {
-                                    return delim == d && close_padding != open_padding;
-                                }
-                            }
-                            false
-                        }).next()  // these are in reverse order as they get inserted on close, but
-                    {              // we want the last open/first close
-                        err.span_label(
-                            *open_sp,
-                            "this delimiter might not be properly closed...",
-                        );
-                        err.span_label(
-                            *close_sp,
-                            "...as it matches this but it has different indentation",
-                        );
-                    }
-                }
-                Err(err)
-            },
-            token::OpenDelim(delim) => {
-                // The span for beginning of the delimited section
-                let pre_span = self.token.span;
-
-                // Parse the open delimiter.
-                self.open_braces.push((delim, self.token.span));
-                self.real_token();
-
-                // Parse the token trees within the delimiters.
-                // We stop at any delimiter so we can try to recover if the user
-                // uses an incorrect delimiter.
-                let tts = self.parse_token_trees_until_close_delim();
-
-                // Expand to cover the entire delimited token tree
-                let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
-
-                match self.token.kind {
-                    // Correct delimiter.
-                    token::CloseDelim(d) if d == delim => {
-                        let (open_brace, open_brace_span) = self.open_braces.pop().unwrap();
-                        let close_brace_span = self.token.span;
-
-                        if tts.is_empty() {
-                            let empty_block_span = open_brace_span.to(close_brace_span);
-                            self.last_delim_empty_block_spans.insert(delim, empty_block_span);
-                        }
-
-                        if self.open_braces.len() == 0 {
-                            // Clear up these spans to avoid suggesting them as we've found
-                            // properly matched delimiters so far for an entire block.
-                            self.matching_delim_spans.clear();
-                        } else {
-                            self.matching_delim_spans.push(
-                                (open_brace, open_brace_span, close_brace_span),
-                            );
-                        }
-                        // Parse the close delimiter.
-                        self.real_token();
-                    }
-                    // Incorrect delimiter.
-                    token::CloseDelim(other) => {
-                        let mut unclosed_delimiter = None;
-                        let mut candidate = None;
-                        if self.last_unclosed_found_span != Some(self.token.span) {
-                            // do not complain about the same unclosed delimiter multiple times
-                            self.last_unclosed_found_span = Some(self.token.span);
-                            // This is a conservative error: only report the last unclosed
-                            // delimiter. The previous unclosed delimiters could actually be
-                            // closed! The parser just hasn't gotten to them yet.
-                            if let Some(&(_, sp)) = self.open_braces.last() {
-                                unclosed_delimiter = Some(sp);
-                            };
-                            if let Some(current_padding) = sm.span_to_margin(self.token.span) {
-                                for (brace, brace_span) in &self.open_braces {
-                                    if let Some(padding) = sm.span_to_margin(*brace_span) {
-                                        // high likelihood of these two corresponding
-                                        if current_padding == padding && brace == &other {
-                                            candidate = Some(*brace_span);
-                                        }
-                                    }
-                                }
-                            }
-                            let (tok, _) = self.open_braces.pop().unwrap();
-                            self.unmatched_braces.push(UnmatchedBrace {
-                                expected_delim: tok,
-                                found_delim: Some(other),
-                                found_span: self.token.span,
-                                unclosed_span: unclosed_delimiter,
-                                candidate_span: candidate,
-                            });
-                        } else {
-                            self.open_braces.pop();
-                        }
-
-                        // If the incorrect delimiter matches an earlier opening
-                        // delimiter, then don't consume it (it can be used to
-                        // close the earlier one). Otherwise, consume it.
-                        // E.g., we try to recover from:
-                        // fn foo() {
-                        //     bar(baz(
-                        // }  // Incorrect delimiter but matches the earlier `{`
-                        if !self.open_braces.iter().any(|&(b, _)| b == other) {
-                            self.real_token();
-                        }
-                    }
-                    token::Eof => {
-                        // Silently recover, the EOF token will be seen again
-                        // and an error emitted then. Thus we don't pop from
-                        // self.open_braces here.
-                    },
-                    _ => {}
-                }
-
-                Ok(TokenTree::Delimited(
-                    delim_span,
-                    delim,
-                    tts.into()
-                ).into())
-            },
-            token::CloseDelim(delim) => {
-                // An unexpected closing delimiter (i.e., there is no
-                // matching opening delimiter).
-                let token_str = token_to_string(&self.token);
-                let msg = format!("unexpected close delimiter: `{}`", token_str);
-                let mut err = self.string_reader.sess.span_diagnostic
-                    .struct_span_err(self.token.span, &msg);
-
-                if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) {
-                    err.span_label(
-                        span,
-                        "this block is empty, you might have not meant to close it"
-                    );
-                }
-                err.span_label(self.token.span, "unexpected close delimiter");
-                Err(err)
-            },
-            _ => {
-                let tt = TokenTree::Token(self.token.take());
-                self.real_token();
-                let is_joint = self.joint_to_prev == Joint && self.token.is_op();
-                Ok((tt, if is_joint { Joint } else { NonJoint }))
-            }
-        }
-    }
-
-    fn real_token(&mut self) {
-        self.joint_to_prev = Joint;
-        loop {
-            let token = self.string_reader.next_token();
-            match token.kind {
-                token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => {
-                    self.joint_to_prev = NonJoint;
-                }
-                _ => {
-                    self.token = token;
-                    return;
-                }
-            }
-        }
-    }
-}
-
-#[derive(Default)]
-struct TokenStreamBuilder {
-    buf: Vec<TreeAndJoint>,
-}
-
-impl TokenStreamBuilder {
-    fn push(&mut self, (tree, joint): TreeAndJoint) {
-        if let Some((TokenTree::Token(prev_token), Joint)) = self.buf.last() {
-            if let TokenTree::Token(token) = &tree {
-                if let Some(glued) = prev_token.glue(token) {
-                    self.buf.pop();
-                    self.buf.push((TokenTree::Token(glued), joint));
-                    return;
-                }
-            }
-        }
-        self.buf.push((tree, joint))
-    }
-
-    fn into_token_stream(self) -> TokenStream {
-        TokenStream::new(self.buf)
-    }
-}
diff --git a/src/libsyntax/parse/lexer/unescape_error_reporting.rs b/src/libsyntax/parse/lexer/unescape_error_reporting.rs
deleted file mode 100644
index 5565015179c..00000000000
--- a/src/libsyntax/parse/lexer/unescape_error_reporting.rs
+++ /dev/null
@@ -1,215 +0,0 @@
-//! Utilities for rendering escape sequence errors as diagnostics.
-
-use std::ops::Range;
-use std::iter::once;
-
-use rustc_lexer::unescape::{EscapeError, Mode};
-use syntax_pos::{Span, BytePos};
-
-use crate::errors::{Handler, Applicability};
-
-pub(crate) fn emit_unescape_error(
-    handler: &Handler,
-    // interior part of the literal, without quotes
-    lit: &str,
-    // full span of the literal, including quotes
-    span_with_quotes: Span,
-    mode: Mode,
-    // range of the error inside `lit`
-    range: Range<usize>,
-    error: EscapeError,
-) {
-    log::debug!("emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
-                lit, span_with_quotes, mode, range, error);
-    let span = {
-        let Range { start, end } = range;
-        let (start, end) = (start as u32, end as u32);
-        let lo = span_with_quotes.lo() + BytePos(start + 1);
-        let hi = lo + BytePos(end - start);
-            span_with_quotes
-            .with_lo(lo)
-            .with_hi(hi)
-    };
-    let last_char = || {
-        let c = lit[range.clone()].chars().rev().next().unwrap();
-        let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
-        (c, span)
-    };
-    match error {
-        EscapeError::LoneSurrogateUnicodeEscape => {
-            handler.struct_span_err(span, "invalid unicode character escape")
-                .help("unicode escape must not be a surrogate")
-                .emit();
-        }
-        EscapeError::OutOfRangeUnicodeEscape => {
-            handler.struct_span_err(span, "invalid unicode character escape")
-                .help("unicode escape must be at most 10FFFF")
-                .emit();
-        }
-        EscapeError::MoreThanOneChar => {
-            let msg = if mode.is_bytes() {
-                "if you meant to write a byte string literal, use double quotes"
-            } else {
-                "if you meant to write a `str` literal, use double quotes"
-            };
-
-            handler
-                .struct_span_err(
-                    span_with_quotes,
-                    "character literal may only contain one codepoint",
-                )
-                .span_suggestion(
-                    span_with_quotes,
-                    msg,
-                    format!("\"{}\"", lit),
-                    Applicability::MachineApplicable,
-                ).emit()
-        }
-        EscapeError::EscapeOnlyChar => {
-            let (c, _span) = last_char();
-
-            let mut msg = if mode.is_bytes() {
-                "byte constant must be escaped: "
-            } else {
-                "character constant must be escaped: "
-            }.to_string();
-            push_escaped_char(&mut msg, c);
-
-            handler.span_err(span, msg.as_str())
-        }
-        EscapeError::BareCarriageReturn => {
-            let msg = if mode.in_double_quotes() {
-                "bare CR not allowed in string, use \\r instead"
-            } else {
-                "character constant must be escaped: \\r"
-            };
-            handler.span_err(span, msg);
-        }
-        EscapeError::BareCarriageReturnInRawString => {
-            assert!(mode.in_double_quotes());
-            let msg = "bare CR not allowed in raw string";
-            handler.span_err(span, msg);
-        }
-        EscapeError::InvalidEscape => {
-            let (c, span) = last_char();
-
-            let label = if mode.is_bytes() {
-                "unknown byte escape"
-            } else {
-                "unknown character escape"
-            };
-            let mut msg = label.to_string();
-            msg.push_str(": ");
-            push_escaped_char(&mut msg, c);
-
-            let mut diag = handler.struct_span_err(span, msg.as_str());
-            diag.span_label(span, label);
-            if c == '{' || c == '}' && !mode.is_bytes() {
-                diag.help("if used in a formatting string, \
-                           curly braces are escaped with `{{` and `}}`");
-            } else if c == '\r' {
-                diag.help("this is an isolated carriage return; \
-                           consider checking your editor and version control settings");
-            }
-            diag.emit();
-        }
-        EscapeError::TooShortHexEscape => {
-            handler.span_err(span, "numeric character escape is too short")
-        }
-        EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
-            let (c, span) = last_char();
-
-            let mut msg = if error == EscapeError::InvalidCharInHexEscape {
-                "invalid character in numeric character escape: "
-            } else {
-                "invalid character in unicode escape: "
-            }.to_string();
-            push_escaped_char(&mut msg, c);
-
-            handler.span_err(span, msg.as_str())
-        }
-        EscapeError::NonAsciiCharInByte => {
-            assert!(mode.is_bytes());
-            let (_c, span) = last_char();
-            handler.span_err(span, "byte constant must be ASCII. \
-                                    Use a \\xHH escape for a non-ASCII byte")
-        }
-        EscapeError::NonAsciiCharInByteString => {
-            assert!(mode.is_bytes());
-            let (_c, span) = last_char();
-            handler.span_err(span, "raw byte string must be ASCII")
-        }
-        EscapeError::OutOfRangeHexEscape => {
-            handler.span_err(span, "this form of character escape may only be used \
-                                    with characters in the range [\\x00-\\x7f]")
-        }
-        EscapeError::LeadingUnderscoreUnicodeEscape => {
-            let (_c, span) = last_char();
-            handler.span_err(span, "invalid start of unicode escape")
-        }
-        EscapeError::OverlongUnicodeEscape => {
-            handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)")
-        }
-        EscapeError::UnclosedUnicodeEscape => {
-            handler.span_err(span, "unterminated unicode escape (needed a `}`)")
-        }
-        EscapeError::NoBraceInUnicodeEscape => {
-            let msg = "incorrect unicode escape sequence";
-            let mut diag = handler.struct_span_err(span, msg);
-
-            let mut suggestion = "\\u{".to_owned();
-            let mut suggestion_len = 0;
-            let (c, char_span) = last_char();
-            let chars = once(c).chain(lit[range.end..].chars());
-            for c in chars.take(6).take_while(|c| c.is_digit(16)) {
-                suggestion.push(c);
-                suggestion_len += c.len_utf8();
-            }
-
-            if suggestion_len > 0 {
-                suggestion.push('}');
-                let lo = char_span.lo();
-                let hi = lo + BytePos(suggestion_len as u32);
-                diag.span_suggestion(
-                    span.with_lo(lo).with_hi(hi),
-                    "format of unicode escape sequences uses braces",
-                    suggestion,
-                    Applicability::MaybeIncorrect,
-                );
-            } else {
-                diag.span_label(span, msg);
-                diag.help(
-                    "format of unicode escape sequences is `\\u{...}`",
-                );
-            }
-
-            diag.emit();
-        }
-        EscapeError::UnicodeEscapeInByte => {
-            handler.span_err(span, "unicode escape sequences cannot be used \
-                                    as a byte or in a byte string")
-        }
-        EscapeError::EmptyUnicodeEscape => {
-            handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)")
-        }
-        EscapeError::ZeroChars => {
-            handler.span_err(span, "empty character literal")
-        }
-        EscapeError::LoneSlash => {
-            handler.span_err(span, "invalid trailing slash in literal")
-        }
-    }
-}
-
-/// Pushes a character to a message string for error reporting
-pub(crate) fn push_escaped_char(msg: &mut String, c: char) {
-    match c {
-        '\u{20}'..='\u{7e}' => {
-            // Don't escape \, ' or " for user-facing messages
-            msg.push(c);
-        }
-        _ => {
-            msg.extend(c.escape_default());
-        }
-    }
-}
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
deleted file mode 100644
index 6eb995b61d3..00000000000
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ /dev/null
@@ -1,392 +0,0 @@
-// Characters and their corresponding confusables were collected from
-// http://www.unicode.org/Public/security/10.0.0/confusables.txt
-
-use super::StringReader;
-use errors::{Applicability, DiagnosticBuilder};
-use syntax_pos::{BytePos, Pos, Span, symbol::kw};
-use crate::token;
-
-#[rustfmt::skip] // for line breaks
-const UNICODE_ARRAY: &[(char, &str, char)] = &[
-    (' ', "Line Separator", ' '),
-    (' ', "Paragraph Separator", ' '),
-    (' ', "Ogham Space mark", ' '),
-    (' ', "En Quad", ' '),
-    (' ', "Em Quad", ' '),
-    (' ', "En Space", ' '),
-    (' ', "Em Space", ' '),
-    (' ', "Three-Per-Em Space", ' '),
-    (' ', "Four-Per-Em Space", ' '),
-    (' ', "Six-Per-Em Space", ' '),
-    (' ', "Punctuation Space", ' '),
-    (' ', "Thin Space", ' '),
-    (' ', "Hair Space", ' '),
-    (' ', "Medium Mathematical Space", ' '),
-    (' ', "No-Break Space", ' '),
-    (' ', "Figure Space", ' '),
-    (' ', "Narrow No-Break Space", ' '),
-    ('　', "Ideographic Space", ' '),
-
-    ('ߺ', "Nko Lajanyalan", '_'),
-    ('﹍', "Dashed Low Line", '_'),
-    ('﹎', "Centreline Low Line", '_'),
-    ('﹏', "Wavy Low Line", '_'),
-    ('＿', "Fullwidth Low Line", '_'),
-
-    ('‐', "Hyphen", '-'),
-    ('‑', "Non-Breaking Hyphen", '-'),
-    ('‒', "Figure Dash", '-'),
-    ('–', "En Dash", '-'),
-    ('—', "Em Dash", '-'),
-    ('﹘', "Small Em Dash", '-'),
-    ('۔', "Arabic Full Stop", '-'),
-    ('⁃', "Hyphen Bullet", '-'),
-    ('˗', "Modifier Letter Minus Sign", '-'),
-    ('−', "Minus Sign", '-'),
-    ('➖', "Heavy Minus Sign", '-'),
-    ('Ⲻ', "Coptic Letter Dialect-P Ni", '-'),
-    ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
-    ('－', "Fullwidth Hyphen-Minus", '-'),
-    ('―', "Horizontal Bar", '-'),
-    ('─', "Box Drawings Light Horizontal", '-'),
-    ('━', "Box Drawings Heavy Horizontal", '-'),
-    ('㇐', "CJK Stroke H", '-'),
-    ('ꟷ', "Latin Epigraphic Letter Sideways I", '-'),
-    ('ᅳ', "Hangul Jungseong Eu", '-'),
-    ('ㅡ', "Hangul Letter Eu", '-'),
-    ('一', "CJK Unified Ideograph-4E00", '-'),
-    ('⼀', "Kangxi Radical One", '-'),
-
-    ('؍', "Arabic Date Separator", ','),
-    ('٫', "Arabic Decimal Separator", ','),
-    ('‚', "Single Low-9 Quotation Mark", ','),
-    ('¸', "Cedilla", ','),
-    ('ꓹ', "Lisu Letter Tone Na Po", ','),
-    ('，', "Fullwidth Comma", ','),
-
-    (';', "Greek Question Mark", ';'),
-    ('；', "Fullwidth Semicolon", ';'),
-    ('︔', "Presentation Form For Vertical Semicolon", ';'),
-
-    ('ः', "Devanagari Sign Visarga", ':'),
-    ('ઃ', "Gujarati Sign Visarga", ':'),
-    ('：', "Fullwidth Colon", ':'),
-    ('։', "Armenian Full Stop", ':'),
-    ('܃', "Syriac Supralinear Colon", ':'),
-    ('܄', "Syriac Sublinear Colon", ':'),
-    ('᛬', "Runic Multiple Punctuation", ':'),
-    ('︰', "Presentation Form For Vertical Two Dot Leader", ':'),
-    ('᠃', "Mongolian Full Stop", ':'),
-    ('᠉', "Mongolian Manchu Full Stop", ':'),
-    ('⁚', "Two Dot Punctuation", ':'),
-    ('׃', "Hebrew Punctuation Sof Pasuq", ':'),
-    ('˸', "Modifier Letter Raised Colon", ':'),
-    ('꞉', "Modifier Letter Colon", ':'),
-    ('∶', "Ratio", ':'),
-    ('ː', "Modifier Letter Triangular Colon", ':'),
-    ('ꓽ', "Lisu Letter Tone Mya Jeu", ':'),
-    ('︓', "Presentation Form For Vertical Colon", ':'),
-
-    ('！', "Fullwidth Exclamation Mark", '!'),
-    ('ǃ', "Latin Letter Retroflex Click", '!'),
-    ('ⵑ', "Tifinagh Letter Tuareg Yang", '!'),
-    ('︕', "Presentation Form For Vertical Exclamation Mark", '!'),
-
-    ('ʔ', "Latin Letter Glottal Stop", '?'),
-    ('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
-    ('ॽ', "Devanagari Letter Glottal Stop", '?'),
-    ('Ꭾ', "Cherokee Letter He", '?'),
-    ('ꛫ', "Bamum Letter Ntuu", '?'),
-    ('？', "Fullwidth Question Mark", '?'),
-    ('︖', "Presentation Form For Vertical Question Mark", '?'),
-
-    ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
-    ('․', "One Dot Leader", '.'),
-    ('܁', "Syriac Supralinear Full Stop", '.'),
-    ('܂', "Syriac Sublinear Full Stop", '.'),
-    ('꘎', "Vai Full Stop", '.'),
-    ('𐩐', "Kharoshthi Punctuation Dot", '.'),
-    ('٠', "Arabic-Indic Digit Zero", '.'),
-    ('۰', "Extended Arabic-Indic Digit Zero", '.'),
-    ('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
-    ('·', "Middle Dot", '.'),
-    ('・', "Katakana Middle Dot", '.'),
-    ('･', "Halfwidth Katakana Middle Dot", '.'),
-    ('᛫', "Runic Single Punctuation", '.'),
-    ('·', "Greek Ano Teleia", '.'),
-    ('⸱', "Word Separator Middle Dot", '.'),
-    ('𐄁', "Aegean Word Separator Dot", '.'),
-    ('•', "Bullet", '.'),
-    ('‧', "Hyphenation Point", '.'),
-    ('∙', "Bullet Operator", '.'),
-    ('⋅', "Dot Operator", '.'),
-    ('ꞏ', "Latin Letter Sinological Dot", '.'),
-    ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
-    ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
-    ('．', "Fullwidth Full Stop", '.'),
-    ('。', "Ideographic Full Stop", '.'),
-    ('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'),
-
-    ('՝', "Armenian Comma", '\''),
-    ('＇', "Fullwidth Apostrophe", '\''),
-    ('‘', "Left Single Quotation Mark", '\''),
-    ('’', "Right Single Quotation Mark", '\''),
-    ('‛', "Single High-Reversed-9 Quotation Mark", '\''),
-    ('′', "Prime", '\''),
-    ('‵', "Reversed Prime", '\''),
-    ('՚', "Armenian Apostrophe", '\''),
-    ('׳', "Hebrew Punctuation Geresh", '\''),
-    ('`', "Grave Accent", '\''),
-    ('`', "Greek Varia", '\''),
-    ('｀', "Fullwidth Grave Accent", '\''),
-    ('´', "Acute Accent", '\''),
-    ('΄', "Greek Tonos", '\''),
-    ('´', "Greek Oxia", '\''),
-    ('᾽', "Greek Koronis", '\''),
-    ('᾿', "Greek Psili", '\''),
-    ('῾', "Greek Dasia", '\''),
-    ('ʹ', "Modifier Letter Prime", '\''),
-    ('ʹ', "Greek Numeral Sign", '\''),
-    ('ˈ', "Modifier Letter Vertical Line", '\''),
-    ('ˊ', "Modifier Letter Acute Accent", '\''),
-    ('ˋ', "Modifier Letter Grave Accent", '\''),
-    ('˴', "Modifier Letter Middle Grave Accent", '\''),
-    ('ʻ', "Modifier Letter Turned Comma", '\''),
-    ('ʽ', "Modifier Letter Reversed Comma", '\''),
-    ('ʼ', "Modifier Letter Apostrophe", '\''),
-    ('ʾ', "Modifier Letter Right Half Ring", '\''),
-    ('ꞌ', "Latin Small Letter Saltillo", '\''),
-    ('י', "Hebrew Letter Yod", '\''),
-    ('ߴ', "Nko High Tone Apostrophe", '\''),
-    ('ߵ', "Nko Low Tone Apostrophe", '\''),
-    ('ᑊ', "Canadian Syllabics West-Cree P", '\''),
-    ('ᛌ', "Runic Letter Short-Twig-Sol S", '\''),
-    ('𖽑', "Miao Sign Aspiration", '\''),
-    ('𖽒', "Miao Sign Reformed Voicing", '\''),
-
-    ('᳓', "Vedic Sign Nihshvasa", '"'),
-    ('＂', "Fullwidth Quotation Mark", '"'),
-    ('“', "Left Double Quotation Mark", '"'),
-    ('”', "Right Double Quotation Mark", '"'),
-    ('‟', "Double High-Reversed-9 Quotation Mark", '"'),
-    ('″', "Double Prime", '"'),
-    ('‶', "Reversed Double Prime", '"'),
-    ('〃', "Ditto Mark", '"'),
-    ('״', "Hebrew Punctuation Gershayim", '"'),
-    ('˝', "Double Acute Accent", '"'),
-    ('ʺ', "Modifier Letter Double Prime", '"'),
-    ('˶', "Modifier Letter Middle Double Acute Accent", '"'),
-    ('˵', "Modifier Letter Middle Double Grave Accent", '"'),
-    ('ˮ', "Modifier Letter Double Apostrophe", '"'),
-    ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
-    ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
-    ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
-
-    ('（', "Fullwidth Left Parenthesis", '('),
-    ('❨', "Medium Left Parenthesis Ornament", '('),
-    ('﴾', "Ornate Left Parenthesis", '('),
-
-    ('）', "Fullwidth Right Parenthesis", ')'),
-    ('❩', "Medium Right Parenthesis Ornament", ')'),
-    ('﴿', "Ornate Right Parenthesis", ')'),
-
-    ('［', "Fullwidth Left Square Bracket", '['),
-    ('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
-    ('「', "Left Corner Bracket", '['),
-    ('『', "Left White Corner Bracket", '['),
-    ('【', "Left Black Lenticular Bracket", '['),
-    ('〔', "Left Tortoise Shell Bracket", '['),
-    ('〖', "Left White Lenticular Bracket", '['),
-    ('〘', "Left White Tortoise Shell Bracket", '['),
-    ('〚', "Left White Square Bracket", '['),
-
-    ('］', "Fullwidth Right Square Bracket", ']'),
-    ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
-    ('」', "Right Corner Bracket", ']'),
-    ('』', "Right White Corner Bracket", ']'),
-    ('】', "Right Black Lenticular Bracket", ']'),
-    ('〕', "Right Tortoise Shell Bracket", ']'),
-    ('〗', "Right White Lenticular Bracket", ']'),
-    ('〙', "Right White Tortoise Shell Bracket", ']'),
-    ('〛', "Right White Square Bracket", ']'),
-
-    ('❴', "Medium Left Curly Bracket Ornament", '{'),
-    ('𝄔', "Musical Symbol Brace", '{'),
-    ('｛', "Fullwidth Left Curly Bracket", '{'),
-
-    ('❵', "Medium Right Curly Bracket Ornament", '}'),
-    ('｝', "Fullwidth Right Curly Bracket", '}'),
-
-    ('⁎', "Low Asterisk", '*'),
-    ('٭', "Arabic Five Pointed Star", '*'),
-    ('∗', "Asterisk Operator", '*'),
-    ('𐌟', "Old Italic Letter Ess", '*'),
-    ('＊', "Fullwidth Asterisk", '*'),
-
-    ('᜵', "Philippine Single Punctuation", '/'),
-    ('⁁', "Caret Insertion Point", '/'),
-    ('∕', "Division Slash", '/'),
-    ('⁄', "Fraction Slash", '/'),
-    ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
-    ('⟋', "Mathematical Rising Diagonal", '/'),
-    ('⧸', "Big Solidus", '/'),
-    ('𝈺', "Greek Instrumental Notation Symbol-47", '/'),
-    ('㇓', "CJK Stroke Sp", '/'),
-    ('〳', "Vertical Kana Repeat Mark Upper Half", '/'),
-    ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'),
-    ('ノ', "Katakana Letter No", '/'),
-    ('丿', "CJK Unified Ideograph-4E3F", '/'),
-    ('⼃', "Kangxi Radical Slash", '/'),
-    ('／', "Fullwidth Solidus", '/'),
-
-    ('＼', "Fullwidth Reverse Solidus", '\\'),
-    ('﹨', "Small Reverse Solidus", '\\'),
-    ('∖', "Set Minus", '\\'),
-    ('⟍', "Mathematical Falling Diagonal", '\\'),
-    ('⧵', "Reverse Solidus Operator", '\\'),
-    ('⧹', "Big Reverse Solidus", '\\'),
-    ('⧹', "Greek Vocal Notation Symbol-16", '\\'),
-    ('⧹', "Greek Instrumental Symbol-48", '\\'),
-    ('㇔', "CJK Stroke D", '\\'),
-    ('丶', "CJK Unified Ideograph-4E36", '\\'),
-    ('⼂', "Kangxi Radical Dot", '\\'),
-    ('、', "Ideographic Comma", '\\'),
-    ('ヽ', "Katakana Iteration Mark", '\\'),
-
-    ('ꝸ', "Latin Small Letter Um", '&'),
-    ('＆', "Fullwidth Ampersand", '&'),
-
-    ('᛭', "Runic Cross Punctuation", '+'),
-    ('➕', "Heavy Plus Sign", '+'),
-    ('𐊛', "Lycian Letter H", '+'),
-    ('﬩', "Hebrew Letter Alternative Plus Sign", '+'),
-    ('＋', "Fullwidth Plus Sign", '+'),
-
-    ('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
-    ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
-    ('˂', "Modifier Letter Left Arrowhead", '<'),
-    ('𝈶', "Greek Instrumental Symbol-40", '<'),
-    ('ᐸ', "Canadian Syllabics Pa", '<'),
-    ('ᚲ', "Runic Letter Kauna", '<'),
-    ('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'),
-    ('⟨', "Mathematical Left Angle Bracket", '<'),
-    ('〈', "Left-Pointing Angle Bracket", '<'),
-    ('〈', "Left Angle Bracket", '<'),
-    ('㇛', "CJK Stroke Pd", '<'),
-    ('く', "Hiragana Letter Ku", '<'),
-    ('𡿨', "CJK Unified Ideograph-21FE8", '<'),
-    ('《', "Left Double Angle Bracket", '<'),
-    ('＜', "Fullwidth Less-Than Sign", '<'),
-
-    ('᐀', "Canadian Syllabics Hyphen", '='),
-    ('⹀', "Double Hyphen", '='),
-    ('゠', "Katakana-Hiragana Double Hyphen", '='),
-    ('꓿', "Lisu Punctuation Full Stop", '='),
-    ('＝', "Fullwidth Equals Sign", '='),
-
-    ('›', "Single Right-Pointing Angle Quotation Mark", '>'),
-    ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
-    ('˃', "Modifier Letter Right Arrowhead", '>'),
-    ('𝈷', "Greek Instrumental Symbol-42", '>'),
-    ('ᐳ', "Canadian Syllabics Po", '>'),
-    ('𖼿', "Miao Letter Archaic Zza", '>'),
-    ('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'),
-    ('⟩', "Mathematical Right Angle Bracket", '>'),
-    ('〉', "Right-Pointing Angle Bracket", '>'),
-    ('〉', "Right Angle Bracket", '>'),
-    ('》', "Right Double Angle Bracket", '>'),
-    ('＞', "Fullwidth Greater-Than Sign", '>'),
-];
-
-// FIXME: the lexer could be used to turn the ASCII version of Unicode homoglyphs into tokens,
-// instead of keeping the substitution token in this table. Ideally, this should be inside
-// `rustc_lexer`. However, we should first remove compound tokens like `<<` from `rustc_lexer`,
-// and then add fancier error recovery to it, as there will be less overall work to do this way.
-const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ // (char, name, substitute token)
-    (' ', "Space", Some(token::Whitespace)),
-    ('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
-    ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
-    (',', "Comma", Some(token::Comma)),
-    (';', "Semicolon", Some(token::Semi)),
-    (':', "Colon", Some(token::Colon)),
-    ('!', "Exclamation Mark", Some(token::Not)),
-    ('?', "Question Mark", Some(token::Question)),
-    ('.', "Period", Some(token::Dot)),
-    ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))),
-    (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))),
-    ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))),
-    (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))),
-    ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))),
-    ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))),
-    ('*', "Asterisk", Some(token::BinOp(token::Star))),
-    ('/', "Slash", Some(token::BinOp(token::Slash))),
-    ('\\', "Backslash", None),
-    ('&', "Ampersand", Some(token::BinOp(token::And))),
-    ('+', "Plus Sign", Some(token::BinOp(token::Plus))),
-    ('<', "Less-Than Sign", Some(token::Lt)),
-    ('=', "Equals Sign", Some(token::Eq)),
-    ('>', "Greater-Than Sign", Some(token::Gt)),
-    // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
-    // emitting the correct token; the quote characters therefore carry no substitute (`None`).
-    ('\'', "Single Quote", None),
-    ('"', "Quotation Mark", None),
-];
-
-crate fn check_for_substitution<'a>(
-    reader: &StringReader<'a>,
-    pos: BytePos,
-    ch: char,
-    err: &mut DiagnosticBuilder<'a>,
-) -> Option<token::TokenKind> { // adds an ASCII suggestion to `err` for a look-alike `ch`
-    let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) {
-        Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char),
-        None => return None, // `ch` is not in `UNICODE_ARRAY`: nothing to suggest
-    };
-
-    let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8())); // just `ch`
-
-    let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) {
-        Some((_ascii_char, ascii_name, token)) => (ascii_name, token),
-        None => { // `UNICODE_ARRAY` names an ASCII char that `ASCII_ARRAY` lacks
-            let msg = format!("substitution character not found for '{}'", ch);
-            reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
-            return None;
-        }
-    };
-
-    // Special help suggestion for "directed" double quotes: replace the whole “…” run at once.
-    if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
-        let msg = format!(
-            "Unicode characters '“' (Left Double Quotation Mark) and \
-             '”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
-            ascii_char, ascii_name
-        );
-        err.span_suggestion(
-            Span::with_root_ctxt(
-                pos,
-                pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
-            ),
-            &msg,
-            format!("\"{}\"", s),
-            Applicability::MaybeIncorrect,
-        );
-    } else { // any other look-alike: suggest replacing the single character
-        let msg = format!(
-            "Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
-            ch, u_name, ascii_char, ascii_name
-        );
-        err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect);
-    }
-    token.clone() // `None` when `ASCII_ARRAY` has no substitute token for `ascii_char`
-}
-
-/// Returns the text between a leading `from_ch` and the first following `to_ch`, if both exist.
-fn peek_delimited(text: &str, from_ch: char, to_ch: char) -> Option<&str> {
-    let mut chars = text.chars();
-    let first_char = chars.next()?; // empty `text` yields `None`
-    if first_char != from_ch {
-        return None;
-    }
-    let last_char_idx = chars.as_str().find(to_ch)?; // byte offset in the rest; `None` if absent
-    Some(&chars.as_str()[..last_char_idx]) // both delimiters are excluded from the result
-}
author	Mazdak Farrokhzad <twingoow@gmail.com>	2019-10-15 22:48:13 +0200
committer	Mazdak Farrokhzad <twingoow@gmail.com>	2019-11-10 03:57:18 +0100
commit	4ae2728fa8052915414127dce28245eb8f70842a (patch)
tree	27cc54d90904091e4dc9bf7ae5fa3b41be4b6187 /src/libsyntax/parse/lexer
parent	be023ebe850261c6bb202a02a686827d821c3697 (diff)
download	rust-4ae2728fa8052915414127dce28245eb8f70842a.tar.gz rust-4ae2728fa8052915414127dce28245eb8f70842a.zip