| author | bors <bors@rust-lang.org> | 2019-11-07 22:02:41 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2019-11-07 22:02:41 +0000 |
| commit | e8f43b72eb2596f360004f6cdf9cdde4b9e789e1 | |
| tree | 4e437fd2ceb9b0c236f649563c15e016816853ff /src/libsyntax/parse | |
| parent | 50f8aadd746ebc929a752e5ffb133936ee75c52f | |
| parent | 333899a736cc5a4c8cb5cd6585fea7395c9b160c | |
Auto merge of #66189 - Centril:rollup-3bsf45s, r=Centril
Rollup of 5 pull requests

Successful merges:

- #63793 (Have tidy ensure that we document all `unsafe` blocks in libcore)
- #64696 ([rustdoc] add sub settings)
- #65916 (syntax: move stuff around)
- #66087 (Update some build-pass ui tests to use check-pass where applicable)
- #66182 (invalid_value lint: fix help text)

Failed merges:

r? @ghost
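Among the rolled-up changes, #63793 makes tidy reject `unsafe` blocks in libcore that carry no explanatory comment. A minimal sketch of the documented-unsafe style that check encourages (the function here is illustrative, not taken from libcore; libcore's convention is a `// SAFETY:` comment justifying the block):

```rust
fn first_byte(bytes: &[u8]) -> Option<u8> {
    if bytes.is_empty() {
        return None;
    }
    // SAFETY: we just checked that `bytes` is non-empty, so index 0 is
    // in bounds and `get_unchecked(0)` cannot read out of bounds.
    Some(unsafe { *bytes.get_unchecked(0) })
}

fn main() {
    assert_eq!(first_byte(b"abc"), Some(b'a'));
    assert_eq!(first_byte(b""), None);
}
```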
Diffstat (limited to 'src/libsyntax/parse')
23 files changed, 67 insertions, 1441 deletions
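Most of the diff below is mechanical relocation from #65916: `parse::token` becomes `syntax::token`, the `classify`, `literal`, and `comments` helpers move under `syntax::util`, `unescape_error_reporting` moves into the lexer, and `PResult` moves to the `errors` crate, with every caller's imports rewritten accordingly. A toy, self-contained sketch of the shape of that change (module and function names echo the diff; the body is a simplified stand-in, not the compiler's code):

```rust
// New home: classification helpers live under `util`, not `parse`.
mod util {
    pub mod classify {
        /// Does an expression of this kind need a `;` to act as a statement?
        /// Block-like expressions (`if`, `match`, `loop`, ...) do not.
        pub fn expr_requires_semi_to_be_stmt(kind: &str) -> bool {
            !matches!(kind, "if" | "match" | "block" | "while" | "loop" | "for" | "try")
        }
    }
}

fn main() {
    // Callers change `use crate::parse::classify;` to the new path:
    use crate::util::classify;
    assert!(classify::expr_requires_semi_to_be_stmt("call"));
    assert!(!classify::expr_requires_semi_to_be_stmt("if"));
}
```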
diff --git a/src/libsyntax/parse/classify.rs b/src/libsyntax/parse/classify.rs deleted file mode 100644 index 44560688750..00000000000 --- a/src/libsyntax/parse/classify.rs +++ /dev/null @@ -1,25 +0,0 @@ -//! Routines the parser uses to classify AST nodes - -// Predicates on exprs and stmts that the pretty-printer and parser use - -use crate::ast; - -/// Does this expression require a semicolon to be treated -/// as a statement? The negation of this: 'can this expression -/// be used as a statement without a semicolon' -- is used -/// as an early-bail-out in the parser so that, for instance, -/// if true {...} else {...} -/// |x| 5 -/// isn't parsed as (if true {...} else {...} | x) | 5 -pub fn expr_requires_semi_to_be_stmt(e: &ast::Expr) -> bool { - match e.kind { - ast::ExprKind::If(..) | - ast::ExprKind::Match(..) | - ast::ExprKind::Block(..) | - ast::ExprKind::While(..) | - ast::ExprKind::Loop(..) | - ast::ExprKind::ForLoop(..) | - ast::ExprKind::TryBlock(..) => false, - _ => true, - } -} diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs deleted file mode 100644 index ac79ce323bf..00000000000 --- a/src/libsyntax/parse/lexer/comments.rs +++ /dev/null @@ -1,254 +0,0 @@ -pub use CommentStyle::*; - -use crate::ast; -use crate::source_map::SourceMap; -use crate::parse::lexer::is_block_doc_comment; -use crate::parse::lexer::ParseSess; - -use syntax_pos::{BytePos, CharPos, Pos, FileName}; - -use std::usize; - -#[cfg(test)] -mod tests; - -#[derive(Clone, Copy, PartialEq, Debug)] -pub enum CommentStyle { - /// No code on either side of each line of the comment - Isolated, - /// Code exists to the left of the comment - Trailing, - /// Code before /* foo */ and after the comment - Mixed, - /// Just a manual blank line "\n\n", for layout - BlankLine, -} - -#[derive(Clone)] -pub struct Comment { - pub style: CommentStyle, - pub lines: Vec<String>, - pub pos: BytePos, -} - -fn is_doc_comment(s: &str) -> bool { - (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") || - (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!") -} - -pub fn doc_comment_style(comment: &str) -> ast::AttrStyle { - assert!(is_doc_comment(comment)); - if comment.starts_with("//!") || comment.starts_with("/*!") { - ast::AttrStyle::Inner - } else { - ast::AttrStyle::Outer - } -} - -pub fn strip_doc_comment_decoration(comment: &str) -> String { - /// remove whitespace-only lines from the start/end of lines - fn vertical_trim(lines: Vec<String>) -> Vec<String> { - let mut i = 0; - let mut j = lines.len(); - // first line of all-stars should be omitted - if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { - i += 1; - } - - while i < j && lines[i].trim().is_empty() { - i += 1; - } - // like the first, a last line of all stars should be omitted - if j > i && - lines[j - 1] - .chars() - .skip(1) - .all(|c| c == '*') { - j -= 1; - } - - while j > i && lines[j - 1].trim().is_empty() { - j -= 1; - } - - lines[i..j].to_vec() - } - - /// remove a "[ \t]*\*" block from each line, if possible - fn horizontal_trim(lines: Vec<String>) -> Vec<String> { - let mut i = usize::MAX; - let mut can_trim = true; - let mut first = true; - - for line in &lines { - for (j, c) in line.chars().enumerate() { - if j > i || !"* \t".contains(c) { - can_trim = false; - break; - } - if c == '*' { - if first { - i = j; - first = false; - } else if i != j { - can_trim = false; - } - break; - } - } - if i >= line.len() { - can_trim = false; - } - if !can_trim { - break; - } - 
} - - if can_trim { - lines.iter() - .map(|line| (&line[i + 1..line.len()]).to_string()) - .collect() - } else { - lines - } - } - - // one-line comments lose their prefix - const ONELINERS: &[&str] = &["///!", "///", "//!", "//"]; - - for prefix in ONELINERS { - if comment.starts_with(*prefix) { - return (&comment[prefix.len()..]).to_string(); - } - } - - if comment.starts_with("/*") { - let lines = comment[3..comment.len() - 2] - .lines() - .map(|s| s.to_string()) - .collect::<Vec<String>>(); - - let lines = vertical_trim(lines); - let lines = horizontal_trim(lines); - - return lines.join("\n"); - } - - panic!("not a doc-comment: {}", comment); -} - -/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char. -/// Otherwise returns `Some(k)` where `k` is first char offset after that leading -/// whitespace. Note that `k` may be outside bounds of `s`. -fn all_whitespace(s: &str, col: CharPos) -> Option<usize> { - let mut idx = 0; - for (i, ch) in s.char_indices().take(col.to_usize()) { - if !ch.is_whitespace() { - return None; - } - idx = i + ch.len_utf8(); - } - Some(idx) -} - -fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str { - let len = s.len(); - match all_whitespace(&s, col) { - Some(col) => if col < len { &s[col..] } else { "" }, - None => s, - } -} - -fn split_block_comment_into_lines( - text: &str, - col: CharPos, -) -> Vec<String> { - let mut res: Vec<String> = vec![]; - let mut lines = text.lines(); - // just push the first line - res.extend(lines.next().map(|it| it.to_string())); - // for other lines, strip common whitespace prefix - for line in lines { - res.push(trim_whitespace_prefix(line, col).to_string()) - } - res -} - -// it appears this function is called only from pprust... that's -// probably not a good thing. -crate fn gather_comments(sess: &ParseSess, path: FileName, src: String) -> Vec<Comment> { - let cm = SourceMap::new(sess.source_map().path_mapping().clone()); - let source_file = cm.new_source_file(path, src); - let text = (*source_file.src.as_ref().unwrap()).clone(); - - let text: &str = text.as_str(); - let start_bpos = source_file.start_pos; - let mut pos = 0; - let mut comments: Vec<Comment> = Vec::new(); - let mut code_to_the_left = false; - - if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { - comments.push(Comment { - style: Isolated, - lines: vec![text[..shebang_len].to_string()], - pos: start_bpos, - }); - pos += shebang_len; - } - - for token in rustc_lexer::tokenize(&text[pos..]) { - let token_text = &text[pos..pos + token.len]; - match token.kind { - rustc_lexer::TokenKind::Whitespace => { - if let Some(mut idx) = token_text.find('\n') { - code_to_the_left = false; - while let Some(next_newline) = &token_text[idx + 1..].find('\n') { - idx = idx + 1 + next_newline; - comments.push(Comment { - style: BlankLine, - lines: vec![], - pos: start_bpos + BytePos((pos + idx) as u32), - }); - } - } - } - rustc_lexer::TokenKind::BlockComment { terminated: _ } => { - if !is_block_doc_comment(token_text) { - let code_to_the_right = match text[pos + token.len..].chars().next() { - Some('\r') | Some('\n') => false, - _ => true, - }; - let style = match (code_to_the_left, code_to_the_right) { - (true, true) | (false, true) => Mixed, - (false, false) => Isolated, - (true, false) => Trailing, - }; - - // Count the number of chars since the start of the line by rescanning. 
- let pos_in_file = start_bpos + BytePos(pos as u32); - let line_begin_in_file = source_file.line_begin_pos(pos_in_file); - let line_begin_pos = (line_begin_in_file - start_bpos).to_usize(); - let col = CharPos(text[line_begin_pos..pos].chars().count()); - - let lines = split_block_comment_into_lines(token_text, col); - comments.push(Comment { style, lines, pos: pos_in_file }) - } - } - rustc_lexer::TokenKind::LineComment => { - if !is_doc_comment(token_text) { - comments.push(Comment { - style: if code_to_the_left { Trailing } else { Isolated }, - lines: vec![token_text.to_string()], - pos: start_bpos + BytePos(pos as u32), - }) - } - } - _ => { - code_to_the_left = true; - } - } - pos += token.len; - } - - comments -} diff --git a/src/libsyntax/parse/lexer/comments/tests.rs b/src/libsyntax/parse/lexer/comments/tests.rs deleted file mode 100644 index f9cd69fb50d..00000000000 --- a/src/libsyntax/parse/lexer/comments/tests.rs +++ /dev/null @@ -1,47 +0,0 @@ -use super::*; - -#[test] -fn test_block_doc_comment_1() { - let comment = "/**\n * Test \n ** Test\n * Test\n*/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " Test \n* Test\n Test"); -} - -#[test] -fn test_block_doc_comment_2() { - let comment = "/**\n * Test\n * Test\n*/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " Test\n Test"); -} - -#[test] -fn test_block_doc_comment_3() { - let comment = "/**\n let a: *i32;\n *a = 5;\n*/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " let a: *i32;\n *a = 5;"); -} - -#[test] -fn test_block_doc_comment_4() { - let comment = "/*******************\n test\n *********************/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " test"); -} - -#[test] -fn test_line_doc_comment() { - let stripped = strip_doc_comment_decoration("/// test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("///! 
test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("// test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("// test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("///test"); - assert_eq!(stripped, "test"); - let stripped = strip_doc_comment_decoration("///!test"); - assert_eq!(stripped, "test"); - let stripped = strip_doc_comment_decoration("//test"); - assert_eq!(stripped, "test"); -} diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index e2a7ea28b9b..b1b7b08c78a 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1,7 +1,7 @@ -use crate::parse::token::{self, Token, TokenKind}; +use crate::token::{self, Token, TokenKind}; use crate::sess::ParseSess; use crate::symbol::{sym, Symbol}; -use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char}; +use crate::util::comments; use errors::{FatalError, DiagnosticBuilder}; use syntax_pos::{BytePos, Pos, Span}; @@ -16,9 +16,10 @@ use log::debug; #[cfg(test)] mod tests; -pub mod comments; mod tokentrees; mod unicode_chars; +mod unescape_error_reporting; +use unescape_error_reporting::{emit_unescape_error, push_escaped_char}; #[derive(Clone, Debug)] pub struct UnmatchedBrace { @@ -178,7 +179,7 @@ impl<'a> StringReader<'a> { rustc_lexer::TokenKind::LineComment => { let string = self.str_from(start); // comments with only more "/"s are not doc comments - let tok = if is_doc_comment(string) { + let tok = if comments::is_line_doc_comment(string) { self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment"); token::DocComment(Symbol::intern(string)) } else { @@ -191,7 +192,7 @@ impl<'a> StringReader<'a> { let string = self.str_from(start); // block comments starting with "/**" or "/*!" are doc-comments // but comments with only "*"s between two "/"s are not - let is_doc_comment = is_block_doc_comment(string); + let is_doc_comment = comments::is_block_doc_comment(string); if !terminated { let msg = if is_doc_comment { @@ -642,18 +643,3 @@ impl<'a> StringReader<'a> { } } } - -fn is_doc_comment(s: &str) -> bool { - let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') || - s.starts_with("//!"); - debug!("is {:?} a doc comment? {}", s, res); - res -} - -fn is_block_doc_comment(s: &str) -> bool { - // Prevent `/**/` from being parsed as a doc comment - let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') || - s.starts_with("/*!")) && s.len() >= 5; - debug!("is {:?} a doc comment? 
{}", s, res); - res -} diff --git a/src/libsyntax/parse/lexer/tests.rs b/src/libsyntax/parse/lexer/tests.rs index de301b1fc49..baa6fb59537 100644 --- a/src/libsyntax/parse/lexer/tests.rs +++ b/src/libsyntax/parse/lexer/tests.rs @@ -2,7 +2,8 @@ use super::*; use crate::symbol::Symbol; use crate::source_map::{SourceMap, FilePathMapping}; -use crate::parse::token; +use crate::token; +use crate::util::comments::is_doc_comment; use crate::with_default_globals; use errors::{Handler, emitter::EmitterWriter}; diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs index de8ac2c71e8..2b056434d4d 100644 --- a/src/libsyntax/parse/lexer/tokentrees.rs +++ b/src/libsyntax/parse/lexer/tokentrees.rs @@ -4,10 +4,11 @@ use syntax_pos::Span; use super::{StringReader, UnmatchedBrace}; use crate::print::pprust::token_to_string; -use crate::parse::token::{self, Token}; -use crate::parse::PResult; +use crate::token::{self, Token}; use crate::tokenstream::{DelimSpan, IsJoint::{self, *}, TokenStream, TokenTree, TreeAndJoint}; +use errors::PResult; + impl<'a> StringReader<'a> { crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) { let mut tt_reader = TokenTreesReader { diff --git a/src/libsyntax/parse/unescape_error_reporting.rs b/src/libsyntax/parse/lexer/unescape_error_reporting.rs index 5565015179c..5565015179c 100644 --- a/src/libsyntax/parse/unescape_error_reporting.rs +++ b/src/libsyntax/parse/lexer/unescape_error_reporting.rs diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index 525b4215aff..6eb995b61d3 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -4,7 +4,7 @@ use super::StringReader; use errors::{Applicability, DiagnosticBuilder}; use syntax_pos::{BytePos, Pos, Span, symbol::kw}; -use crate::parse::token; +use crate::token; #[rustfmt::skip] // for line breaks const UNICODE_ARRAY: &[(char, &str, char)] = &[ diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs deleted file mode 100644 index a8eeac59954..00000000000 --- a/src/libsyntax/parse/literal.rs +++ /dev/null @@ -1,305 +0,0 @@ -//! Code related to parsing literals. - -use crate::ast::{self, Lit, LitKind}; -use crate::parse::token::{self, Token}; -use crate::symbol::{kw, sym, Symbol}; -use crate::tokenstream::TokenTree; - -use log::debug; -use rustc_data_structures::sync::Lrc; -use syntax_pos::Span; -use rustc_lexer::unescape::{unescape_char, unescape_byte}; -use rustc_lexer::unescape::{unescape_str, unescape_byte_str}; -use rustc_lexer::unescape::{unescape_raw_str, unescape_raw_byte_str}; - -use std::ascii; - -crate enum LitError { - NotLiteral, - LexerError, - InvalidSuffix, - InvalidIntSuffix, - InvalidFloatSuffix, - NonDecimalFloat(u32), - IntTooLarge, -} - -impl LitKind { - /// Converts literal token into a semantic literal. 
- fn from_lit_token(lit: token::Lit) -> Result<LitKind, LitError> { - let token::Lit { kind, symbol, suffix } = lit; - if suffix.is_some() && !kind.may_have_suffix() { - return Err(LitError::InvalidSuffix); - } - - Ok(match kind { - token::Bool => { - assert!(symbol.is_bool_lit()); - LitKind::Bool(symbol == kw::True) - } - token::Byte => return unescape_byte(&symbol.as_str()) - .map(LitKind::Byte).map_err(|_| LitError::LexerError), - token::Char => return unescape_char(&symbol.as_str()) - .map(LitKind::Char).map_err(|_| LitError::LexerError), - - // There are some valid suffixes for integer and float literals, - // so all the handling is done internally. - token::Integer => return integer_lit(symbol, suffix), - token::Float => return float_lit(symbol, suffix), - - token::Str => { - // If there are no characters requiring special treatment we can - // reuse the symbol from the token. Otherwise, we must generate a - // new symbol because the string in the LitKind is different to the - // string in the token. - let s = symbol.as_str(); - let symbol = if s.contains(&['\\', '\r'][..]) { - let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); - unescape_str(&s, &mut |_, unescaped_char| { - match unescaped_char { - Ok(c) => buf.push(c), - Err(_) => error = Err(LitError::LexerError), - } - }); - error?; - Symbol::intern(&buf) - } else { - symbol - }; - LitKind::Str(symbol, ast::StrStyle::Cooked) - } - token::StrRaw(n) => { - // Ditto. - let s = symbol.as_str(); - let symbol = if s.contains('\r') { - let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); - unescape_raw_str(&s, &mut |_, unescaped_char| { - match unescaped_char { - Ok(c) => buf.push(c), - Err(_) => error = Err(LitError::LexerError), - } - }); - error?; - buf.shrink_to_fit(); - Symbol::intern(&buf) - } else { - symbol - }; - LitKind::Str(symbol, ast::StrStyle::Raw(n)) - } - token::ByteStr => { - let s = symbol.as_str(); - let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_byte_str(&s, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(_) => error = Err(LitError::LexerError), - } - }); - error?; - buf.shrink_to_fit(); - LitKind::ByteStr(Lrc::new(buf)) - } - token::ByteStrRaw(_) => { - let s = symbol.as_str(); - let bytes = if s.contains('\r') { - let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_raw_byte_str(&s, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(_) => error = Err(LitError::LexerError), - } - }); - error?; - buf.shrink_to_fit(); - buf - } else { - symbol.to_string().into_bytes() - }; - - LitKind::ByteStr(Lrc::new(bytes)) - }, - token::Err => LitKind::Err(symbol), - }) - } - - /// Attempts to recover a token from semantic literal. - /// This function is used when the original token doesn't exist (e.g. the literal is created - /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). - pub fn to_lit_token(&self) -> token::Lit { - let (kind, symbol, suffix) = match *self { - LitKind::Str(symbol, ast::StrStyle::Cooked) => { - // Don't re-intern unless the escaped string is different. 
- let s = symbol.as_str(); - let escaped = s.escape_default().to_string(); - let symbol = if s == escaped { symbol } else { Symbol::intern(&escaped) }; - (token::Str, symbol, None) - } - LitKind::Str(symbol, ast::StrStyle::Raw(n)) => { - (token::StrRaw(n), symbol, None) - } - LitKind::ByteStr(ref bytes) => { - let string = bytes.iter().cloned().flat_map(ascii::escape_default) - .map(Into::<char>::into).collect::<String>(); - (token::ByteStr, Symbol::intern(&string), None) - } - LitKind::Byte(byte) => { - let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect(); - (token::Byte, Symbol::intern(&string), None) - } - LitKind::Char(ch) => { - let string: String = ch.escape_default().map(Into::<char>::into).collect(); - (token::Char, Symbol::intern(&string), None) - } - LitKind::Int(n, ty) => { - let suffix = match ty { - ast::LitIntType::Unsigned(ty) => Some(ty.name()), - ast::LitIntType::Signed(ty) => Some(ty.name()), - ast::LitIntType::Unsuffixed => None, - }; - (token::Integer, sym::integer(n), suffix) - } - LitKind::Float(symbol, ty) => { - let suffix = match ty { - ast::LitFloatType::Suffixed(ty) => Some(ty.name()), - ast::LitFloatType::Unsuffixed => None, - }; - (token::Float, symbol, suffix) - } - LitKind::Bool(value) => { - let symbol = if value { kw::True } else { kw::False }; - (token::Bool, symbol, None) - } - LitKind::Err(symbol) => { - (token::Err, symbol, None) - } - }; - - token::Lit::new(kind, symbol, suffix) - } -} - -impl Lit { - /// Converts literal token into an AST literal. - crate fn from_lit_token(token: token::Lit, span: Span) -> Result<Lit, LitError> { - Ok(Lit { token, kind: LitKind::from_lit_token(token)?, span }) - } - - /// Converts arbitrary token into an AST literal. - crate fn from_token(token: &Token) -> Result<Lit, LitError> { - let lit = match token.kind { - token::Ident(name, false) if name.is_bool_lit() => - token::Lit::new(token::Bool, name, None), - token::Literal(lit) => - lit, - token::Interpolated(ref nt) => { - if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { - if let ast::ExprKind::Lit(lit) = &expr.kind { - return Ok(lit.clone()); - } - } - return Err(LitError::NotLiteral); - } - _ => return Err(LitError::NotLiteral) - }; - - Lit::from_lit_token(lit, token.span) - } - - /// Attempts to recover an AST literal from semantic literal. - /// This function is used when the original token doesn't exist (e.g. the literal is created - /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). - pub fn from_lit_kind(kind: LitKind, span: Span) -> Lit { - Lit { token: kind.to_lit_token(), kind, span } - } - - /// Losslessly convert an AST literal into a token tree. - crate fn token_tree(&self) -> TokenTree { - let token = match self.token.kind { - token::Bool => token::Ident(self.token.symbol, false), - _ => token::Literal(self.token), - }; - TokenTree::token(token, self.span) - } -} - -fn strip_underscores(symbol: Symbol) -> Symbol { - // Do not allocate a new string unless necessary. 
- let s = symbol.as_str(); - if s.contains('_') { - let mut s = s.to_string(); - s.retain(|c| c != '_'); - return Symbol::intern(&s); - } - symbol -} - -fn filtered_float_lit(symbol: Symbol, suffix: Option<Symbol>, base: u32) - -> Result<LitKind, LitError> { - debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base); - if base != 10 { - return Err(LitError::NonDecimalFloat(base)); - } - Ok(match suffix { - Some(suf) => LitKind::Float(symbol, ast::LitFloatType::Suffixed(match suf { - sym::f32 => ast::FloatTy::F32, - sym::f64 => ast::FloatTy::F64, - _ => return Err(LitError::InvalidFloatSuffix), - })), - None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed) - }) -} - -fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> { - debug!("float_lit: {:?}, {:?}", symbol, suffix); - filtered_float_lit(strip_underscores(symbol), suffix, 10) -} - -fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> { - debug!("integer_lit: {:?}, {:?}", symbol, suffix); - let symbol = strip_underscores(symbol); - let s = symbol.as_str(); - - let base = match s.as_bytes() { - [b'0', b'x', ..] => 16, - [b'0', b'o', ..] => 8, - [b'0', b'b', ..] => 2, - _ => 10, - }; - - let ty = match suffix { - Some(suf) => match suf { - sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize), - sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8), - sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16), - sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32), - sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64), - sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128), - sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize), - sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8), - sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16), - sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32), - sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64), - sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128), - // `1f64` and `2f32` etc. are valid float literals, and - // `fxxx` looks more like an invalid float literal than invalid integer literal. - _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base), - _ => return Err(LitError::InvalidIntSuffix), - } - _ => ast::LitIntType::Unsuffixed - }; - - let s = &s[if base != 10 { 2 } else { 0 } ..]; - u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| { - // Small bases are lexed as if they were base 10, e.g, the string - // might be `0b10201`. This will cause the conversion above to fail, - // but these kinds of errors are already reported by the lexer. 
- let from_lexer = - base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); - if from_lexer { LitError::LexerError } else { LitError::IntTooLarge } - }) -} diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index b688dce87c1..18550762017 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -2,14 +2,12 @@ use crate::ast; use crate::parse::parser::{Parser, emit_unclosed_delims, make_unclosed_delims_error}; -use crate::parse::token::Nonterminal; +use crate::token::{self, Nonterminal}; use crate::tokenstream::{self, TokenStream, TokenTree}; use crate::print::pprust; use crate::sess::ParseSess; -use errors::{FatalError, Level, Diagnostic, DiagnosticBuilder}; -#[cfg(target_arch = "x86_64")] -use rustc_data_structures::static_assert_size; +use errors::{PResult, FatalError, Level, Diagnostic}; use rustc_data_structures::sync::Lrc; use syntax_pos::{Span, SourceFile, FileName}; @@ -25,18 +23,6 @@ mod tests; #[macro_use] pub mod parser; pub mod lexer; -pub mod token; - -crate mod classify; -crate mod literal; -crate mod unescape_error_reporting; - -pub type PResult<'a, T> = Result<T, DiagnosticBuilder<'a>>; - -// `PResult` is used a lot. Make sure it doesn't unintentionally get bigger. -// (See also the comment on `DiagnosticBuilderInner`.) -#[cfg(target_arch = "x86_64")] -static_assert_size!(PResult<'_, bool>, 16); #[derive(Clone)] pub struct Directory<'a> { diff --git a/src/libsyntax/parse/parser/attr.rs b/src/libsyntax/parse/parser/attr.rs index 1c292661f24..31f0a02a483 100644 --- a/src/libsyntax/parse/parser/attr.rs +++ b/src/libsyntax/parse/parser/attr.rs @@ -1,10 +1,14 @@ -use super::{SeqSep, PResult, Parser, TokenType, PathStyle}; +use super::{SeqSep, Parser, TokenType, PathStyle}; use crate::attr; use crate::ast; -use crate::parse::token::{self, Nonterminal, DelimToken}; +use crate::util::comments; +use crate::token::{self, Nonterminal, DelimToken}; use crate::tokenstream::{TokenStream, TokenTree}; use crate::source_map::Span; +use syntax_pos::Symbol; +use errors::PResult; + use log::debug; #[derive(Debug)] @@ -43,7 +47,7 @@ impl<'a> Parser<'a> { just_parsed_doc_comment = false; } token::DocComment(s) => { - let attr = attr::mk_doc_comment(s, self.token.span); + let attr = self.mk_doc_comment(s); if attr.style != ast::AttrStyle::Outer { let mut err = self.fatal("expected outer doc comment"); err.note("inner doc comments like this (starting with \ @@ -60,6 +64,11 @@ impl<'a> Parser<'a> { Ok(attrs) } + fn mk_doc_comment(&self, s: Symbol) -> ast::Attribute { + let style = comments::doc_comment_style(&s.as_str()); + attr::mk_doc_comment(style, s, self.token.span) + } + /// Matches `attribute = # ! [ meta_item ]`. /// /// If `permit_inner` is `true`, then a leading `!` indicates an inner @@ -228,7 +237,7 @@ impl<'a> Parser<'a> { } token::DocComment(s) => { // We need to get the position of this token before we bump. 
- let attr = attr::mk_doc_comment(s, self.token.span); + let attr = self.mk_doc_comment(s); if attr.style == ast::AttrStyle::Inner { attrs.push(attr); self.bump(); diff --git a/src/libsyntax/parse/parser/diagnostics.rs b/src/libsyntax/parse/parser/diagnostics.rs index 49a517a5c44..26d7f48025e 100644 --- a/src/libsyntax/parse/parser/diagnostics.rs +++ b/src/libsyntax/parse/parser/diagnostics.rs @@ -1,18 +1,16 @@ -use super::{ - BlockMode, PathStyle, SemiColonMode, TokenType, TokenExpectType, - SeqSep, PResult, Parser -}; +use super::{BlockMode, PathStyle, SemiColonMode, TokenType, TokenExpectType, SeqSep, Parser}; use crate::ast::{ self, Param, BinOpKind, BindingMode, BlockCheckMode, Expr, ExprKind, Ident, Item, ItemKind, Mutability, Pat, PatKind, PathSegment, QSelf, Ty, TyKind, }; -use crate::parse::token::{self, TokenKind, token_can_begin_expr}; +use crate::token::{self, TokenKind, token_can_begin_expr}; use crate::print::pprust; use crate::ptr::P; use crate::symbol::{kw, sym}; use crate::ThinVec; use crate::util::parser::AssocOp; -use errors::{Applicability, DiagnosticBuilder, DiagnosticId, pluralize}; + +use errors::{PResult, Applicability, DiagnosticBuilder, DiagnosticId, pluralize}; use rustc_data_structures::fx::FxHashSet; use syntax_pos::{Span, DUMMY_SP, MultiSpan, SpanSnippetError}; use log::{debug, trace}; diff --git a/src/libsyntax/parse/parser/expr.rs b/src/libsyntax/parse/parser/expr.rs index 509e6482dcc..800074035ce 100644 --- a/src/libsyntax/parse/parser/expr.rs +++ b/src/libsyntax/parse/parser/expr.rs @@ -1,24 +1,23 @@ -use super::{Parser, PResult, Restrictions, PrevTokenKind, TokenType, PathStyle, BlockMode}; +use super::{Parser, Restrictions, PrevTokenKind, TokenType, PathStyle, BlockMode}; use super::{SemiColonMode, SeqSep, TokenExpectType}; use super::pat::{GateOr, PARAM_EXPECTED}; use super::diagnostics::Error; -use crate::parse::literal::LitError; - use crate::ast::{ self, DUMMY_NODE_ID, Attribute, AttrStyle, Ident, CaptureBy, BlockCheckMode, Expr, ExprKind, RangeLimits, Label, Movability, IsAsync, Arm, Ty, TyKind, FunctionRetTy, Param, FnDecl, BinOpKind, BinOp, UnOp, Mac, AnonConst, Field, Lit, }; use crate::maybe_recover_from_interpolated_ty_qpath; -use crate::parse::classify; -use crate::parse::token::{self, Token, TokenKind}; +use crate::token::{self, Token, TokenKind}; use crate::print::pprust; use crate::ptr::P; use crate::source_map::{self, Span}; +use crate::util::classify; +use crate::util::literal::LitError; use crate::util::parser::{AssocOp, Fixity, prec_let_scrutinee_needs_par}; -use errors::Applicability; +use errors::{PResult, Applicability}; use syntax_pos::symbol::{kw, sym}; use syntax_pos::Symbol; use std::mem; diff --git a/src/libsyntax/parse/parser/generics.rs b/src/libsyntax/parse/parser/generics.rs index 3c094750b4d..ae9ecd8fe39 100644 --- a/src/libsyntax/parse/parser/generics.rs +++ b/src/libsyntax/parse/parser/generics.rs @@ -1,11 +1,13 @@ -use super::{Parser, PResult}; +use super::Parser; use crate::ast::{self, WhereClause, GenericParam, GenericParamKind, GenericBounds, Attribute}; -use crate::parse::token; +use crate::token; use crate::source_map::DUMMY_SP; use syntax_pos::symbol::{kw, sym}; +use errors::PResult; + impl<'a> Parser<'a> { /// Parses bounds of a lifetime parameter `BOUND + BOUND + BOUND`, possibly with trailing `+`. 
/// diff --git a/src/libsyntax/parse/parser/item.rs b/src/libsyntax/parse/parser/item.rs index 9d543055f23..3c618d75d34 100644 --- a/src/libsyntax/parse/parser/item.rs +++ b/src/libsyntax/parse/parser/item.rs @@ -1,4 +1,4 @@ -use super::{Parser, PResult, PathStyle}; +use super::{Parser, PathStyle}; use super::diagnostics::{Error, dummy_arg, ConsumeClosingDelim}; use crate::maybe_whole; @@ -17,7 +17,7 @@ use crate::ThinVec; use log::debug; use std::mem; -use errors::{Applicability, DiagnosticBuilder, DiagnosticId, StashKey}; +use errors::{PResult, Applicability, DiagnosticBuilder, DiagnosticId, StashKey}; use syntax_pos::BytePos; /// Whether the type alias or associated type is a concrete type or an opaque type. diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser/mod.rs index 1284e89f195..455f4172f5f 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser/mod.rs @@ -15,10 +15,10 @@ use crate::ast::{ self, Abi, DUMMY_NODE_ID, AttrStyle, Attribute, CrateSugar, Ident, IsAsync, MacDelimiter, Mutability, StrStyle, Visibility, VisibilityKind, Unsafety, }; -use crate::parse::{PResult, Directory, DirectoryOwnership}; +use crate::parse::{Directory, DirectoryOwnership}; use crate::parse::lexer::UnmatchedBrace; -use crate::parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration}; -use crate::parse::token::{self, Token, TokenKind, DelimToken}; +use crate::util::comments::{doc_comment_style, strip_doc_comment_decoration}; +use crate::token::{self, Token, TokenKind, DelimToken}; use crate::print::pprust; use crate::ptr::P; use crate::sess::ParseSess; @@ -27,7 +27,7 @@ use crate::symbol::{kw, sym, Symbol}; use crate::tokenstream::{self, DelimSpan, TokenTree, TokenStream, TreeAndJoint}; use crate::ThinVec; -use errors::{Applicability, DiagnosticBuilder, DiagnosticId, FatalError}; +use errors::{PResult, Applicability, DiagnosticBuilder, DiagnosticId, FatalError}; use syntax_pos::{Span, BytePos, DUMMY_SP, FileName}; use log::debug; diff --git a/src/libsyntax/parse/parser/module.rs b/src/libsyntax/parse/parser/module.rs index 242a17659a0..72049daaed3 100644 --- a/src/libsyntax/parse/parser/module.rs +++ b/src/libsyntax/parse/parser/module.rs @@ -1,14 +1,16 @@ -use super::{Parser, PResult}; +use super::Parser; use super::item::ItemInfo; use super::diagnostics::Error; use crate::attr; use crate::ast::{self, Ident, Attribute, ItemKind, Mod, Crate}; use crate::parse::{new_sub_parser_from_file, DirectoryOwnership}; -use crate::parse::token::{self, TokenKind}; +use crate::token::{self, TokenKind}; use crate::source_map::{SourceMap, Span, DUMMY_SP, FileName}; use crate::symbol::sym; +use errors::PResult; + use std::path::{self, Path, PathBuf}; /// Information about the path to a module. 
diff --git a/src/libsyntax/parse/parser/pat.rs b/src/libsyntax/parse/parser/pat.rs index cc8738edff7..f347300da71 100644 --- a/src/libsyntax/parse/parser/pat.rs +++ b/src/libsyntax/parse/parser/pat.rs @@ -1,16 +1,16 @@ -use super::{Parser, PResult, PathStyle}; +use super::{Parser, PathStyle}; use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole}; use crate::ptr::P; use crate::ast::{self, Attribute, Pat, PatKind, FieldPat, RangeEnd, RangeSyntax, Mac}; use crate::ast::{BindingMode, Ident, Mutability, Path, QSelf, Expr, ExprKind}; use crate::mut_visit::{noop_visit_pat, noop_visit_mac, MutVisitor}; -use crate::parse::token::{self}; +use crate::token; use crate::print::pprust; use crate::source_map::{respan, Span, Spanned}; use crate::ThinVec; use syntax_pos::symbol::{kw, sym}; -use errors::{Applicability, DiagnosticBuilder}; +use errors::{PResult, Applicability, DiagnosticBuilder}; type Expected = Option<&'static str>; diff --git a/src/libsyntax/parse/parser/path.rs b/src/libsyntax/parse/parser/path.rs index 4438d61d9ee..9ceb3ba1eb4 100644 --- a/src/libsyntax/parse/parser/path.rs +++ b/src/libsyntax/parse/parser/path.rs @@ -1,15 +1,15 @@ -use super::{Parser, PResult, TokenType}; +use super::{Parser, TokenType}; use crate::{maybe_whole, ThinVec}; use crate::ast::{self, QSelf, Path, PathSegment, Ident, ParenthesizedArgs, AngleBracketedArgs}; use crate::ast::{AnonConst, GenericArg, AssocTyConstraint, AssocTyConstraintKind, BlockCheckMode}; -use crate::parse::token::{self, Token}; +use crate::token::{self, Token}; use crate::source_map::{Span, BytePos}; use syntax_pos::symbol::{kw, sym}; use std::mem; use log::debug; -use errors::{Applicability, pluralize}; +use errors::{PResult, Applicability, pluralize}; /// Specifies how to parse a path. #[derive(Copy, Clone, PartialEq)] diff --git a/src/libsyntax/parse/parser/stmt.rs b/src/libsyntax/parse/parser/stmt.rs index 12c530f3cbb..30e47b7a0b2 100644 --- a/src/libsyntax/parse/parser/stmt.rs +++ b/src/libsyntax/parse/parser/stmt.rs @@ -1,4 +1,4 @@ -use super::{Parser, PResult, Restrictions, PrevTokenKind, SemiColonMode, BlockMode}; +use super::{Parser, Restrictions, PrevTokenKind, SemiColonMode, BlockMode}; use super::expr::LhsExpr; use super::path::PathStyle; use super::pat::GateOr; @@ -8,13 +8,14 @@ use crate::ptr::P; use crate::{maybe_whole, ThinVec}; use crate::ast::{self, DUMMY_NODE_ID, Stmt, StmtKind, Local, Block, BlockCheckMode, Expr, ExprKind}; use crate::ast::{Attribute, AttrStyle, VisibilityKind, MacStmtStyle, Mac, MacDelimiter}; -use crate::parse::{classify, DirectoryOwnership}; -use crate::parse::token; +use crate::parse::DirectoryOwnership; +use crate::util::classify; +use crate::token; use crate::source_map::{respan, Span}; use crate::symbol::{kw, sym}; use std::mem; -use errors::Applicability; +use errors::{PResult, Applicability}; impl<'a> Parser<'a> { /// Parses a statement. This stops just before trailing semicolons on everything but items. 
diff --git a/src/libsyntax/parse/parser/ty.rs b/src/libsyntax/parse/parser/ty.rs index b770b90705c..a891634e611 100644 --- a/src/libsyntax/parse/parser/ty.rs +++ b/src/libsyntax/parse/parser/ty.rs @@ -1,4 +1,4 @@ -use super::{Parser, PResult, PathStyle, PrevTokenKind, TokenType}; +use super::{Parser, PathStyle, PrevTokenKind, TokenType}; use super::item::ParamCfg; use crate::{maybe_whole, maybe_recover_from_interpolated_ty_qpath}; @@ -6,11 +6,11 @@ use crate::ptr::P; use crate::ast::{self, Ty, TyKind, MutTy, BareFnTy, FunctionRetTy, GenericParam, Lifetime, Ident}; use crate::ast::{TraitBoundModifier, TraitObjectSyntax, GenericBound, GenericBounds, PolyTraitRef}; use crate::ast::{Mutability, AnonConst, Mac}; -use crate::parse::token::{self, Token}; +use crate::token::{self, Token}; use crate::source_map::Span; use crate::symbol::{kw}; -use errors::{Applicability, pluralize}; +use errors::{PResult, Applicability, pluralize}; /// Returns `true` if `IDENT t` can start a type -- `IDENT::a::b`, `IDENT<u8, u8>`, /// `IDENT<<u8 as Trait>::AssocTy>`. diff --git a/src/libsyntax/parse/tests.rs b/src/libsyntax/parse/tests.rs index 169eb954efa..27ca2b6472f 100644 --- a/src/libsyntax/parse/tests.rs +++ b/src/libsyntax/parse/tests.rs @@ -2,9 +2,9 @@ use super::*; use crate::ast::{self, Name, PatKind}; use crate::attr::first_attr_value_str_by_name; -use crate::parse::{ParseSess, PResult}; -use crate::parse::new_parser_from_source_str; -use crate::parse::token::Token; +use crate::sess::ParseSess; +use crate::parse::{PResult, new_parser_from_source_str}; +use crate::token::Token; use crate::print::pprust::item_to_string; use crate::ptr::P; use crate::source_map::FilePathMapping; diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs deleted file mode 100644 index 6f3da344ccf..00000000000 --- a/src/libsyntax/parse/token.rs +++ /dev/null @@ -1,728 +0,0 @@ -pub use BinOpToken::*; -pub use Nonterminal::*; -pub use DelimToken::*; -pub use LitKind::*; -pub use TokenKind::*; - -use crate::ast; -use crate::ptr::P; -use crate::symbol::kw; -use crate::tokenstream::TokenTree; - -use syntax_pos::symbol::Symbol; -use syntax_pos::{self, Span, DUMMY_SP}; - -use std::fmt; -use std::mem; -#[cfg(target_arch = "x86_64")] -use rustc_data_structures::static_assert_size; -use rustc_data_structures::sync::Lrc; - -#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)] -pub enum BinOpToken { - Plus, - Minus, - Star, - Slash, - Percent, - Caret, - And, - Or, - Shl, - Shr, -} - -/// A delimiter token. -#[derive(Clone, PartialEq, Eq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)] -pub enum DelimToken { - /// A round parenthesis (i.e., `(` or `)`). - Paren, - /// A square bracket (i.e., `[` or `]`). - Bracket, - /// A curly brace (i.e., `{` or `}`). - Brace, - /// An empty delimiter. - NoDelim, -} - -impl DelimToken { - pub fn len(self) -> usize { - if self == NoDelim { 0 } else { 1 } - } - - pub fn is_empty(self) -> bool { - self == NoDelim - } -} - -#[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)] -pub enum LitKind { - Bool, // AST only, must never appear in a `Token` - Byte, - Char, - Integer, - Float, - Str, - StrRaw(u16), // raw string delimited by `n` hash symbols - ByteStr, - ByteStrRaw(u16), // raw byte string delimited by `n` hash symbols - Err, -} - -/// A literal token. 
-#[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)] -pub struct Lit { - pub kind: LitKind, - pub symbol: Symbol, - pub suffix: Option<Symbol>, -} - -impl fmt::Display for Lit { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Lit { kind, symbol, suffix } = *self; - match kind { - Byte => write!(f, "b'{}'", symbol)?, - Char => write!(f, "'{}'", symbol)?, - Str => write!(f, "\"{}\"", symbol)?, - StrRaw(n) => write!(f, "r{delim}\"{string}\"{delim}", - delim="#".repeat(n as usize), - string=symbol)?, - ByteStr => write!(f, "b\"{}\"", symbol)?, - ByteStrRaw(n) => write!(f, "br{delim}\"{string}\"{delim}", - delim="#".repeat(n as usize), - string=symbol)?, - Integer | - Float | - Bool | - Err => write!(f, "{}", symbol)?, - } - - if let Some(suffix) = suffix { - write!(f, "{}", suffix)?; - } - - Ok(()) - } -} - -impl LitKind { - /// An English article for the literal token kind. - crate fn article(self) -> &'static str { - match self { - Integer | Err => "an", - _ => "a", - } - } - - crate fn descr(self) -> &'static str { - match self { - Bool => panic!("literal token contains `Lit::Bool`"), - Byte => "byte", - Char => "char", - Integer => "integer", - Float => "float", - Str | StrRaw(..) => "string", - ByteStr | ByteStrRaw(..) => "byte string", - Err => "error", - } - } - - crate fn may_have_suffix(self) -> bool { - match self { - Integer | Float | Err => true, - _ => false, - } - } -} - -impl Lit { - pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit { - Lit { kind, symbol, suffix } - } -} - -pub(crate) fn ident_can_begin_expr(name: ast::Name, span: Span, is_raw: bool) -> bool { - let ident_token = Token::new(Ident(name, is_raw), span); - token_can_begin_expr(&ident_token) -} - -pub(crate) fn token_can_begin_expr(ident_token: &Token) -> bool { - !ident_token.is_reserved_ident() || - ident_token.is_path_segment_keyword() || - match ident_token.kind { - TokenKind::Ident(ident, _) => [ - kw::Async, - kw::Do, - kw::Box, - kw::Break, - kw::Continue, - kw::False, - kw::For, - kw::If, - kw::Let, - kw::Loop, - kw::Match, - kw::Move, - kw::Return, - kw::True, - kw::Unsafe, - kw::While, - kw::Yield, - kw::Static, - ].contains(&ident), - _=> false, - } -} - -fn ident_can_begin_type(name: ast::Name, span: Span, is_raw: bool) -> bool { - let ident_token = Token::new(Ident(name, is_raw), span); - - !ident_token.is_reserved_ident() || - ident_token.is_path_segment_keyword() || - [ - kw::Underscore, - kw::For, - kw::Impl, - kw::Fn, - kw::Unsafe, - kw::Extern, - kw::Typeof, - kw::Dyn, - ].contains(&name) -} - -#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)] -pub enum TokenKind { - /* Expression-operator symbols. */ - Eq, - Lt, - Le, - EqEq, - Ne, - Ge, - Gt, - AndAnd, - OrOr, - Not, - Tilde, - BinOp(BinOpToken), - BinOpEq(BinOpToken), - - /* Structural symbols */ - At, - Dot, - DotDot, - DotDotDot, - DotDotEq, - Comma, - Semi, - Colon, - ModSep, - RArrow, - LArrow, - FatArrow, - Pound, - Dollar, - Question, - /// Used by proc macros for representing lifetimes, not generated by lexer right now. - SingleQuote, - /// An opening delimiter (e.g., `{`). - OpenDelim(DelimToken), - /// A closing delimiter (e.g., `}`). - CloseDelim(DelimToken), - - /* Literals */ - Literal(Lit), - - /* Name components */ - Ident(ast::Name, /* is_raw */ bool), - Lifetime(ast::Name), - - Interpolated(Lrc<Nonterminal>), - - // Can be expanded into several tokens. - /// A doc comment. - DocComment(ast::Name), - - // Junk. 
These carry no data because we don't really care about the data - // they *would* carry, and don't really want to allocate a new ident for - // them. Instead, users could extract that from the associated span. - - /// Whitespace. - Whitespace, - /// A comment. - Comment, - Shebang(ast::Name), - /// A completely invalid token which should be skipped. - Unknown(ast::Name), - - Eof, -} - -// `TokenKind` is used a lot. Make sure it doesn't unintentionally get bigger. -#[cfg(target_arch = "x86_64")] -static_assert_size!(TokenKind, 16); - -#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)] -pub struct Token { - pub kind: TokenKind, - pub span: Span, -} - -impl TokenKind { - pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind { - Literal(Lit::new(kind, symbol, suffix)) - } - - /// Returns tokens that are likely to be typed accidentally instead of the current token. - /// Enables better error recovery when the wrong token is found. - crate fn similar_tokens(&self) -> Option<Vec<TokenKind>> { - match *self { - Comma => Some(vec![Dot, Lt, Semi]), - Semi => Some(vec![Colon, Comma]), - _ => None - } - } -} - -impl Token { - pub fn new(kind: TokenKind, span: Span) -> Self { - Token { kind, span } - } - - /// Some token that will be thrown away later. - crate fn dummy() -> Self { - Token::new(TokenKind::Whitespace, DUMMY_SP) - } - - /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary. - pub fn from_ast_ident(ident: ast::Ident) -> Self { - Token::new(Ident(ident.name, ident.is_raw_guess()), ident.span) - } - - /// Return this token by value and leave a dummy token in its place. - pub fn take(&mut self) -> Self { - mem::replace(self, Token::dummy()) - } - - crate fn is_op(&self) -> bool { - match self.kind { - OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | - Ident(..) | Lifetime(..) | Interpolated(..) | - Whitespace | Comment | Shebang(..) | Eof => false, - _ => true, - } - } - - crate fn is_like_plus(&self) -> bool { - match self.kind { - BinOp(Plus) | BinOpEq(Plus) => true, - _ => false, - } - } - - /// Returns `true` if the token can appear at the start of an expression. - pub fn can_begin_expr(&self) -> bool { - match self.kind { - Ident(name, is_raw) => - ident_can_begin_expr(name, self.span, is_raw), // value name or keyword - OpenDelim(..) | // tuple, array or block - Literal(..) | // literal - Not | // operator not - BinOp(Minus) | // unary minus - BinOp(Star) | // dereference - BinOp(Or) | OrOr | // closure - BinOp(And) | // reference - AndAnd | // double reference - // DotDotDot is no longer supported, but we need some way to display the error - DotDot | DotDotDot | DotDotEq | // range notation - Lt | BinOp(Shl) | // associated path - ModSep | // global path - Lifetime(..) | // labeled loop - Pound => true, // expression attributes - Interpolated(ref nt) => match **nt { - NtLiteral(..) | - NtIdent(..) | - NtExpr(..) | - NtBlock(..) | - NtPath(..) | - NtLifetime(..) => true, - _ => false, - }, - _ => false, - } - } - - /// Returns `true` if the token can appear at the start of a type. - pub fn can_begin_type(&self) -> bool { - match self.kind { - Ident(name, is_raw) => - ident_can_begin_type(name, self.span, is_raw), // type name or keyword - OpenDelim(Paren) | // tuple - OpenDelim(Bracket) | // array - Not | // never - BinOp(Star) | // raw pointer - BinOp(And) | // reference - AndAnd | // double reference - Question | // maybe bound in trait object - Lifetime(..) 
| // lifetime bound in trait object - Lt | BinOp(Shl) | // associated path - ModSep => true, // global path - Interpolated(ref nt) => match **nt { - NtIdent(..) | NtTy(..) | NtPath(..) | NtLifetime(..) => true, - _ => false, - }, - _ => false, - } - } - - /// Returns `true` if the token can appear at the start of a const param. - crate fn can_begin_const_arg(&self) -> bool { - match self.kind { - OpenDelim(Brace) => true, - Interpolated(ref nt) => match **nt { - NtExpr(..) | NtBlock(..) | NtLiteral(..) => true, - _ => false, - } - _ => self.can_begin_literal_or_bool(), - } - } - - /// Returns `true` if the token can appear at the start of a generic bound. - crate fn can_begin_bound(&self) -> bool { - self.is_path_start() || self.is_lifetime() || self.is_keyword(kw::For) || - self == &Question || self == &OpenDelim(Paren) - } - - /// Returns `true` if the token is any literal - pub fn is_lit(&self) -> bool { - match self.kind { - Literal(..) => true, - _ => false, - } - } - - /// Returns `true` if the token is any literal, a minus (which can prefix a literal, - /// for example a '-42', or one of the boolean idents). - pub fn can_begin_literal_or_bool(&self) -> bool { - match self.kind { - Literal(..) | BinOp(Minus) => true, - Ident(name, false) if name.is_bool_lit() => true, - Interpolated(ref nt) => match **nt { - NtLiteral(..) => true, - _ => false, - }, - _ => false, - } - } - - /// Returns an identifier if this token is an identifier. - pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> { - match self.kind { - Ident(name, is_raw) => Some((ast::Ident::new(name, self.span), is_raw)), - Interpolated(ref nt) => match **nt { - NtIdent(ident, is_raw) => Some((ident, is_raw)), - _ => None, - }, - _ => None, - } - } - - /// Returns a lifetime identifier if this token is a lifetime. - pub fn lifetime(&self) -> Option<ast::Ident> { - match self.kind { - Lifetime(name) => Some(ast::Ident::new(name, self.span)), - Interpolated(ref nt) => match **nt { - NtLifetime(ident) => Some(ident), - _ => None, - }, - _ => None, - } - } - - /// Returns `true` if the token is an identifier. - pub fn is_ident(&self) -> bool { - self.ident().is_some() - } - - /// Returns `true` if the token is a lifetime. - crate fn is_lifetime(&self) -> bool { - self.lifetime().is_some() - } - - /// Returns `true` if the token is a identifier whose name is the given - /// string slice. - crate fn is_ident_named(&self, name: Symbol) -> bool { - self.ident().map_or(false, |(ident, _)| ident.name == name) - } - - /// Returns `true` if the token is an interpolated path. - fn is_path(&self) -> bool { - if let Interpolated(ref nt) = self.kind { - if let NtPath(..) = **nt { - return true; - } - } - false - } - - /// Would `maybe_whole_expr` in `parser.rs` return `Ok(..)`? - /// That is, is this a pre-parsed expression dropped into the token stream - /// (which happens while parsing the result of macro expansion)? - crate fn is_whole_expr(&self) -> bool { - if let Interpolated(ref nt) = self.kind { - if let NtExpr(_) | NtLiteral(_) | NtPath(_) | NtIdent(..) | NtBlock(_) = **nt { - return true; - } - } - - false - } - - /// Returns `true` if the token is either the `mut` or `const` keyword. 
- crate fn is_mutability(&self) -> bool { - self.is_keyword(kw::Mut) || - self.is_keyword(kw::Const) - } - - crate fn is_qpath_start(&self) -> bool { - self == &Lt || self == &BinOp(Shl) - } - - crate fn is_path_start(&self) -> bool { - self == &ModSep || self.is_qpath_start() || self.is_path() || - self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident() - } - - /// Returns `true` if the token is a given keyword, `kw`. - pub fn is_keyword(&self, kw: Symbol) -> bool { - self.is_non_raw_ident_where(|id| id.name == kw) - } - - crate fn is_path_segment_keyword(&self) -> bool { - self.is_non_raw_ident_where(ast::Ident::is_path_segment_keyword) - } - - // Returns true for reserved identifiers used internally for elided lifetimes, - // unnamed method parameters, crate root module, error recovery etc. - crate fn is_special_ident(&self) -> bool { - self.is_non_raw_ident_where(ast::Ident::is_special) - } - - /// Returns `true` if the token is a keyword used in the language. - crate fn is_used_keyword(&self) -> bool { - self.is_non_raw_ident_where(ast::Ident::is_used_keyword) - } - - /// Returns `true` if the token is a keyword reserved for possible future use. - crate fn is_unused_keyword(&self) -> bool { - self.is_non_raw_ident_where(ast::Ident::is_unused_keyword) - } - - /// Returns `true` if the token is either a special identifier or a keyword. - pub fn is_reserved_ident(&self) -> bool { - self.is_non_raw_ident_where(ast::Ident::is_reserved) - } - - /// Returns `true` if the token is the identifier `true` or `false`. - crate fn is_bool_lit(&self) -> bool { - self.is_non_raw_ident_where(|id| id.name.is_bool_lit()) - } - - /// Returns `true` if the token is a non-raw identifier for which `pred` holds. - fn is_non_raw_ident_where(&self, pred: impl FnOnce(ast::Ident) -> bool) -> bool { - match self.ident() { - Some((id, false)) => pred(id), - _ => false, - } - } - - crate fn glue(&self, joint: &Token) -> Option<Token> { - let kind = match self.kind { - Eq => match joint.kind { - Eq => EqEq, - Gt => FatArrow, - _ => return None, - }, - Lt => match joint.kind { - Eq => Le, - Lt => BinOp(Shl), - Le => BinOpEq(Shl), - BinOp(Minus) => LArrow, - _ => return None, - }, - Gt => match joint.kind { - Eq => Ge, - Gt => BinOp(Shr), - Ge => BinOpEq(Shr), - _ => return None, - }, - Not => match joint.kind { - Eq => Ne, - _ => return None, - }, - BinOp(op) => match joint.kind { - Eq => BinOpEq(op), - BinOp(And) if op == And => AndAnd, - BinOp(Or) if op == Or => OrOr, - Gt if op == Minus => RArrow, - _ => return None, - }, - Dot => match joint.kind { - Dot => DotDot, - DotDot => DotDotDot, - _ => return None, - }, - DotDot => match joint.kind { - Dot => DotDotDot, - Eq => DotDotEq, - _ => return None, - }, - Colon => match joint.kind { - Colon => ModSep, - _ => return None, - }, - SingleQuote => match joint.kind { - Ident(name, false) => Lifetime(Symbol::intern(&format!("'{}", name))), - _ => return None, - }, - - Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot | - DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar | - Question | OpenDelim(..) | CloseDelim(..) | - Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) | - Whitespace | Comment | Shebang(..) | Unknown(..) 
| Eof => return None, - }; - - Some(Token::new(kind, self.span.to(joint.span))) - } - - // See comments in `Nonterminal::to_tokenstream` for why we care about - // *probably* equal here rather than actual equality - crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool { - if mem::discriminant(&self.kind) != mem::discriminant(&other.kind) { - return false - } - match (&self.kind, &other.kind) { - (&Eq, &Eq) | - (&Lt, &Lt) | - (&Le, &Le) | - (&EqEq, &EqEq) | - (&Ne, &Ne) | - (&Ge, &Ge) | - (&Gt, &Gt) | - (&AndAnd, &AndAnd) | - (&OrOr, &OrOr) | - (&Not, &Not) | - (&Tilde, &Tilde) | - (&At, &At) | - (&Dot, &Dot) | - (&DotDot, &DotDot) | - (&DotDotDot, &DotDotDot) | - (&DotDotEq, &DotDotEq) | - (&Comma, &Comma) | - (&Semi, &Semi) | - (&Colon, &Colon) | - (&ModSep, &ModSep) | - (&RArrow, &RArrow) | - (&LArrow, &LArrow) | - (&FatArrow, &FatArrow) | - (&Pound, &Pound) | - (&Dollar, &Dollar) | - (&Question, &Question) | - (&Whitespace, &Whitespace) | - (&Comment, &Comment) | - (&Eof, &Eof) => true, - - (&BinOp(a), &BinOp(b)) | - (&BinOpEq(a), &BinOpEq(b)) => a == b, - - (&OpenDelim(a), &OpenDelim(b)) | - (&CloseDelim(a), &CloseDelim(b)) => a == b, - - (&DocComment(a), &DocComment(b)) | - (&Shebang(a), &Shebang(b)) => a == b, - - (&Literal(a), &Literal(b)) => a == b, - - (&Lifetime(a), &Lifetime(b)) => a == b, - (&Ident(a, b), &Ident(c, d)) => b == d && (a == c || - a == kw::DollarCrate || - c == kw::DollarCrate), - - (&Interpolated(_), &Interpolated(_)) => false, - - _ => panic!("forgot to add a token?"), - } - } -} - -impl PartialEq<TokenKind> for Token { - fn eq(&self, rhs: &TokenKind) -> bool { - self.kind == *rhs - } -} - -#[derive(Clone, RustcEncodable, RustcDecodable)] -/// For interpolation during macro expansion. -pub enum Nonterminal { - NtItem(P<ast::Item>), - NtBlock(P<ast::Block>), - NtStmt(ast::Stmt), - NtPat(P<ast::Pat>), - NtExpr(P<ast::Expr>), - NtTy(P<ast::Ty>), - NtIdent(ast::Ident, /* is_raw */ bool), - NtLifetime(ast::Ident), - NtLiteral(P<ast::Expr>), - /// Stuff inside brackets for attributes - NtMeta(ast::AttrItem), - NtPath(ast::Path), - NtVis(ast::Visibility), - NtTT(TokenTree), - // Used only for passing items to proc macro attributes (they are not - // strictly necessary for that, `Annotatable` can be converted into - // tokens directly, but doing that naively regresses pretty-printing). - NtTraitItem(ast::TraitItem), - NtImplItem(ast::ImplItem), - NtForeignItem(ast::ForeignItem), -} - -impl PartialEq for Nonterminal { - fn eq(&self, rhs: &Self) -> bool { - match (self, rhs) { - (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) => - ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs, - (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs, - (NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs, - // FIXME: Assume that all "complex" nonterminal are not equal, we can't compare them - // correctly based on data from AST. This will prevent them from matching each other - // in macros. The comparison will become possible only when each nonterminal has an - // attached token stream from which it was parsed. - _ => false, - } - } -} - -impl fmt::Debug for Nonterminal { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - NtItem(..) => f.pad("NtItem(..)"), - NtBlock(..) => f.pad("NtBlock(..)"), - NtStmt(..) => f.pad("NtStmt(..)"), - NtPat(..) => f.pad("NtPat(..)"), - NtExpr(..) => f.pad("NtExpr(..)"), - NtTy(..) => f.pad("NtTy(..)"), - NtIdent(..) => f.pad("NtIdent(..)"), - NtLiteral(..) 
=> f.pad("NtLiteral(..)"), - NtMeta(..) => f.pad("NtMeta(..)"), - NtPath(..) => f.pad("NtPath(..)"), - NtTT(..) => f.pad("NtTT(..)"), - NtImplItem(..) => f.pad("NtImplItem(..)"), - NtTraitItem(..) => f.pad("NtTraitItem(..)"), - NtForeignItem(..) => f.pad("NtForeignItem(..)"), - NtVis(..) => f.pad("NtVis(..)"), - NtLifetime(..) => f.pad("NtLifetime(..)"), - } - } -} |
