//! Code related to parsing literals. use crate::ast::{self, Ident, Lit, LitKind}; use crate::parse::parser::Parser; use crate::parse::PResult; use crate::parse::token::{self, Token}; use crate::parse::unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte}; use crate::print::pprust; use crate::symbol::{keywords, Symbol}; use crate::tokenstream::{TokenStream, TokenTree}; use errors::{Applicability, Handler}; use log::debug; use rustc_data_structures::sync::Lrc; use syntax_pos::Span; use std::ascii; macro_rules! err { ($opt_diag:expr, |$span:ident, $diag:ident| $($body:tt)*) => { match $opt_diag { Some(($span, $diag)) => { $($body)* } None => return None, } } } impl LitKind { /// Converts literal token with a suffix into a semantic literal. /// Works speculatively and may return `None` if diagnostic handler is not passed. /// If diagnostic handler is passed, always returns `Some`, /// possibly after reporting non-fatal errors and recovery. fn from_lit_token( lit: token::Lit, suf: Option, diag: Option<(Span, &Handler)> ) -> Option { if suf.is_some() && !lit.may_have_suffix() { err!(diag, |span, diag| { expect_no_suffix(span, diag, &format!("a {}", lit.literal_name()), suf) }); } Some(match lit { token::Bool(i) => { assert!(i == keywords::True.name() || i == keywords::False.name()); LitKind::Bool(i == keywords::True.name()) } token::Byte(i) => { match unescape_byte(&i.as_str()) { Ok(c) => LitKind::Byte(c), Err(_) => LitKind::Err(i), } }, token::Char(i) => { match unescape_char(&i.as_str()) { Ok(c) => LitKind::Char(c), Err(_) => LitKind::Err(i), } }, token::Err(i) => LitKind::Err(i), // There are some valid suffixes for integer and float literals, // so all the handling is done internally. token::Integer(s) => return integer_lit(&s.as_str(), suf, diag), token::Float(s) => return float_lit(&s.as_str(), suf, diag), token::Str_(mut sym) => { // If there are no characters requiring special treatment we can // reuse the symbol from the Token. Otherwise, we must generate a // new symbol because the string in the LitKind is different to the // string in the Token. let mut has_error = false; let s = &sym.as_str(); if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { let mut buf = String::with_capacity(s.len()); unescape_str(s, &mut |_, unescaped_char| { match unescaped_char { Ok(c) => buf.push(c), Err(_) => has_error = true, } }); if has_error { return Some(LitKind::Err(sym)); } sym = Symbol::intern(&buf) } LitKind::Str(sym, ast::StrStyle::Cooked) } token::StrRaw(mut sym, n) => { // Ditto. let s = &sym.as_str(); if s.contains('\r') { sym = Symbol::intern(&raw_str_lit(s)); } LitKind::Str(sym, ast::StrStyle::Raw(n)) } token::ByteStr(i) => { let s = &i.as_str(); let mut buf = Vec::with_capacity(s.len()); let mut has_error = false; unescape_byte_str(s, &mut |_, unescaped_byte| { match unescaped_byte { Ok(c) => buf.push(c), Err(_) => has_error = true, } }); if has_error { return Some(LitKind::Err(i)); } buf.shrink_to_fit(); LitKind::ByteStr(Lrc::new(buf)) } token::ByteStrRaw(i, _) => { LitKind::ByteStr(Lrc::new(i.to_string().into_bytes())) } }) } /// Attempts to recover a token from semantic literal. /// This function is used when the original token doesn't exist (e.g. the literal is created /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). pub fn to_lit_token(&self) -> (token::Lit, Option) { match *self { LitKind::Str(string, ast::StrStyle::Cooked) => { let escaped = string.as_str().escape_default().to_string(); (token::Lit::Str_(Symbol::intern(&escaped)), None) } LitKind::Str(string, ast::StrStyle::Raw(n)) => { (token::Lit::StrRaw(string, n), None) } LitKind::ByteStr(ref bytes) => { let string = bytes.iter().cloned().flat_map(ascii::escape_default) .map(Into::::into).collect::(); (token::Lit::ByteStr(Symbol::intern(&string)), None) } LitKind::Byte(byte) => { let string: String = ascii::escape_default(byte).map(Into::::into).collect(); (token::Lit::Byte(Symbol::intern(&string)), None) } LitKind::Char(ch) => { let string: String = ch.escape_default().map(Into::::into).collect(); (token::Lit::Char(Symbol::intern(&string)), None) } LitKind::Int(n, ty) => { let suffix = match ty { ast::LitIntType::Unsigned(ty) => Some(Symbol::intern(ty.ty_to_string())), ast::LitIntType::Signed(ty) => Some(Symbol::intern(ty.ty_to_string())), ast::LitIntType::Unsuffixed => None, }; (token::Lit::Integer(Symbol::intern(&n.to_string())), suffix) } LitKind::Float(symbol, ty) => { (token::Lit::Float(symbol), Some(Symbol::intern(ty.ty_to_string()))) } LitKind::FloatUnsuffixed(symbol) => (token::Lit::Float(symbol), None), LitKind::Bool(value) => { let kw = if value { keywords::True } else { keywords::False }; (token::Lit::Bool(kw.name()), None) } LitKind::Err(val) => (token::Lit::Err(val), None), } } } impl Lit { /// Converts literal token with a suffix into an AST literal. /// Works speculatively and may return `None` if diagnostic handler is not passed. /// If diagnostic handler is passed, may return `Some`, /// possibly after reporting non-fatal errors and recovery, or `None` for irrecoverable errors. crate fn from_token( token: &token::Token, span: Span, diag: Option<(Span, &Handler)>, ) -> Option { let (token, suffix) = match *token { token::Ident(ident, false) if ident.name == keywords::True.name() || ident.name == keywords::False.name() => (token::Bool(ident.name), None), token::Literal(token, suffix) => (token, suffix), token::Interpolated(ref nt) => { if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { if let ast::ExprKind::Lit(lit) = &expr.node { return Some(lit.clone()); } } return None; } _ => return None, }; let node = LitKind::from_lit_token(token, suffix, diag)?; Some(Lit { node, token, suffix, span }) } /// Attempts to recover an AST literal from semantic literal. /// This function is used when the original token doesn't exist (e.g. the literal is created /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). pub fn from_lit_kind(node: LitKind, span: Span) -> Lit { let (token, suffix) = node.to_lit_token(); Lit { node, token, suffix, span } } /// Losslessly convert an AST literal into a token stream. crate fn tokens(&self) -> TokenStream { let token = match self.token { token::Bool(symbol) => Token::Ident(Ident::with_empty_ctxt(symbol), false), token => Token::Literal(token, self.suffix), }; TokenTree::Token(self.span, token).into() } } impl<'a> Parser<'a> { /// Matches `lit = true | false | token_lit`. crate fn parse_lit(&mut self) -> PResult<'a, Lit> { let diag = Some((self.span, &self.sess.span_diagnostic)); if let Some(lit) = Lit::from_token(&self.token, self.span, diag) { self.bump(); return Ok(lit); } else if self.token == token::Dot { // Recover `.4` as `0.4`. let recovered = self.look_ahead(1, |t| { if let token::Literal(token::Integer(val), suf) = *t { let next_span = self.look_ahead_span(1); if self.span.hi() == next_span.lo() { let sym = String::from("0.") + &val.as_str(); let token = token::Literal(token::Float(Symbol::intern(&sym)), suf); return Some((token, self.span.to(next_span))); } } None }); if let Some((token, span)) = recovered { self.diagnostic() .struct_span_err(span, "float literals must have an integer part") .span_suggestion( span, "must have an integer part", pprust::token_to_string(&token), Applicability::MachineApplicable, ) .emit(); let diag = Some((span, &self.sess.span_diagnostic)); if let Some(lit) = Lit::from_token(&token, span, diag) { self.bump(); self.bump(); return Ok(lit); } } } Err(self.span_fatal(self.span, &format!("unexpected token: {}", self.this_token_descr()))) } } crate fn expect_no_suffix(sp: Span, diag: &Handler, kind: &str, suffix: Option) { match suffix { None => {/* everything ok */} Some(suf) => { let text = suf.as_str(); if text.is_empty() { diag.span_bug(sp, "found empty literal suffix in Some") } let mut err = if kind == "a tuple index" && ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str()) { // #59553: warn instead of reject out of hand to allow the fix to percolate // through the ecosystem when people fix their macros let mut err = diag.struct_span_warn( sp, &format!("suffixes on {} are invalid", kind), ); err.note(&format!( "`{}` is *temporarily* accepted on tuple index fields as it was \ incorrectly accepted on stable for a few releases", text, )); err.help( "on proc macros, you'll want to use `syn::Index::from` or \ `proc_macro::Literal::*_unsuffixed` for code that will desugar \ to tuple field access", ); err.note( "for more context, see https://github.com/rust-lang/rust/issues/60210", ); err } else { diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) }; err.span_label(sp, format!("invalid suffix `{}`", text)); err.emit(); } } } /// Parses a string representing a raw string literal into its final form. The /// only operation this does is convert embedded CRLF into a single LF. fn raw_str_lit(lit: &str) -> String { debug!("raw_str_lit: given {}", lit.escape_default()); let mut res = String::with_capacity(lit.len()); let mut chars = lit.chars().peekable(); while let Some(c) = chars.next() { if c == '\r' { if *chars.peek().unwrap() != '\n' { panic!("lexer accepted bare CR"); } chars.next(); res.push('\n'); } else { res.push(c); } } res.shrink_to_fit(); res } // check if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) } fn filtered_float_lit(data: Symbol, suffix: Option, diag: Option<(Span, &Handler)>) -> Option { debug!("filtered_float_lit: {}, {:?}", data, suffix); let suffix = match suffix { Some(suffix) => suffix, None => return Some(LitKind::FloatUnsuffixed(data)), }; Some(match &*suffix.as_str() { "f32" => LitKind::Float(data, ast::FloatTy::F32), "f64" => LitKind::Float(data, ast::FloatTy::F64), suf => { err!(diag, |span, diag| { if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { // if it looks like a width, lets try to be helpful. let msg = format!("invalid width `{}` for float literal", &suf[1..]); diag.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit() } else { let msg = format!("invalid suffix `{}` for float literal", suf); diag.struct_span_err(span, &msg) .span_label(span, format!("invalid suffix `{}`", suf)) .help("valid suffixes are `f32` and `f64`") .emit(); } }); LitKind::FloatUnsuffixed(data) } }) } fn float_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) -> Option { debug!("float_lit: {:?}, {:?}", s, suffix); // FIXME #2252: bounds checking float literals is deferred until trans // Strip underscores without allocating a new String unless necessary. let s2; let s = if s.chars().any(|c| c == '_') { s2 = s.chars().filter(|&c| c != '_').collect::(); &s2 } else { s }; filtered_float_lit(Symbol::intern(s), suffix, diag) } fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) -> Option { // s can only be ascii, byte indexing is fine // Strip underscores without allocating a new String unless necessary. let s2; let mut s = if s.chars().any(|c| c == '_') { s2 = s.chars().filter(|&c| c != '_').collect::(); &s2 } else { s }; debug!("integer_lit: {}, {:?}", s, suffix); let mut base = 10; let orig = s; let mut ty = ast::LitIntType::Unsuffixed; if s.starts_with('0') && s.len() > 1 { match s.as_bytes()[1] { b'x' => base = 16, b'o' => base = 8, b'b' => base = 2, _ => { } } } // 1f64 and 2f32 etc. are valid float literals. if let Some(suf) = suffix { if looks_like_width_suffix(&['f'], &suf.as_str()) { let err = match base { 16 => Some("hexadecimal float literal is not supported"), 8 => Some("octal float literal is not supported"), 2 => Some("binary float literal is not supported"), _ => None, }; if let Some(err) = err { err!(diag, |span, diag| { diag.struct_span_err(span, err) .span_label(span, "not supported") .emit(); }); } return filtered_float_lit(Symbol::intern(s), Some(suf), diag) } } if base != 10 { s = &s[2..]; } if let Some(suf) = suffix { if suf.as_str().is_empty() { err!(diag, |span, diag| diag.span_bug(span, "found empty literal suffix in Some")); } ty = match &*suf.as_str() { "isize" => ast::LitIntType::Signed(ast::IntTy::Isize), "i8" => ast::LitIntType::Signed(ast::IntTy::I8), "i16" => ast::LitIntType::Signed(ast::IntTy::I16), "i32" => ast::LitIntType::Signed(ast::IntTy::I32), "i64" => ast::LitIntType::Signed(ast::IntTy::I64), "i128" => ast::LitIntType::Signed(ast::IntTy::I128), "usize" => ast::LitIntType::Unsigned(ast::UintTy::Usize), "u8" => ast::LitIntType::Unsigned(ast::UintTy::U8), "u16" => ast::LitIntType::Unsigned(ast::UintTy::U16), "u32" => ast::LitIntType::Unsigned(ast::UintTy::U32), "u64" => ast::LitIntType::Unsigned(ast::UintTy::U64), "u128" => ast::LitIntType::Unsigned(ast::UintTy::U128), suf => { // i and u look like widths, so lets // give an error message along those lines err!(diag, |span, diag| { if looks_like_width_suffix(&['i', 'u'], suf) { let msg = format!("invalid width `{}` for integer literal", &suf[1..]); diag.struct_span_err(span, &msg) .help("valid widths are 8, 16, 32, 64 and 128") .emit(); } else { let msg = format!("invalid suffix `{}` for numeric literal", suf); diag.struct_span_err(span, &msg) .span_label(span, format!("invalid suffix `{}`", suf)) .help("the suffix must be one of the integral types \ (`u32`, `isize`, etc)") .emit(); } }); ty } } } debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \ string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix); Some(match u128::from_str_radix(s, base) { Ok(r) => LitKind::Int(r, ty), Err(_) => { // small bases are lexed as if they were base 10, e.g, the string // might be `0b10201`. This will cause the conversion above to fail, // but these cases have errors in the lexer: we don't want to emit // two errors, and we especially don't want to emit this error since // it isn't necessarily true. let already_errored = base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); if !already_errored { err!(diag, |span, diag| diag.span_err(span, "int literal is too large")); } LitKind::Int(0, ty) } }) }