diff options
Diffstat (limited to 'compiler')
29 files changed, 712 insertions, 336 deletions
diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index b5dba0713bf..e3ac8a8784a 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1821,6 +1821,8 @@ pub enum LitKind { /// A byte string (`b"foo"`). Not stored as a symbol because it might be /// non-utf8, and symbols only allow utf8 strings. ByteStr(Lrc<[u8]>, StrStyle), + /// A C String (`c"foo"`). Guaranteed to only have `\0` at the end. + CStr(Lrc<[u8]>, StrStyle), /// A byte char (`b'f'`). Byte(u8), /// A character literal (`'a'`). @@ -1875,6 +1877,7 @@ impl LitKind { // unsuffixed variants LitKind::Str(..) | LitKind::ByteStr(..) + | LitKind::CStr(..) | LitKind::Byte(..) | LitKind::Char(..) | LitKind::Int(_, LitIntType::Unsuffixed) diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index f947ae4d057..42b843482a3 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -74,6 +74,8 @@ pub enum LitKind { StrRaw(u8), // raw string delimited by `n` hash symbols ByteStr, ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols + CStr, + CStrRaw(u8), Err, } @@ -141,6 +143,10 @@ impl fmt::Display for Lit { delim = "#".repeat(n as usize), string = symbol )?, + CStr => write!(f, "c\"{symbol}\"")?, + CStrRaw(n) => { + write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))? + } Integer | Float | Bool | Err => write!(f, "{symbol}")?, } @@ -170,6 +176,7 @@ impl LitKind { Float => "float", Str | StrRaw(..) => "string", ByteStr | ByteStrRaw(..) => "byte string", + CStr | CStrRaw(..) => "C string", Err => "error", } } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 74b842ac96e..15a54fe13d0 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,9 +2,13 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; -use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; +use rustc_lexer::unescape::{ + byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, + Mode, +}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; +use std::ops::Range; use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. Reuses the original symbol, @@ -35,6 +39,7 @@ pub enum LitError { InvalidFloatSuffix, NonDecimalFloat(u32), IntTooLarge(u32), + NulInCStr(Range<usize>), } impl LitKind { @@ -158,6 +163,52 @@ impl LitKind { LitKind::ByteStr(bytes.into(), StrStyle::Raw(n)) } + token::CStr => { + let s = symbol.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_c_string(s, Mode::CStr, &mut |span, c| match c { + Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { + error = Err(LitError::NulInCStr(span)); + } + Ok(CStrUnit::Byte(b)) => buf.push(b), + Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), + Ok(CStrUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } + }); + error?; + buf.push(0); + LitKind::CStr(buf.into(), StrStyle::Cooked) + } + token::CStrRaw(n) => { + let s = symbol.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c { + Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { + error = Err(LitError::NulInCStr(span)); + } + Ok(CStrUnit::Byte(b)) => buf.push(b), + Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), + Ok(CStrUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } + }); + error?; + buf.push(0); + LitKind::CStr(buf.into(), StrStyle::Raw(n)) + } token::Err => LitKind::Err, }) } @@ -191,6 +242,14 @@ impl fmt::Display for LitKind { string = symbol )?; } + LitKind::CStr(ref bytes, StrStyle::Cooked) => { + write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))? + } + LitKind::CStr(ref bytes, StrStyle::Raw(n)) => { + // This can only be valid UTF-8. + let symbol = str::from_utf8(bytes).unwrap(); + write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?; + } LitKind::Int(n, ty) => { write!(f, "{n}")?; match ty { @@ -237,6 +296,8 @@ impl MetaItemLit { LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n), LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr, LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n), + LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr, + LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n), LitKind::Byte(_) => token::Byte, LitKind::Char(_) => token::Char, LitKind::Int(..) => token::Integer, diff --git a/compiler/rustc_ast_passes/src/feature_gate.rs b/compiler/rustc_ast_passes/src/feature_gate.rs index a46fe9e898f..b960671bf6e 100644 --- a/compiler/rustc_ast_passes/src/feature_gate.rs +++ b/compiler/rustc_ast_passes/src/feature_gate.rs @@ -572,6 +572,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session) { } }; } + gate_all!(c_str_literals, "`c\"..\"` literals are experimental"); gate_all!( if_let_guard, "`if let` guards are experimental", diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index ae346510ccc..3f80728a260 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -210,6 +210,10 @@ pub fn literal_to_string(lit: token::Lit) -> String { token::ByteStrRaw(n) => { format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol) } + token::CStr => format!("c\"{symbol}\""), + token::CStrRaw(n) => { + format!("cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize)) + } token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(), }; diff --git a/compiler/rustc_attr/src/builtin.rs b/compiler/rustc_attr/src/builtin.rs index d2d3792345f..2a3092d3c7b 100644 --- a/compiler/rustc_attr/src/builtin.rs +++ b/compiler/rustc_attr/src/builtin.rs @@ -5,6 +5,7 @@ use rustc_ast::{Attribute, LitKind, MetaItem, MetaItemKind, MetaItemLit, NestedM use rustc_ast_pretty::pprust; use rustc_feature::{find_gated_cfg, is_builtin_attr_name, Features, GatedCfg}; use rustc_macros::HashStable_Generic; +use rustc_session::config::ExpectedValues; use rustc_session::lint::builtin::UNEXPECTED_CFGS; use rustc_session::lint::BuiltinLintDiagnostics; use rustc_session::parse::{feature_err, ParseSess}; @@ -581,32 +582,32 @@ pub fn cfg_matches( ) -> bool { eval_condition(cfg, sess, features, &mut |cfg| { try_gate_cfg(cfg.name, cfg.span, sess, features); - if let Some(names_valid) = &sess.check_config.names_valid { - if !names_valid.contains(&cfg.name) { + match sess.check_config.expecteds.get(&cfg.name) { + Some(ExpectedValues::Some(values)) if !values.contains(&cfg.value) => { sess.buffer_lint_with_diagnostic( UNEXPECTED_CFGS, cfg.span, lint_node_id, - "unexpected `cfg` condition name", - BuiltinLintDiagnostics::UnexpectedCfg((cfg.name, cfg.name_span), None), + "unexpected `cfg` condition value", + BuiltinLintDiagnostics::UnexpectedCfgValue( + (cfg.name, cfg.name_span), + cfg.value.map(|v| (v, cfg.value_span.unwrap())), + ), ); } - } - if let Some(value) = cfg.value { - if let Some(values) = &sess.check_config.values_valid.get(&cfg.name) { - if !values.contains(&value) { - sess.buffer_lint_with_diagnostic( - UNEXPECTED_CFGS, - cfg.span, - lint_node_id, - "unexpected `cfg` condition value", - BuiltinLintDiagnostics::UnexpectedCfg( - (cfg.name, cfg.name_span), - cfg.value_span.map(|vs| (value, vs)), - ), - ); - } + None if sess.check_config.exhaustive_names => { + sess.buffer_lint_with_diagnostic( + UNEXPECTED_CFGS, + cfg.span, + lint_node_id, + "unexpected `cfg` condition name", + BuiltinLintDiagnostics::UnexpectedCfgName( + (cfg.name, cfg.name_span), + cfg.value.map(|v| (v, cfg.value_span.unwrap())), + ), + ); } + _ => { /* not unexpected */ } } sess.config.contains(&(cfg.name, cfg.value)) }) diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index b92964d03e9..50e88ae2eee 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -32,6 +32,10 @@ pub fn expand_concat( Ok(ast::LitKind::Bool(b)) => { accumulator.push_str(&b.to_string()); } + Ok(ast::LitKind::CStr(..)) => { + cx.span_err(e.span, "cannot concatenate a C string literal"); + has_errors = true; + } Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { cx.emit_err(errors::ConcatBytestr { span: e.span }); has_errors = true; diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index ba639c0a9fe..5ef35af0a05 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -18,6 +18,11 @@ fn invalid_type_err( }; let snippet = cx.sess.source_map().span_to_snippet(span).ok(); match ast::LitKind::from_token_lit(token_lit) { + Ok(ast::LitKind::CStr(_, _)) => { + // FIXME(c_str_literals): should concatenation of C string literals + // include the null bytes in the end? + cx.span_err(span, "cannot concatenate C string literals"); + } Ok(ast::LitKind::Char(_)) => { let sugg = snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet }); diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index c95148013eb..53d97f35201 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2249,7 +2249,7 @@ extern "C" { pub fn LLVMRustHasFeature(T: &TargetMachine, s: *const c_char) -> bool; - pub fn LLVMRustPrintTargetCPUs(T: &TargetMachine); + pub fn LLVMRustPrintTargetCPUs(T: &TargetMachine, cpu: *const c_char); pub fn LLVMRustGetTargetFeaturesCount(T: &TargetMachine) -> size_t; pub fn LLVMRustGetTargetFeature( T: &TargetMachine, diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 46692fd5e8b..2fbdab9f8ce 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -329,7 +329,14 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) { require_inited(); let tm = create_informational_target_machine(sess); match req { - PrintRequest::TargetCPUs => unsafe { llvm::LLVMRustPrintTargetCPUs(tm) }, + PrintRequest::TargetCPUs => { + // SAFETY generate a C compatible string from a byte slice to pass + // the target CPU name into LLVM, the lifetime of the reference is + // at least as long as the C function + let cpu_cstring = CString::new(handle_native(sess.target.cpu.as_ref())) + .unwrap_or_else(|e| bug!("failed to convert to cstring: {}", e)); + unsafe { llvm::LLVMRustPrintTargetCPUs(tm, cpu_cstring.as_ptr()) }; + } PrintRequest::TargetFeatures => print_target_features(sess, tm), _ => bug!("rustc_codegen_llvm can't handle print request: {:?}", req), } diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 1e7d07bc22d..891e84a2f30 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind { token::StrRaw(n) => LitKind::StrRaw(n), token::ByteStr => LitKind::ByteStr, token::ByteStrRaw(n) => LitKind::ByteStrRaw(n), + token::CStr => LitKind::CStr, + token::CStrRaw(n) => LitKind::CStrRaw(n), token::Err => LitKind::Err, token::Bool => unreachable!(), } @@ -78,6 +80,8 @@ impl ToInternal<token::LitKind> for LitKind { LitKind::StrRaw(n) => token::StrRaw(n), LitKind::ByteStr => token::ByteStr, LitKind::ByteStrRaw(n) => token::ByteStrRaw(n), + LitKind::CStr => token::CStr, + LitKind::CStrRaw(n) => token::CStrRaw(n), LitKind::Err => token::Err, } } @@ -436,6 +440,8 @@ impl server::FreeFunctions for Rustc<'_, '_> { | token::LitKind::StrRaw(_) | token::LitKind::ByteStr | token::LitKind::ByteStrRaw(_) + | token::LitKind::CStr + | token::LitKind::CStrRaw(_) | token::LitKind::Err => return Err(()), token::LitKind::Integer | token::LitKind::Float => {} } diff --git a/compiler/rustc_feature/src/active.rs b/compiler/rustc_feature/src/active.rs index dc5dc4608a0..7e7df0e9584 100644 --- a/compiler/rustc_feature/src/active.rs +++ b/compiler/rustc_feature/src/active.rs @@ -313,6 +313,8 @@ declare_features! ( (active, async_closure, "1.37.0", Some(62290), None), /// Allows async functions to be declared, implemented, and used in traits. (active, async_fn_in_trait, "1.66.0", Some(91611), None), + /// Allows `c"foo"` literals. + (active, c_str_literals, "CURRENT_RUSTC_VERSION", Some(105723), None), /// Treat `extern "C"` function as nounwind. (active, c_unwind, "1.52.0", Some(74990), None), /// Allows using C-variadics. diff --git a/compiler/rustc_hir/src/lang_items.rs b/compiler/rustc_hir/src/lang_items.rs index 0a8bd1d6123..1f08befb180 100644 --- a/compiler/rustc_hir/src/lang_items.rs +++ b/compiler/rustc_hir/src/lang_items.rs @@ -333,6 +333,7 @@ language_item_table! { RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None; String, sym::String, string, Target::Struct, GenericRequirement::None; + CStr, sym::CStr, c_str, Target::Struct, GenericRequirement::None; } pub enum GenericRequirement { diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index 78bd489a44b..4b8fc7303a2 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { opt_ty.unwrap_or_else(|| self.next_float_var()) } ast::LitKind::Bool(_) => tcx.types.bool, + ast::LitKind::CStr(_, _) => tcx.mk_imm_ref( + tcx.lifetimes.re_static, + tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span))) + .skip_binder(), + ), ast::LitKind::Err => tcx.ty_error_misc(), } } diff --git a/compiler/rustc_interface/src/interface.rs b/compiler/rustc_interface/src/interface.rs index 8e9150ba8ad..9d9f4ee13f4 100644 --- a/compiler/rustc_interface/src/interface.rs +++ b/compiler/rustc_interface/src/interface.rs @@ -9,11 +9,12 @@ use rustc_data_structures::OnDrop; use rustc_errors::registry::Registry; use rustc_errors::{ErrorGuaranteed, Handler}; use rustc_lint::LintStore; -use rustc_middle::ty; +use rustc_middle::{bug, ty}; use rustc_parse::maybe_new_parser_from_source_str; use rustc_query_impl::QueryCtxt; use rustc_query_system::query::print_query_stack; -use rustc_session::config::{self, CheckCfg, ErrorOutputType, Input, OutputFilenames}; +use rustc_session::config::{self, ErrorOutputType, Input, OutputFilenames}; +use rustc_session::config::{CheckCfg, ExpectedValues}; use rustc_session::lint; use rustc_session::parse::{CrateConfig, ParseSess}; use rustc_session::Session; @@ -121,9 +122,9 @@ pub fn parse_cfgspecs(cfgspecs: Vec<String>) -> FxHashSet<(String, Option<String /// Converts strings provided as `--check-cfg [specs]` into a `CheckCfg`. pub fn parse_check_cfg(specs: Vec<String>) -> CheckCfg { rustc_span::create_default_session_if_not_set_then(move |_| { - let mut cfg = CheckCfg::default(); + let mut check_cfg = CheckCfg::default(); - 'specs: for s in specs { + for s in specs { let sess = ParseSess::with_silent_emitter(Some(format!( "this error occurred on the command line: `--check-cfg={s}`" ))); @@ -137,40 +138,54 @@ pub fn parse_check_cfg(specs: Vec<String>) -> CheckCfg { concat!("invalid `--check-cfg` argument: `{}` (", $reason, ")"), s ), - ); + ) }; } + let expected_error = || { + error!( + "expected `names(name1, name2, ... nameN)` or \ + `values(name, \"value1\", \"value2\", ... \"valueN\")`" + ) + }; + match maybe_new_parser_from_source_str(&sess, filename, s.to_string()) { Ok(mut parser) => match parser.parse_meta_item() { Ok(meta_item) if parser.token == token::Eof => { if let Some(args) = meta_item.meta_item_list() { if meta_item.has_name(sym::names) { - let names_valid = - cfg.names_valid.get_or_insert_with(|| FxHashSet::default()); + check_cfg.exhaustive_names = true; for arg in args { if arg.is_word() && arg.ident().is_some() { let ident = arg.ident().expect("multi-segment cfg key"); - names_valid.insert(ident.name.to_string()); + check_cfg + .expecteds + .entry(ident.name.to_string()) + .or_insert(ExpectedValues::Any); } else { error!("`names()` arguments must be simple identifiers"); } } - continue 'specs; } else if meta_item.has_name(sym::values) { if let Some((name, values)) = args.split_first() { if name.is_word() && name.ident().is_some() { let ident = name.ident().expect("multi-segment cfg key"); - let ident_values = cfg - .values_valid + let expected_values = check_cfg + .expecteds .entry(ident.name.to_string()) - .or_insert_with(|| FxHashSet::default()); + .or_insert_with(|| { + ExpectedValues::Some(FxHashSet::default()) + }); + + let ExpectedValues::Some(expected_values) = expected_values else { + bug!("shoudn't be possible") + }; for val in values { if let Some(LitKind::Str(s, _)) = val.lit().map(|lit| &lit.kind) { - ident_values.insert(s.to_string()); + expected_values.insert(Some(s.to_string())); } else { error!( "`values()` arguments must be string literals" @@ -178,35 +193,40 @@ pub fn parse_check_cfg(specs: Vec<String>) -> CheckCfg { } } - continue 'specs; + if values.is_empty() { + expected_values.insert(None); + } } else { error!( "`values()` first argument must be a simple identifier" ); } } else if args.is_empty() { - cfg.well_known_values = true; - continue 'specs; + check_cfg.exhaustive_values = true; + } else { + expected_error(); } + } else { + expected_error(); } + } else { + expected_error(); } } - Ok(..) => {} - Err(err) => err.cancel(), + Ok(..) => expected_error(), + Err(err) => { + err.cancel(); + expected_error(); + } }, - Err(errs) => drop(errs), + Err(errs) => { + drop(errs); + expected_error(); + } } - - error!( - "expected `names(name1, name2, ... nameN)` or \ - `values(name, \"value1\", \"value2\", ... \"valueN\")`" - ); } - if let Some(names_valid) = &mut cfg.names_valid { - names_valid.extend(cfg.values_valid.keys().cloned()); - } - cfg + check_cfg }) } diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index b3f4b5cd5e5..c07dc19a0ac 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -186,12 +186,16 @@ pub enum LiteralKind { Str { terminated: bool }, /// "b"abc"", "b"abc" ByteStr { terminated: bool }, + /// `c"abc"`, `c"abc` + CStr { terminated: bool }, /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates /// an invalid literal. RawStr { n_hashes: Option<u8> }, /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None` /// indicates an invalid literal. RawByteStr { n_hashes: Option<u8> }, + /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal. + RawCStr { n_hashes: Option<u8> }, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -357,39 +361,18 @@ impl Cursor<'_> { }, // Byte literal, byte string literal, raw byte string literal or identifier. - 'b' => match (self.first(), self.second()) { - ('\'', _) => { - self.bump(); - let terminated = self.single_quoted_string(); - let suffix_start = self.pos_within_token(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Byte { terminated }; - Literal { kind, suffix_start } - } - ('"', _) => { - self.bump(); - let terminated = self.double_quoted_string(); - let suffix_start = self.pos_within_token(); - if terminated { - self.eat_literal_suffix(); - } - let kind = ByteStr { terminated }; - Literal { kind, suffix_start } - } - ('r', '"') | ('r', '#') => { - self.bump(); - let res = self.raw_double_quoted_string(2); - let suffix_start = self.pos_within_token(); - if res.is_ok() { - self.eat_literal_suffix(); - } - let kind = RawByteStr { n_hashes: res.ok() }; - Literal { kind, suffix_start } - } - _ => self.ident_or_unknown_prefix(), - }, + 'b' => self.c_or_byte_string( + |terminated| ByteStr { terminated }, + |n_hashes| RawByteStr { n_hashes }, + Some(|terminated| Byte { terminated }), + ), + + // c-string literal, raw c-string literal or identifier. + 'c' => self.c_or_byte_string( + |terminated| CStr { terminated }, + |n_hashes| RawCStr { n_hashes }, + None, + ), // Identifier (this should be checked after other variant that can // start as identifier). @@ -553,6 +536,47 @@ impl Cursor<'_> { } } + fn c_or_byte_string( + &mut self, + mk_kind: impl FnOnce(bool) -> LiteralKind, + mk_kind_raw: impl FnOnce(Option<u8>) -> LiteralKind, + single_quoted: Option<fn(bool) -> LiteralKind>, + ) -> TokenKind { + match (self.first(), self.second(), single_quoted) { + ('\'', _, Some(mk_kind)) => { + self.bump(); + let terminated = self.single_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = mk_kind(terminated); + Literal { kind, suffix_start } + } + ('"', _, _) => { + self.bump(); + let terminated = self.double_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = mk_kind(terminated); + Literal { kind, suffix_start } + } + ('r', '"', _) | ('r', '#', _) => { + self.bump(); + let res = self.raw_double_quoted_string(2); + let suffix_start = self.pos_within_token(); + if res.is_ok() { + self.eat_literal_suffix(); + } + let kind = mk_kind_raw(res.ok()); + Literal { kind, suffix_start } + } + _ => self.ident_or_unknown_prefix(), + } + } + fn number(&mut self, first_digit: char) -> LiteralKind { debug_assert!('0' <= self.prev() && self.prev() <= '9'); let mut base = Base::Decimal; diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index bb4d91247b8..c9ad54d8d98 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -86,10 +86,45 @@ where let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte); callback(0..(src.len() - chars.as_str().len()), res); } - Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), + Mode::Str | Mode::ByteStr => unescape_str_common(src, mode, callback), + Mode::RawStr | Mode::RawByteStr => { unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) } + Mode::CStr | Mode::RawCStr => unreachable!(), + } +} + +/// A unit within CStr. Must not be a nul character. +pub enum CStrUnit { + Byte(u8), + Char(char), +} + +impl From<u8> for CStrUnit { + fn from(value: u8) -> Self { + CStrUnit::Byte(value) + } +} + +impl From<char> for CStrUnit { + fn from(value: char) -> Self { + CStrUnit::Char(value) + } +} + +pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F) +where + F: FnMut(Range<usize>, Result<CStrUnit, EscapeError>), +{ + if mode == Mode::RawCStr { + unescape_raw_str_or_raw_byte_str( + src, + mode.characters_should_be_ascii(), + &mut |r, result| callback(r, result.map(CStrUnit::Char)), + ); + } else { + unescape_str_common(src, mode, callback); } } @@ -114,34 +149,69 @@ pub enum Mode { ByteStr, RawStr, RawByteStr, + CStr, + RawCStr, } impl Mode { pub fn in_double_quotes(self) -> bool { match self { - Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true, + Mode::Str + | Mode::ByteStr + | Mode::RawStr + | Mode::RawByteStr + | Mode::CStr + | Mode::RawCStr => true, Mode::Char | Mode::Byte => false, } } - pub fn is_byte(self) -> bool { + /// Non-byte literals should have `\xXX` escapes that are within the ASCII range. + pub fn ascii_escapes_should_be_ascii(self) -> bool { + match self { + Mode::Char | Mode::Str | Mode::RawStr => true, + Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => false, + } + } + + /// Whether characters within the literal must be within the ASCII range + pub fn characters_should_be_ascii(self) -> bool { + match self { + Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, + Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false, + } + } + + /// Byte literals do not allow unicode escape. + pub fn is_unicode_escape_disallowed(self) -> bool { match self { Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, - Mode::Char | Mode::Str | Mode::RawStr => false, + Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false, + } + } + + pub fn prefix_noraw(self) -> &'static str { + match self { + Mode::Byte | Mode::ByteStr | Mode::RawByteStr => "b", + Mode::CStr | Mode::RawCStr => "c", + Mode::Char | Mode::Str | Mode::RawStr => "", } } } -fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> { +fn scan_escape<T: From<u8> + From<char>>( + chars: &mut Chars<'_>, + mode: Mode, +) -> Result<T, EscapeError> { // Previous character was '\\', unescape what follows. let res = match chars.next().ok_or(EscapeError::LoneSlash)? { - '"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '\'' => '\'', - '0' => '\0', + '"' => b'"', + 'n' => b'\n', + 'r' => b'\r', + 't' => b'\t', + '\\' => b'\\', + '\'' => b'\'', + '0' => b'\0', 'x' => { // Parse hexadecimal character code. @@ -154,76 +224,78 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError let value = hi * 16 + lo; - // For a non-byte literal verify that it is within ASCII range. - if !is_byte && !is_ascii(value) { + if mode.ascii_escapes_should_be_ascii() && !is_ascii(value) { return Err(EscapeError::OutOfRangeHexEscape); } - let value = value as u8; - value as char + value as u8 } - 'u' => { - // We've parsed '\u', now we have to parse '{..}'. + 'u' => return scan_unicode(chars, mode.is_unicode_escape_disallowed()).map(Into::into), + _ => return Err(EscapeError::InvalidEscape), + }; + Ok(res.into()) +} + +fn scan_unicode( + chars: &mut Chars<'_>, + is_unicode_escape_disallowed: bool, +) -> Result<char, EscapeError> { + // We've parsed '\u', now we have to parse '{..}'. - if chars.next() != Some('{') { - return Err(EscapeError::NoBraceInUnicodeEscape); - } + if chars.next() != Some('{') { + return Err(EscapeError::NoBraceInUnicodeEscape); + } - // First character must be a hexadecimal digit. - let mut n_digits = 1; - let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { - '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), - '}' => return Err(EscapeError::EmptyUnicodeEscape), - c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, - }; - - // First character is valid, now parse the rest of the number - // and closing brace. - loop { - match chars.next() { - None => return Err(EscapeError::UnclosedUnicodeEscape), - Some('_') => continue, - Some('}') => { - if n_digits > 6 { - return Err(EscapeError::OverlongUnicodeEscape); - } - - // Incorrect syntax has higher priority for error reporting - // than unallowed value for a literal. - if is_byte { - return Err(EscapeError::UnicodeEscapeInByte); - } - - break std::char::from_u32(value).ok_or_else(|| { - if value > 0x10FFFF { - EscapeError::OutOfRangeUnicodeEscape - } else { - EscapeError::LoneSurrogateUnicodeEscape - } - })?; - } - Some(c) => { - let digit: u32 = - c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; - n_digits += 1; - if n_digits > 6 { - // Stop updating value since we're sure that it's incorrect already. - continue; - } - value = value * 16 + digit; + // First character must be a hexadecimal digit. + let mut n_digits = 1; + let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { + '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), + '}' => return Err(EscapeError::EmptyUnicodeEscape), + c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, + }; + + // First character is valid, now parse the rest of the number + // and closing brace. + loop { + match chars.next() { + None => return Err(EscapeError::UnclosedUnicodeEscape), + Some('_') => continue, + Some('}') => { + if n_digits > 6 { + return Err(EscapeError::OverlongUnicodeEscape); + } + + // Incorrect syntax has higher priority for error reporting + // than unallowed value for a literal. + if is_unicode_escape_disallowed { + return Err(EscapeError::UnicodeEscapeInByte); + } + + break std::char::from_u32(value).ok_or_else(|| { + if value > 0x10FFFF { + EscapeError::OutOfRangeUnicodeEscape + } else { + EscapeError::LoneSurrogateUnicodeEscape } - }; + }); } - } - _ => return Err(EscapeError::InvalidEscape), - }; - Ok(res) + Some(c) => { + let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; + n_digits += 1; + if n_digits > 6 { + // Stop updating value since we're sure that it's incorrect already. + continue; + } + value = value * 16 + digit; + } + }; + } } #[inline] -fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> { - if is_byte && !c.is_ascii() { +fn ascii_check(c: char, characters_should_be_ascii: bool) -> Result<char, EscapeError> { + if characters_should_be_ascii && !c.is_ascii() { // Byte literal can't be a non-ascii character. Err(EscapeError::NonAsciiCharInByte) } else { @@ -234,7 +306,7 @@ fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> { fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> { let c = chars.next().ok_or(EscapeError::ZeroChars)?; let res = match c { - '\\' => scan_escape(chars, is_byte), + '\\' => scan_escape(chars, if is_byte { Mode::Byte } else { Mode::Char }), '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), _ => ascii_check(c, is_byte), @@ -247,9 +319,9 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, E /// Takes a contents of a string literal (without quotes) and produces a /// sequence of escaped characters or errors. -fn unescape_str_or_byte_str<F>(src: &str, is_byte: bool, callback: &mut F) +fn unescape_str_common<F, T: From<u8> + From<char>>(src: &str, mode: Mode, callback: &mut F) where - F: FnMut(Range<usize>, Result<char, EscapeError>), + F: FnMut(Range<usize>, Result<T, EscapeError>), { let mut chars = src.chars(); @@ -266,47 +338,49 @@ where // if unescaped '\' character is followed by '\n'. // For details see [Rust language reference] // (https://doc.rust-lang.org/reference/tokens.html#string-literals). - skip_ascii_whitespace(&mut chars, start, callback); + skip_ascii_whitespace(&mut chars, start, &mut |range, err| { + callback(range, Err(err)) + }); continue; } - _ => scan_escape(&mut chars, is_byte), + _ => scan_escape::<T>(&mut chars, mode), } } - '\n' => Ok('\n'), - '\t' => Ok('\t'), + '\n' => Ok(b'\n'.into()), + '\t' => Ok(b'\t'.into()), '"' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, is_byte), + _ => ascii_check(c, mode.characters_should_be_ascii()).map(Into::into), }; let end = src.len() - chars.as_str().len(); - callback(start..end, res); + callback(start..end, res.map(Into::into)); } +} - fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F) - where - F: FnMut(Range<usize>, Result<char, EscapeError>), - { - let tail = chars.as_str(); - let first_non_space = tail - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(tail.len()); - if tail[1..first_non_space].contains('\n') { - // The +1 accounts for the escaping slash. - let end = start + first_non_space + 1; - callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning)); - } - let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().nth(0) { - if c.is_whitespace() { - // For error reporting, we would like the span to contain the character that was not - // skipped. The +1 is necessary to account for the leading \ that started the escape. - let end = start + first_non_space + c.len_utf8() + 1; - callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning)); - } +fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F) +where + F: FnMut(Range<usize>, EscapeError), +{ + let tail = chars.as_str(); + let first_non_space = tail + .bytes() + .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') + .unwrap_or(tail.len()); + if tail[1..first_non_space].contains('\n') { + // The +1 accounts for the escaping slash. + let end = start + first_non_space + 1; + callback(start..end, EscapeError::MultipleSkippedLinesWarning); + } + let tail = &tail[first_non_space..]; + if let Some(c) = tail.chars().nth(0) { + if c.is_whitespace() { + // For error reporting, we would like the span to contain the character that was not + // skipped. The +1 is necessary to account for the leading \ that started the escape. + let end = start + first_non_space + c.len_utf8() + 1; + callback(start..end, EscapeError::UnskippedWhitespaceWarning); } - *chars = tail.chars(); } + *chars = tail.chars(); } /// Takes a contents of a string literal (without quotes) and produces a diff --git a/compiler/rustc_lint/src/builtin.rs b/compiler/rustc_lint/src/builtin.rs index aeb791901bd..01052698850 100644 --- a/compiler/rustc_lint/src/builtin.rs +++ b/compiler/rustc_lint/src/builtin.rs @@ -63,6 +63,7 @@ use rustc_middle::ty::layout::{LayoutError, LayoutOf}; use rustc_middle::ty::print::with_no_trimmed_paths; use rustc_middle::ty::subst::GenericArgKind; use rustc_middle::ty::{self, Instance, Ty, TyCtxt, VariantDef}; +use rustc_session::config::ExpectedValues; use rustc_session::lint::{BuiltinLintDiagnostics, FutureIncompatibilityReason}; use rustc_span::edition::Edition; use rustc_span::source_map::Spanned; @@ -3306,16 +3307,15 @@ impl EarlyLintPass for UnexpectedCfgs { let cfg = &cx.sess().parse_sess.config; let check_cfg = &cx.sess().parse_sess.check_config; for &(name, value) in cfg { - if let Some(names_valid) = &check_cfg.names_valid && !names_valid.contains(&name){ - cx.emit_lint(UNEXPECTED_CFGS, BuiltinUnexpectedCliConfigName { - name, - }); - } - if let Some(value) = value && let Some(values) = check_cfg.values_valid.get(&name) && !values.contains(&value) { - cx.emit_lint( - UNEXPECTED_CFGS, - BuiltinUnexpectedCliConfigValue { name, value }, - ); + match check_cfg.expecteds.get(&name) { + Some(ExpectedValues::Some(values)) if !values.contains(&value) => { + let value = value.unwrap_or(kw::Empty); + cx.emit_lint(UNEXPECTED_CFGS, BuiltinUnexpectedCliConfigValue { name, value }); + } + None if check_cfg.exhaustive_names => { + cx.emit_lint(UNEXPECTED_CFGS, BuiltinUnexpectedCliConfigName { name }); + } + _ => { /* expected */ } } } } diff --git a/compiler/rustc_lint/src/context.rs b/compiler/rustc_lint/src/context.rs index d4898ffe883..53d7cf74cde 100644 --- a/compiler/rustc_lint/src/context.rs +++ b/compiler/rustc_lint/src/context.rs @@ -36,6 +36,7 @@ use rustc_middle::middle::stability; use rustc_middle::ty::layout::{LayoutError, LayoutOfHelpers, TyAndLayout}; use rustc_middle::ty::print::with_no_trimmed_paths; use rustc_middle::ty::{self, print::Printer, subst::GenericArg, RegisteredTools, Ty, TyCtxt}; +use rustc_session::config::ExpectedValues; use rustc_session::lint::{BuiltinLintDiagnostics, LintExpectationId}; use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId}; use rustc_session::Session; @@ -768,22 +769,52 @@ pub trait LintContext: Sized { db.help(help); db.note("see the asm section of Rust By Example <https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html#labels> for more information"); }, - BuiltinLintDiagnostics::UnexpectedCfg((name, name_span), None) => { - let Some(names_valid) = &sess.parse_sess.check_config.names_valid else { - bug!("it shouldn't be possible to have a diagnostic on a name if name checking is not enabled"); - }; - let possibilities: Vec<Symbol> = names_valid.iter().map(|s| *s).collect(); + BuiltinLintDiagnostics::UnexpectedCfgName((name, name_span), value) => { + let possibilities: Vec<Symbol> = sess.parse_sess.check_config.expecteds.keys().map(|s| *s).collect(); // Suggest the most probable if we found one if let Some(best_match) = find_best_match_for_name(&possibilities, name, None) { - db.span_suggestion(name_span, "did you mean", best_match, Applicability::MaybeIncorrect); + if let Some(ExpectedValues::Some(best_match_values)) = + sess.parse_sess.check_config.expecteds.get(&best_match) { + let mut possibilities = best_match_values.iter() + .flatten() + .map(Symbol::as_str) + .collect::<Vec<_>>(); + possibilities.sort(); + + if let Some((value, value_span)) = value { + if best_match_values.contains(&Some(value)) { + db.span_suggestion(name_span, "there is a config with a similar name and value", best_match, Applicability::MaybeIncorrect); + } else if best_match_values.contains(&None) { + db.span_suggestion(name_span.to(value_span), "there is a config with a similar name and no value", best_match, Applicability::MaybeIncorrect); + } else if let Some(first_value) = possibilities.first() { + db.span_suggestion(name_span.to(value_span), "there is a config with a similar name and different values", format!("{best_match} = \"{first_value}\""), Applicability::MaybeIncorrect); + } else { + db.span_suggestion(name_span.to(value_span), "there is a config with a similar name and different values", best_match, Applicability::MaybeIncorrect); + }; + } else { + db.span_suggestion(name_span, "there is a config with a similar name", best_match, Applicability::MaybeIncorrect); + } + + if !possibilities.is_empty() { + let possibilities = possibilities.join("`, `"); + db.help(format!("expected values for `{best_match}` are: `{possibilities}`")); + } + } else { + db.span_suggestion(name_span, "there is a config with a similar name", best_match, Applicability::MaybeIncorrect); + } } }, - BuiltinLintDiagnostics::UnexpectedCfg((name, name_span), Some((value, value_span))) => { - let Some(values) = &sess.parse_sess.check_config.values_valid.get(&name) else { + BuiltinLintDiagnostics::UnexpectedCfgValue((name, name_span), value) => { + let Some(ExpectedValues::Some(values)) = &sess.parse_sess.check_config.expecteds.get(&name) else { bug!("it shouldn't be possible to have a diagnostic on a value whose name is not in values"); }; - let possibilities: Vec<Symbol> = values.iter().map(|&s| s).collect(); + let mut have_none_possibility = false; + let possibilities: Vec<Symbol> = values.iter() + .inspect(|a| have_none_possibility |= a.is_none()) + .copied() + .flatten() + .collect(); // Show the full list if all possible values for a given name, but don't do it // for names as the possibilities could be very long @@ -792,17 +823,24 @@ pub trait LintContext: Sized { let mut possibilities = possibilities.iter().map(Symbol::as_str).collect::<Vec<_>>(); possibilities.sort(); - let possibilities = possibilities.join(", "); - db.note(format!("expected values for `{name}` are: {possibilities}")); + let possibilities = possibilities.join("`, `"); + let none = if have_none_possibility { "(none), " } else { "" }; + + db.note(format!("expected values for `{name}` are: {none}`{possibilities}`")); } - // Suggest the most probable if we found one - if let Some(best_match) = find_best_match_for_name(&possibilities, value, None) { - db.span_suggestion(value_span, "did you mean", format!("\"{best_match}\""), Applicability::MaybeIncorrect); + if let Some((value, value_span)) = value { + // Suggest the most probable if we found one + if let Some(best_match) = find_best_match_for_name(&possibilities, value, None) { + db.span_suggestion(value_span, "there is a expected value with a similar name", format!("\"{best_match}\""), Applicability::MaybeIncorrect); + + } + } else if let &[first_possibility] = &possibilities[..] { + db.span_suggestion(name_span.shrink_to_hi(), "specify a config value", format!(" = \"{first_possibility}\""), Applicability::MaybeIncorrect); } - } else { + } else if have_none_possibility { db.note(format!("no expected value for `{name}`")); - if name != sym::feature { + if let Some((_value, value_span)) = value { db.span_suggestion(name_span.shrink_to_hi().to(value_span), "remove the value", "", Applicability::MaybeIncorrect); } } diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs index 7ea472ed504..e27e322db88 100644 --- a/compiler/rustc_lint_defs/src/lib.rs +++ b/compiler/rustc_lint_defs/src/lib.rs @@ -496,7 +496,8 @@ pub enum BuiltinLintDiagnostics { BreakWithLabelAndLoop(Span), NamedAsmLabel(String), UnicodeTextFlow(Span, String), - UnexpectedCfg((Symbol, Span), Option<(Symbol, Span)>), + UnexpectedCfgName((Symbol, Span), Option<(Symbol, Span)>), + UnexpectedCfgValue((Symbol, Span), Option<(Symbol, Span)>), DeprecatedWhereclauseLocation(Span, String), SingleUseLifetime { /// Span of the parameter which declares this lifetime. diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index 1acdc95ca8d..e88a3cdf620 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -307,7 +307,7 @@ static size_t getLongestEntryLength(ArrayRef<KV> Table) { return MaxLen; } -extern "C" void LLVMRustPrintTargetCPUs(LLVMTargetMachineRef TM) { +extern "C" void LLVMRustPrintTargetCPUs(LLVMTargetMachineRef TM, const char* TargetCPU) { const TargetMachine *Target = unwrap(TM); const MCSubtargetInfo *MCInfo = Target->getMCSubtargetInfo(); const Triple::ArchType HostArch = Triple(sys::getDefaultTargetTriple()).getArch(); @@ -323,9 +323,18 @@ extern "C" void LLVMRustPrintTargetCPUs(LLVMTargetMachineRef TM) { printf(" %-*s - Select the CPU of the current host (currently %.*s).\n", MaxCPULen, "native", (int)HostCPU.size(), HostCPU.data()); } - for (auto &CPU : CPUTable) - printf(" %-*s\n", MaxCPULen, CPU.Key); - printf("\n"); + for (auto &CPU : CPUTable) { + // Compare cpu against current target to label the default + if (strcmp(CPU.Key, TargetCPU) == 0) { + printf(" %-*s - This is the default target CPU" + " for the current build target (currently %s).", + MaxCPULen, CPU.Key, Target->getTargetTriple().str().c_str()); + } + else { + printf(" %-*s", MaxCPULen, CPU.Key); + } + printf("\n"); + } } extern "C" size_t LLVMRustGetTargetFeaturesCount(LLVMTargetMachineRef TM) { diff --git a/compiler/rustc_mir_build/src/build/expr/as_constant.rs b/compiler/rustc_mir_build/src/build/expr/as_constant.rs index fbcfd433724..59549435233 100644 --- a/compiler/rustc_mir_build/src/build/expr/as_constant.rs +++ b/compiler/rustc_mir_build/src/build/expr/as_constant.rs @@ -146,6 +146,12 @@ pub(crate) fn lit_to_mir_constant<'tcx>( let id = tcx.allocate_bytes(data); ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx)) } + (ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if Some(def.did()) == tcx.lang_items().c_str()) => + { + let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8]); + let allocation = tcx.mk_const_alloc(allocation); + ConstValue::Slice { data: allocation, start: 0, end: data.len() } + } (ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => { ConstValue::Scalar(Scalar::from_uint(*n, Size::from_bytes(1))) } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index b1c0dedd3c7..51e90489002 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,3 +1,5 @@ +use std::ops::Range; + use crate::errors; use crate::lexer::unicode_chars::UNICODE_ARRAY; use crate::make_unclosed_delims_error; @@ -6,7 +8,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; -use rustc_lexer::unescape::{self, Mode}; +use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::Cursor; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_session::lint::builtin::{ @@ -204,6 +206,9 @@ impl<'a> StringReader<'a> { rustc_lexer::TokenKind::Literal { kind, suffix_start } => { let suffix_start = start + BytePos(suffix_start); let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); + if let token::LitKind::CStr | token::LitKind::CStrRaw(_) = kind { + self.sess.gated_spans.gate(sym::c_str_literals, self.mk_sp(start, self.pos)); + } let suffix = if suffix_start < self.pos { let string = self.str_from(suffix_start); if string == "_" { @@ -415,6 +420,16 @@ impl<'a> StringReader<'a> { } self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } + rustc_lexer::LiteralKind::CStr { terminated } => { + if !terminated { + self.sess.span_diagnostic.span_fatal_with_code( + self.mk_sp(start + BytePos(1), end), + "unterminated C string", + error_code!(E0767), + ) + } + self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); @@ -433,6 +448,15 @@ impl<'a> StringReader<'a> { self.report_raw_str_error(start, 2); } } + rustc_lexer::LiteralKind::RawCStr { n_hashes } => { + if let Some(n_hashes) = n_hashes { + let n = u32::from(n_hashes); + let kind = token::CStrRaw(n_hashes); + self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + } else { + self.report_raw_str_error(start, 2); + } + } rustc_lexer::LiteralKind::Int { base, empty_int } => { if empty_int { let span = self.mk_sp(start, end); @@ -648,7 +672,7 @@ impl<'a> StringReader<'a> { self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num }); } - fn cook_quoted( + fn cook_common( &self, kind: token::LitKind, mode: Mode, @@ -656,12 +680,13 @@ impl<'a> StringReader<'a> { end: BytePos, prefix_len: u32, postfix_len: u32, + unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)), ) -> (token::LitKind, Symbol) { let mut has_fatal_err = false; let content_start = start + BytePos(prefix_len); let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); - unescape::unescape_literal(lit_content, mode, &mut |range, result| { + unescape(lit_content, mode, &mut |range, result| { // Here we only check for errors. The actual unescaping is done later. if let Err(err) = result { let span_with_quotes = self.mk_sp(start, end); @@ -692,6 +717,38 @@ impl<'a> StringReader<'a> { (token::Err, self.symbol_from_to(start, end)) } } + + fn cook_quoted( + &self, + kind: token::LitKind, + mode: Mode, + start: BytePos, + end: BytePos, + prefix_len: u32, + postfix_len: u32, + ) -> (token::LitKind, Symbol) { + self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { + unescape::unescape_literal(src, mode, &mut |span, result| { + callback(span, result.map(drop)) + }) + }) + } + + fn cook_c_string( + &self, + kind: token::LitKind, + mode: Mode, + start: BytePos, + end: BytePos, + prefix_len: u32, + postfix_len: u32, + ) -> (token::LitKind, Symbol) { + self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { + unescape::unescape_c_string(src, mode, &mut |span, result| { + callback(span, result.map(drop)) + }) + }) + } } pub fn nfc_normalize(string: &str) -> Symbol { diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index d8bcf816fb2..eb9625f923a 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -78,8 +78,7 @@ pub(crate) fn emit_unescape_error( } }; let sugg = sugg.unwrap_or_else(|| { - let is_byte = mode.is_byte(); - let prefix = if is_byte { "b" } else { "" }; + let prefix = mode.prefix_noraw(); let mut escaped = String::with_capacity(lit.len()); let mut chrs = lit.chars().peekable(); while let Some(first) = chrs.next() { @@ -97,7 +96,11 @@ pub(crate) fn emit_unescape_error( }; } let sugg = format!("{prefix}\"{escaped}\""); - MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg } + MoreThanOneCharSugg::Quotes { + span: span_with_quotes, + is_byte: mode == Mode::Byte, + sugg, + } }); handler.emit_err(UnescapeError::MoreThanOneChar { span: span_with_quotes, @@ -112,7 +115,7 @@ pub(crate) fn emit_unescape_error( char_span, escaped_sugg: c.escape_default().to_string(), escaped_msg: escaped_char(c), - byte: mode.is_byte(), + byte: mode == Mode::Byte, }); } EscapeError::BareCarriageReturn => { @@ -126,12 +129,15 @@ pub(crate) fn emit_unescape_error( EscapeError::InvalidEscape => { let (c, span) = last_char(); - let label = - if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" }; + let label = if mode == Mode::Byte || mode == Mode::ByteStr { + "unknown byte escape" + } else { + "unknown character escape" + }; let ec = escaped_char(c); let mut diag = handler.struct_span_err(span, format!("{}: `{}`", label, ec)); diag.span_label(span, label); - if c == '{' || c == '}' && !mode.is_byte() { + if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) { diag.help( "if used in a formatting string, curly braces are escaped with `{{` and `}}`", ); @@ -141,7 +147,7 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else { - if !mode.is_byte() { + if mode == Mode::Str || mode == Mode::Char { diag.span_suggestion( span_with_quotes, "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index f58f8919e5c..61396ee0d4a 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1870,6 +1870,7 @@ impl<'a> Parser<'a> { let recovered = self.recover_after_dot(); let token = recovered.as_ref().unwrap_or(&self.token); let span = token.span; + token::Lit::from_token(token).map(|token_lit| { self.bump(); (token_lit, span) diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl index c897275bee2..a8fe560d1a7 100644 --- a/compiler/rustc_session/messages.ftl +++ b/compiler/rustc_session/messages.ftl @@ -101,3 +101,5 @@ session_invalid_int_literal_width = invalid width `{$width}` for integer literal .help = valid widths are 8, 16, 32, 64 and 128 session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg} + +session_nul_in_c_str = null characters in C string literals are not supported diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index cfdba1120ec..18917120256 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -1064,37 +1064,76 @@ pub fn to_crate_config(cfg: FxHashSet<(String, Option<String>)>) -> CrateConfig /// The parsed `--check-cfg` options pub struct CheckCfg<T = String> { - /// The set of all `names()`, if None no name checking is performed - pub names_valid: Option<FxHashSet<T>>, + /// Is well known names activated + pub exhaustive_names: bool, /// Is well known values activated - pub well_known_values: bool, - /// The set of all `values()` - pub values_valid: FxHashMap<T, FxHashSet<T>>, + pub exhaustive_values: bool, + /// All the expected values for a config name + pub expecteds: FxHashMap<T, ExpectedValues<T>>, } impl<T> Default for CheckCfg<T> { fn default() -> Self { CheckCfg { - names_valid: Default::default(), - values_valid: Default::default(), - well_known_values: false, + exhaustive_names: false, + exhaustive_values: false, + expecteds: FxHashMap::default(), } } } impl<T> CheckCfg<T> { - fn map_data<O: Eq + Hash>(&self, f: impl Fn(&T) -> O) -> CheckCfg<O> { + fn map_data<O: Eq + Hash>(self, f: impl Fn(T) -> O) -> CheckCfg<O> { CheckCfg { - names_valid: self - .names_valid - .as_ref() - .map(|names_valid| names_valid.iter().map(|a| f(a)).collect()), - values_valid: self - .values_valid - .iter() - .map(|(a, b)| (f(a), b.iter().map(|b| f(b)).collect())) + exhaustive_names: self.exhaustive_names, + exhaustive_values: self.exhaustive_values, + expecteds: self + .expecteds + .into_iter() + .map(|(name, values)| { + ( + f(name), + match values { + ExpectedValues::Some(values) => ExpectedValues::Some( + values.into_iter().map(|b| b.map(|b| f(b))).collect(), + ), + ExpectedValues::Any => ExpectedValues::Any, + }, + ) + }) .collect(), - well_known_values: self.well_known_values, + } + } +} + +pub enum ExpectedValues<T> { + Some(FxHashSet<Option<T>>), + Any, +} + +impl<T: Eq + Hash> ExpectedValues<T> { + fn insert(&mut self, value: T) -> bool { + match self { + ExpectedValues::Some(expecteds) => expecteds.insert(Some(value)), + ExpectedValues::Any => false, + } + } +} + +impl<T: Eq + Hash> Extend<T> for ExpectedValues<T> { + fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { + match self { + ExpectedValues::Some(expecteds) => expecteds.extend(iter.into_iter().map(Some)), + ExpectedValues::Any => {} + } + } +} + +impl<'a, T: Eq + Hash + Copy + 'a> Extend<&'a T> for ExpectedValues<T> { + fn extend<I: IntoIterator<Item = &'a T>>(&mut self, iter: I) { + match self { + ExpectedValues::Some(expecteds) => expecteds.extend(iter.into_iter().map(|a| Some(*a))), + ExpectedValues::Any => {} } } } @@ -1103,58 +1142,27 @@ impl<T> CheckCfg<T> { /// `rustc_interface::interface::Config` accepts this in the compiler configuration, /// but the symbol interner is not yet set up then, so we must convert it later. pub fn to_crate_check_config(cfg: CheckCfg) -> CrateCheckConfig { - cfg.map_data(|s| Symbol::intern(s)) + cfg.map_data(|s| Symbol::intern(&s)) } impl CrateCheckConfig { - /// Fills a `CrateCheckConfig` with well-known configuration names. - fn fill_well_known_names(&mut self) { - // NOTE: This should be kept in sync with `default_configuration` and - // `fill_well_known_values` - const WELL_KNOWN_NAMES: &[Symbol] = &[ - // rustc - sym::unix, - sym::windows, - sym::target_os, - sym::target_family, - sym::target_arch, - sym::target_endian, - sym::target_pointer_width, - sym::target_env, - sym::target_abi, - sym::target_vendor, - sym::target_thread_local, - sym::target_has_atomic_load_store, - sym::target_has_atomic, - sym::target_has_atomic_equal_alignment, - sym::target_feature, - sym::panic, - sym::sanitize, - sym::debug_assertions, - sym::proc_macro, - sym::test, - sym::feature, - // rustdoc - sym::doc, - sym::doctest, - // miri - sym::miri, - ]; - - // We only insert well-known names if `names()` was activated - if let Some(names_valid) = &mut self.names_valid { - names_valid.extend(WELL_KNOWN_NAMES); - } - } - - /// Fills a `CrateCheckConfig` with well-known configuration values. - fn fill_well_known_values(&mut self, current_target: &Target) { - if !self.well_known_values { + pub fn fill_well_known(&mut self, current_target: &Target) { + if !self.exhaustive_values && !self.exhaustive_names { return; } - // NOTE: This should be kept in sync with `default_configuration` and - // `fill_well_known_names` + let no_values = || { + let mut values = FxHashSet::default(); + values.insert(None); + ExpectedValues::Some(values) + }; + + let empty_values = || { + let values = FxHashSet::default(); + ExpectedValues::Some(values) + }; + + // NOTE: This should be kept in sync with `default_configuration` let panic_values = &PanicStrategy::all(); @@ -1174,6 +1182,9 @@ impl CrateCheckConfig { // Unknown possible values: // - `feature` // - `target_feature` + for name in [sym::feature, sym::target_feature] { + self.expecteds.entry(name).or_insert(ExpectedValues::Any); + } // No-values for name in [ @@ -1187,20 +1198,23 @@ impl CrateCheckConfig { sym::debug_assertions, sym::target_thread_local, ] { - self.values_valid.entry(name).or_default(); + self.expecteds.entry(name).or_insert_with(no_values); } // Pre-defined values - self.values_valid.entry(sym::panic).or_default().extend(panic_values); - self.values_valid.entry(sym::sanitize).or_default().extend(sanitize_values); - self.values_valid.entry(sym::target_has_atomic).or_default().extend(atomic_values); - self.values_valid + self.expecteds.entry(sym::panic).or_insert_with(empty_values).extend(panic_values); + self.expecteds.entry(sym::sanitize).or_insert_with(empty_values).extend(sanitize_values); + self.expecteds + .entry(sym::target_has_atomic) + .or_insert_with(no_values) + .extend(atomic_values); + self.expecteds .entry(sym::target_has_atomic_load_store) - .or_default() + .or_insert_with(no_values) .extend(atomic_values); - self.values_valid + self.expecteds .entry(sym::target_has_atomic_equal_alignment) - .or_default() + .or_insert_with(no_values) .extend(atomic_values); // Target specific values @@ -1218,47 +1232,50 @@ impl CrateCheckConfig { // Initialize (if not already initialized) for &e in VALUES { - self.values_valid.entry(e).or_default(); + let entry = self.expecteds.entry(e); + if !self.exhaustive_values { + entry.or_insert(ExpectedValues::Any); + } else { + entry.or_insert_with(empty_values); + } } - // Get all values map at once otherwise it would be costly. - // (8 values * 220 targets ~= 1760 times, at the time of writing this comment). - let [ - values_target_os, - values_target_family, - values_target_arch, - values_target_endian, - values_target_env, - values_target_abi, - values_target_vendor, - values_target_pointer_width, - ] = self - .values_valid - .get_many_mut(VALUES) - .expect("unable to get all the check-cfg values buckets"); - - for target in TARGETS - .iter() - .map(|target| Target::expect_builtin(&TargetTriple::from_triple(target))) - .chain(iter::once(current_target.clone())) - { - values_target_os.insert(Symbol::intern(&target.options.os)); - values_target_family - .extend(target.options.families.iter().map(|family| Symbol::intern(family))); - values_target_arch.insert(Symbol::intern(&target.arch)); - values_target_endian.insert(Symbol::intern(target.options.endian.as_str())); - values_target_env.insert(Symbol::intern(&target.options.env)); - values_target_abi.insert(Symbol::intern(&target.options.abi)); - values_target_vendor.insert(Symbol::intern(&target.options.vendor)); - values_target_pointer_width.insert(sym::integer(target.pointer_width)); + if self.exhaustive_values { + // Get all values map at once otherwise it would be costly. + // (8 values * 220 targets ~= 1760 times, at the time of writing this comment). + let [ + values_target_os, + values_target_family, + values_target_arch, + values_target_endian, + values_target_env, + values_target_abi, + values_target_vendor, + values_target_pointer_width, + ] = self + .expecteds + .get_many_mut(VALUES) + .expect("unable to get all the check-cfg values buckets"); + + for target in TARGETS + .iter() + .map(|target| Target::expect_builtin(&TargetTriple::from_triple(target))) + .chain(iter::once(current_target.clone())) + { + values_target_os.insert(Symbol::intern(&target.options.os)); + values_target_family.extend( + target.options.families.iter().map(|family| Symbol::intern(family)), + ); + values_target_arch.insert(Symbol::intern(&target.arch)); + values_target_endian.insert(Symbol::intern(target.options.endian.as_str())); + values_target_env.insert(Symbol::intern(&target.options.env)); + values_target_abi.insert(Symbol::intern(&target.options.abi)); + values_target_vendor.insert(Symbol::intern(&target.options.vendor)); + values_target_pointer_width.insert(sym::integer(target.pointer_width)); + } } } } - - pub fn fill_well_known(&mut self, current_target: &Target) { - self.fill_well_known_names(); - self.fill_well_known_values(current_target); - } } pub fn build_configuration(sess: &Session, mut user_cfg: CrateConfig) -> CrateConfig { diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs index 0df62c2064e..546c0fa8e03 100644 --- a/compiler/rustc_session/src/errors.rs +++ b/compiler/rustc_session/src/errors.rs @@ -6,7 +6,7 @@ use rustc_ast::token; use rustc_ast::util::literal::LitError; use rustc_errors::{error_code, DiagnosticMessage, EmissionGuarantee, IntoDiagnostic, MultiSpan}; use rustc_macros::Diagnostic; -use rustc_span::{Span, Symbol}; +use rustc_span::{BytePos, Span, Symbol}; use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple}; #[derive(Diagnostic)] @@ -323,6 +323,13 @@ pub(crate) struct BinaryFloatLiteralNotSupported { pub span: Span, } +#[derive(Diagnostic)] +#[diag(session_nul_in_c_str)] +pub(crate) struct NulInCStr { + #[primary_span] + pub span: Span, +} + pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { @@ -401,6 +408,12 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: }; sess.emit_err(IntLiteralTooLarge { span, limit }); } + LitError::NulInCStr(range) => { + let lo = BytePos(span.lo().0 + range.start as u32 + 2); + let hi = BytePos(span.lo().0 + range.end as u32 + 2); + let span = span.with_lo(lo).with_hi(hi); + sess.emit_err(NulInCStr { span }); + } } } diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 1140a922f9f..58015d5d502 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -441,6 +441,7 @@ symbols! { bridge, bswap, c_str, + c_str_literals, c_unwind, c_variadic, c_void, |
