diff options
Diffstat (limited to 'compiler/rustc_span/src')
| -rw-r--r-- | compiler/rustc_span/src/analyze_source_file.rs | 13 | ||||
| -rw-r--r-- | compiler/rustc_span/src/caching_source_map_view.rs | 36 | ||||
| -rw-r--r-- | compiler/rustc_span/src/def_id.rs | 18 | ||||
| -rw-r--r-- | compiler/rustc_span/src/edit_distance.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_span/src/hygiene.rs | 45 | ||||
| -rw-r--r-- | compiler/rustc_span/src/lib.rs | 131 | ||||
| -rw-r--r-- | compiler/rustc_span/src/source_map.rs | 32 | ||||
| -rw-r--r-- | compiler/rustc_span/src/span_encoding.rs | 5 | ||||
| -rw-r--r-- | compiler/rustc_span/src/symbol.rs | 491 | ||||
| -rw-r--r-- | compiler/rustc_span/src/symbol/tests.rs | 12 |
10 files changed, 548 insertions, 237 deletions
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs index 55d899c9ada..c32593a6d95 100644 --- a/compiler/rustc_span/src/analyze_source_file.rs +++ b/compiler/rustc_span/src/analyze_source_file.rs @@ -29,18 +29,7 @@ pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<Multi (lines, multi_byte_chars) } -// cfg(bootstrap) -macro_rules! cfg_select_dispatch { - ($($tokens:tt)*) => { - #[cfg(bootstrap)] - cfg_match! { $($tokens)* } - - #[cfg(not(bootstrap))] - cfg_select! { $($tokens)* } - }; -} - -cfg_select_dispatch! { +cfg_select! { any(target_arch = "x86", target_arch = "x86_64") => { fn analyze_source_file_dispatch( src: &str, diff --git a/compiler/rustc_span/src/caching_source_map_view.rs b/compiler/rustc_span/src/caching_source_map_view.rs index d8a4cc2f2e2..41cfaa3ee34 100644 --- a/compiler/rustc_span/src/caching_source_map_view.rs +++ b/compiler/rustc_span/src/caching_source_map_view.rs @@ -2,7 +2,7 @@ use std::ops::Range; use std::sync::Arc; use crate::source_map::SourceMap; -use crate::{BytePos, Pos, RelativeBytePos, SourceFile, SpanData}; +use crate::{BytePos, Pos, RelativeBytePos, SourceFile, SpanData, StableSourceFileId}; #[derive(Clone)] struct CacheEntry { @@ -114,7 +114,7 @@ impl<'sm> CachingSourceMapView<'sm> { pub fn span_data_to_lines_and_cols( &mut self, span_data: &SpanData, - ) -> Option<(Arc<SourceFile>, usize, BytePos, usize, BytePos)> { + ) -> Option<(StableSourceFileId, usize, BytePos, usize, BytePos)> { self.time_stamp += 1; // Check if lo and hi are in the cached lines. @@ -123,27 +123,25 @@ impl<'sm> CachingSourceMapView<'sm> { if lo_cache_idx != -1 && hi_cache_idx != -1 { // Cache hit for span lo and hi. Check if they belong to the same file. - let result = { - let lo = &self.line_cache[lo_cache_idx as usize]; - let hi = &self.line_cache[hi_cache_idx as usize]; + let lo_file_index = self.line_cache[lo_cache_idx as usize].file_index; + let hi_file_index = self.line_cache[hi_cache_idx as usize].file_index; - if lo.file_index != hi.file_index { - return None; - } - - ( - Arc::clone(&lo.file), - lo.line_number, - span_data.lo - lo.line.start, - hi.line_number, - span_data.hi - hi.line.start, - ) - }; + if lo_file_index != hi_file_index { + return None; + } self.line_cache[lo_cache_idx as usize].touch(self.time_stamp); self.line_cache[hi_cache_idx as usize].touch(self.time_stamp); - return Some(result); + let lo = &self.line_cache[lo_cache_idx as usize]; + let hi = &self.line_cache[hi_cache_idx as usize]; + return Some(( + lo.file.stable_id, + lo.line_number, + span_data.lo - lo.line.start, + hi.line_number, + span_data.hi - hi.line.start, + )); } // No cache hit or cache hit for only one of span lo and hi. @@ -226,7 +224,7 @@ impl<'sm> CachingSourceMapView<'sm> { assert_eq!(lo.file_index, hi.file_index); Some(( - Arc::clone(&lo.file), + lo.file.stable_id, lo.line_number, span_data.lo - lo.line.start, hi.line_number, diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs index 641bac88ad0..77f01548bca 100644 --- a/compiler/rustc_span/src/def_id.rs +++ b/compiler/rustc_span/src/def_id.rs @@ -110,6 +110,7 @@ impl DefPathHash { /// Builds a new [DefPathHash] with the given [StableCrateId] and /// `local_hash`, where `local_hash` must be unique within its crate. + #[inline] pub fn new(stable_crate_id: StableCrateId, local_hash: Hash64) -> DefPathHash { DefPathHash(Fingerprint::new(stable_crate_id.0, local_hash)) } @@ -404,21 +405,21 @@ rustc_data_structures::define_id_collections!( impl<CTX: HashStableContext> HashStable<CTX> for DefId { #[inline] fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { - self.to_stable_hash_key(hcx).hash_stable(hcx, hasher); + hcx.def_path_hash(*self).hash_stable(hcx, hasher); } } impl<CTX: HashStableContext> HashStable<CTX> for LocalDefId { #[inline] fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { - self.to_stable_hash_key(hcx).hash_stable(hcx, hasher); + hcx.def_path_hash(self.to_def_id()).local_hash().hash_stable(hcx, hasher); } } impl<CTX: HashStableContext> HashStable<CTX> for CrateNum { #[inline] fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { - self.to_stable_hash_key(hcx).hash_stable(hcx, hasher); + self.as_def_id().to_stable_hash_key(hcx).stable_crate_id().hash_stable(hcx, hasher); } } @@ -464,30 +465,36 @@ macro_rules! typed_def_id { pub struct $Name(DefId); impl $Name { + #[inline] pub const fn new_unchecked(def_id: DefId) -> Self { Self(def_id) } + #[inline] pub fn to_def_id(self) -> DefId { self.into() } + #[inline] pub fn is_local(self) -> bool { self.0.is_local() } + #[inline] pub fn as_local(self) -> Option<$LocalName> { self.0.as_local().map($LocalName::new_unchecked) } } impl From<$LocalName> for $Name { + #[inline] fn from(local: $LocalName) -> Self { Self(local.0.to_def_id()) } } impl From<$Name> for DefId { + #[inline] fn from(typed: $Name) -> Self { typed.0 } @@ -500,26 +507,31 @@ macro_rules! typed_def_id { impl !PartialOrd for $LocalName {} impl $LocalName { + #[inline] pub const fn new_unchecked(def_id: LocalDefId) -> Self { Self(def_id) } + #[inline] pub fn to_def_id(self) -> DefId { self.0.into() } + #[inline] pub fn to_local_def_id(self) -> LocalDefId { self.0 } } impl From<$LocalName> for LocalDefId { + #[inline] fn from(typed: $LocalName) -> Self { typed.0 } } impl From<$LocalName> for DefId { + #[inline] fn from(typed: $LocalName) -> Self { typed.0.into() } diff --git a/compiler/rustc_span/src/edit_distance.rs b/compiler/rustc_span/src/edit_distance.rs index 4f3202b694c..416e9daa8fb 100644 --- a/compiler/rustc_span/src/edit_distance.rs +++ b/compiler/rustc_span/src/edit_distance.rs @@ -130,7 +130,7 @@ pub fn edit_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<u 1 // Exact substring match, but not a total word match so return non-zero } else if !big_len_diff { // Not a big difference in length, discount cost of length difference - score + (len_diff + 1) / 2 + score + len_diff.div_ceil(2) } else { // A big difference in length, add back the difference in length to the score score + len_diff diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs index b621920d62b..19494ffc37e 100644 --- a/compiler/rustc_span/src/hygiene.rs +++ b/compiler/rustc_span/src/hygiene.rs @@ -322,6 +322,7 @@ impl ExpnId { /// `expn_id.outer_expn_is_descendant_of(ctxt)` is equivalent to but faster than /// `expn_id.is_descendant_of(ctxt.outer_expn())`. + #[inline] pub fn outer_expn_is_descendant_of(self, ctxt: SyntaxContext) -> bool { HygieneData::with(|data| data.is_descendant_of(self, data.outer_expn(ctxt))) } @@ -394,6 +395,7 @@ impl HygieneData { } } + #[inline] fn with<R>(f: impl FnOnce(&mut HygieneData) -> R) -> R { with_session_globals(|session_globals| f(&mut session_globals.hygiene_data.borrow_mut())) } @@ -406,6 +408,7 @@ impl HygieneData { } } + #[inline] fn local_expn_data(&self, expn_id: LocalExpnId) -> &ExpnData { self.local_expn_data[expn_id].as_ref().expect("no expansion data for an expansion ID") } @@ -437,23 +440,28 @@ impl HygieneData { } } + #[inline] fn normalize_to_macros_2_0(&self, ctxt: SyntaxContext) -> SyntaxContext { self.syntax_context_data[ctxt.0 as usize].opaque } + #[inline] fn normalize_to_macro_rules(&self, ctxt: SyntaxContext) -> SyntaxContext { self.syntax_context_data[ctxt.0 as usize].opaque_and_semiopaque } + #[inline] fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId { self.syntax_context_data[ctxt.0 as usize].outer_expn } + #[inline] fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) { let data = &self.syntax_context_data[ctxt.0 as usize]; (data.outer_expn, data.outer_transparency) } + #[inline] fn parent_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContext { self.syntax_context_data[ctxt.0 as usize].parent } @@ -718,11 +726,13 @@ impl SyntaxContext { SyntaxContext(raw as u32) } + #[inline] fn from_usize(raw: usize) -> SyntaxContext { SyntaxContext(u32::try_from(raw).unwrap()) } /// Extend a syntax context with a given expansion and transparency. + #[inline] pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> SyntaxContext { HygieneData::with(|data| data.apply_mark(self, expn_id, transparency)) } @@ -743,10 +753,12 @@ impl SyntaxContext { /// of g (call it g1), calling remove_mark will result in the SyntaxContext for the /// invocation of f that created g1. /// Returns the mark that was removed. + #[inline] pub fn remove_mark(&mut self) -> ExpnId { HygieneData::with(|data| data.remove_mark(self).0) } + #[inline] pub fn marks(self) -> Vec<(ExpnId, Transparency)> { HygieneData::with(|data| data.marks(self)) } @@ -756,7 +768,9 @@ impl SyntaxContext { /// /// ```rust /// #![feature(decl_macro)] - /// mod foo { pub fn f() {} } // `f`'s `SyntaxContext` is empty. + /// mod foo { + /// pub fn f() {} // `f`'s `SyntaxContext` is empty. + /// } /// m!(f); /// macro m($f:ident) { /// mod bar { @@ -776,11 +790,13 @@ impl SyntaxContext { /// ``` /// This returns the expansion whose definition scope we use to privacy check the resolution, /// or `None` if we privacy check as usual (i.e., not w.r.t. a macro definition scope). + #[inline] pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> { HygieneData::with(|data| data.adjust(self, expn_id)) } /// Like `SyntaxContext::adjust`, but also normalizes `self` to macros 2.0. + #[inline] pub(crate) fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> { HygieneData::with(|data| { *self = data.normalize_to_macros_2_0(*self); @@ -901,10 +917,12 @@ impl SyntaxContext { HygieneData::with(|data| data.outer_mark(self)) } + #[inline] pub(crate) fn dollar_crate_name(self) -> Symbol { HygieneData::with(|data| data.syntax_context_data[self.0 as usize].dollar_crate_name) } + #[inline] pub fn edition(self) -> Edition { HygieneData::with(|data| data.expn_data(data.outer_expn(self)).edition) } @@ -1135,7 +1153,7 @@ impl ExpnKind { } /// The kind of macro invocation or definition. -#[derive(Clone, Copy, PartialEq, Eq, Encodable, Decodable, Hash, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Encodable, Decodable, Hash, Debug)] #[derive(HashStable_Generic)] pub enum MacroKind { /// A bang macro `foo!()`. @@ -1191,11 +1209,6 @@ impl AstPass { /// The kind of compiler desugaring. #[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable, HashStable_Generic)] pub enum DesugaringKind { - /// We desugar `if c { i } else { e }` to `match $ExprKind::Use(c) { true => i, _ => e }`. - /// However, we do not want to blame `c` for unreachability but rather say that `i` - /// is unreachable. This desugaring kind allows us to avoid blaming `c`. - /// This also applies to `while` loops. - CondTemporary, QuestionMark, TryBlock, YeetExpr, @@ -1213,13 +1226,23 @@ pub enum DesugaringKind { Contract, /// A pattern type range start/end PatTyRange, + /// A format literal. + FormatLiteral { + /// Was this format literal written in the source? + /// - `format!("boo")` => Yes, + /// - `format!(concat!("b", "o", "o"))` => No, + /// - `format!(include_str!("boo.txt"))` => No, + /// + /// If it wasn't written in the source then we have to be careful with suggestions about + /// rewriting it. + source: bool, + }, } impl DesugaringKind { /// The description wording should combine well with "desugaring of {}". pub fn descr(self) -> &'static str { match self { - DesugaringKind::CondTemporary => "`if` or `while` condition", DesugaringKind::Async => "`async` block or function", DesugaringKind::Await => "`await` expression", DesugaringKind::QuestionMark => "operator `?`", @@ -1231,6 +1254,10 @@ impl DesugaringKind { DesugaringKind::BoundModifier => "trait bound modifier", DesugaringKind::Contract => "contract check", DesugaringKind::PatTyRange => "pattern type", + DesugaringKind::FormatLiteral { source: true } => "format string literal", + DesugaringKind::FormatLiteral { source: false } => { + "expression that expanded into a format string literal" + } } } @@ -1238,7 +1265,6 @@ impl DesugaringKind { /// like `from_desugaring = "QuestionMark"` pub fn matches(&self, value: &str) -> bool { match self { - DesugaringKind::CondTemporary => value == "CondTemporary", DesugaringKind::Async => value == "Async", DesugaringKind::Await => value == "Await", DesugaringKind::QuestionMark => value == "QuestionMark", @@ -1250,6 +1276,7 @@ impl DesugaringKind { DesugaringKind::BoundModifier => value == "BoundModifier", DesugaringKind::Contract => value == "Contract", DesugaringKind::PatTyRange => value == "PatTyRange", + DesugaringKind::FormatLiteral { .. } => value == "FormatLiteral", } } } diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index ed74dea5f1e..ae6755f0764 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -17,17 +17,16 @@ // tidy-alphabetical-start #![allow(internal_features)] -#![cfg_attr(bootstrap, feature(cfg_match))] -#![cfg_attr(not(bootstrap), feature(cfg_select))] +#![cfg_attr(bootstrap, feature(round_char_boundary))] #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] #![doc(rust_logo)] #![feature(array_windows)] +#![feature(cfg_select)] #![feature(core_io_borrowed_buf)] #![feature(if_let_guard)] #![feature(map_try_insert)] #![feature(negative_impls)] #![feature(read_buf)] -#![feature(round_char_boundary)] #![feature(rustc_attrs)] #![feature(rustdoc_internals)] // tidy-alphabetical-end @@ -66,7 +65,10 @@ mod span_encoding; pub use span_encoding::{DUMMY_SP, Span}; pub mod symbol; -pub use symbol::{Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym}; +pub use symbol::{ + ByteSymbol, Ident, MacroRulesNormalizedIdent, Macros20NormalizedIdent, STDLIB_STABLE_CRATES, + Symbol, kw, sym, +}; mod analyze_source_file; pub mod fatal_error; @@ -166,6 +168,7 @@ where } } +#[inline] pub fn with_session_globals<R, F>(f: F) -> R where F: FnOnce(&SessionGlobals) -> R, @@ -417,7 +420,7 @@ impl FileName { } } - pub fn prefer_remapped_unconditionaly(&self) -> FileNameDisplay<'_> { + pub fn prefer_remapped_unconditionally(&self) -> FileNameDisplay<'_> { FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Remapped } } @@ -714,12 +717,17 @@ impl Span { (!ctxt.is_root()).then(|| ctxt.outer_expn_data().call_site) } - /// Walk down the expansion ancestors to find a span that's contained within `outer`. + /// Find the first ancestor span that's contained within `outer`. /// - /// The span returned by this method may have a different [`SyntaxContext`] as `outer`. + /// This method traverses the macro expansion ancestors until it finds the first span + /// that's contained within `outer`. + /// + /// The span returned by this method may have a different [`SyntaxContext`] than `outer`. /// If you need to extend the span, use [`find_ancestor_inside_same_ctxt`] instead, /// because joining spans with different syntax contexts can create unexpected results. /// + /// This is used to find the span of the macro call when a parent expr span, i.e. `outer`, is known. + /// /// [`find_ancestor_inside_same_ctxt`]: Self::find_ancestor_inside_same_ctxt pub fn find_ancestor_inside(mut self, outer: Span) -> Option<Span> { while !outer.contains(self) { @@ -728,8 +736,10 @@ impl Span { Some(self) } - /// Walk down the expansion ancestors to find a span with the same [`SyntaxContext`] as - /// `other`. + /// Find the first ancestor span with the same [`SyntaxContext`] as `other`. + /// + /// This method traverses the macro expansion ancestors until it finds a span + /// that has the same [`SyntaxContext`] as `other`. /// /// Like [`find_ancestor_inside_same_ctxt`], but specifically for when spans might not /// overlap. Take care when using this, and prefer [`find_ancestor_inside`] or @@ -745,9 +755,12 @@ impl Span { Some(self) } - /// Walk down the expansion ancestors to find a span that's contained within `outer` and + /// Find the first ancestor span that's contained within `outer` and /// has the same [`SyntaxContext`] as `outer`. /// + /// This method traverses the macro expansion ancestors until it finds a span + /// that is both contained within `outer` and has the same [`SyntaxContext`] as `outer`. + /// /// This method is the combination of [`find_ancestor_inside`] and /// [`find_ancestor_in_same_ctxt`] and should be preferred when extending the returned span. /// If you do not need to modify the span, use [`find_ancestor_inside`] instead. @@ -761,43 +774,43 @@ impl Span { Some(self) } - /// Recursively walk down the expansion ancestors to find the oldest ancestor span with the same - /// [`SyntaxContext`] the initial span. + /// Find the first ancestor span that does not come from an external macro. /// - /// This method is suitable for peeling through *local* macro expansions to find the "innermost" - /// span that is still local and shares the same [`SyntaxContext`]. For example, given + /// This method traverses the macro expansion ancestors until it finds a span + /// that is either from user-written code or from a local macro (defined in the current crate). /// - /// ```ignore (illustrative example, contains type error) - /// macro_rules! outer { - /// ($x: expr) => { - /// inner!($x) - /// } - /// } + /// External macros are those defined in dependencies or the standard library. + /// This method is useful for reporting errors in user-controllable code and avoiding + /// diagnostics inside external macros. /// - /// macro_rules! inner { - /// ($x: expr) => { - /// format!("error: {}", $x) - /// //~^ ERROR mismatched types - /// } - /// } + /// # See also /// - /// fn bar(x: &str) -> Result<(), Box<dyn std::error::Error>> { - /// Err(outer!(x)) - /// } - /// ``` + /// - [`Self::find_ancestor_not_from_macro`] + /// - [`Self::in_external_macro`] + pub fn find_ancestor_not_from_extern_macro(mut self, sm: &SourceMap) -> Option<Span> { + while self.in_external_macro(sm) { + self = self.parent_callsite()?; + } + Some(self) + } + + /// Find the first ancestor span that does not come from any macro expansion. /// - /// if provided the initial span of `outer!(x)` inside `bar`, this method will recurse - /// the parent callsites until we reach `format!("error: {}", $x)`, at which point it is the - /// oldest ancestor span that is both still local and shares the same [`SyntaxContext`] as the - /// initial span. - pub fn find_oldest_ancestor_in_same_ctxt(self) -> Span { - let mut cur = self; - while cur.eq_ctxt(self) - && let Some(parent_callsite) = cur.parent_callsite() - { - cur = parent_callsite; + /// This method traverses the macro expansion ancestors until it finds a span + /// that originates from user-written code rather than any macro-generated code. + /// + /// This method is useful for reporting errors at the exact location users wrote code + /// and providing suggestions at directly editable locations. + /// + /// # See also + /// + /// - [`Self::find_ancestor_not_from_extern_macro`] + /// - [`Span::from_expansion`] + pub fn find_ancestor_not_from_macro(mut self) -> Option<Span> { + while self.from_expansion() { + self = self.parent_callsite()?; } - cur + Some(self) } /// Edition of the crate from which this span came. @@ -1184,11 +1197,12 @@ rustc_index::newtype_index! { /// It is similar to rustc_type_ir's TyEncoder. pub trait SpanEncoder: Encoder { fn encode_span(&mut self, span: Span); - fn encode_symbol(&mut self, symbol: Symbol); + fn encode_symbol(&mut self, sym: Symbol); + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol); fn encode_expn_id(&mut self, expn_id: ExpnId); fn encode_syntax_context(&mut self, syntax_context: SyntaxContext); - /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx. - /// Therefore, make sure to include the context when encode a `CrateNum`. + /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a + /// tcx. Therefore, make sure to include the context when encode a `CrateNum`. fn encode_crate_num(&mut self, crate_num: CrateNum); fn encode_def_index(&mut self, def_index: DefIndex); fn encode_def_id(&mut self, def_id: DefId); @@ -1201,8 +1215,12 @@ impl SpanEncoder for FileEncoder { span.hi.encode(self); } - fn encode_symbol(&mut self, symbol: Symbol) { - self.emit_str(symbol.as_str()); + fn encode_symbol(&mut self, sym: Symbol) { + self.emit_str(sym.as_str()); + } + + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) { + self.emit_byte_str(byte_sym.as_byte_str()); } fn encode_expn_id(&mut self, _expn_id: ExpnId) { @@ -1239,6 +1257,12 @@ impl<E: SpanEncoder> Encodable<E> for Symbol { } } +impl<E: SpanEncoder> Encodable<E> for ByteSymbol { + fn encode(&self, s: &mut E) { + s.encode_byte_symbol(*self); + } +} + impl<E: SpanEncoder> Encodable<E> for ExpnId { fn encode(&self, s: &mut E) { s.encode_expn_id(*self) @@ -1280,6 +1304,7 @@ impl<E: SpanEncoder> Encodable<E> for AttrId { pub trait SpanDecoder: Decoder { fn decode_span(&mut self) -> Span; fn decode_symbol(&mut self) -> Symbol; + fn decode_byte_symbol(&mut self) -> ByteSymbol; fn decode_expn_id(&mut self) -> ExpnId; fn decode_syntax_context(&mut self) -> SyntaxContext; fn decode_crate_num(&mut self) -> CrateNum; @@ -1300,6 +1325,10 @@ impl SpanDecoder for MemDecoder<'_> { Symbol::intern(self.read_str()) } + fn decode_byte_symbol(&mut self) -> ByteSymbol { + ByteSymbol::intern(self.read_byte_str()) + } + fn decode_expn_id(&mut self) -> ExpnId { panic!("cannot decode `ExpnId` with `MemDecoder`"); } @@ -1337,6 +1366,12 @@ impl<D: SpanDecoder> Decodable<D> for Symbol { } } +impl<D: SpanDecoder> Decodable<D> for ByteSymbol { + fn decode(s: &mut D) -> ByteSymbol { + s.decode_byte_symbol() + } +} + impl<D: SpanDecoder> Decodable<D> for ExpnId { fn decode(s: &mut D) -> ExpnId { s.decode_expn_id() @@ -2600,7 +2635,7 @@ pub trait HashStableContext { fn span_data_to_lines_and_cols( &mut self, span: &SpanData, - ) -> Option<(Arc<SourceFile>, usize, BytePos, usize, BytePos)>; + ) -> Option<(StableSourceFileId, usize, BytePos, usize, BytePos)>; fn hashing_controls(&self) -> HashingControls; } @@ -2657,7 +2692,7 @@ where }; Hash::hash(&TAG_VALID_SPAN, hasher); - Hash::hash(&file.stable_id, hasher); + Hash::hash(&file, hasher); // Hash both the length and the end location (line/column) of a span. If we // hash only the length, for example, then two otherwise equal spans with diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 8a3644163ca..166842e374b 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -9,6 +9,7 @@ //! within the `SourceMap`, which upon request can be converted to line and column //! information, source code snippets, etc. +use std::fs::File; use std::io::{self, BorrowedBuf, Read}; use std::{fs, path}; @@ -115,13 +116,18 @@ impl FileLoader for RealFileLoader { } fn read_file(&self, path: &Path) -> io::Result<String> { - if path.metadata().is_ok_and(|metadata| metadata.len() > SourceFile::MAX_FILE_SIZE.into()) { + let mut file = File::open(path)?; + let size = file.metadata().map(|metadata| metadata.len()).ok().unwrap_or(0); + + if size > SourceFile::MAX_FILE_SIZE.into() { return Err(io::Error::other(format!( "text files larger than {} bytes are unsupported", SourceFile::MAX_FILE_SIZE ))); } - fs::read_to_string(path) + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + Ok(contents) } fn read_binary_file(&self, path: &Path) -> io::Result<Arc<[u8]>> { @@ -826,10 +832,10 @@ impl SourceMap { /// Given a `Span`, tries to get a shorter span ending just after the first occurrence of `char` /// `c`. pub fn span_through_char(&self, sp: Span, c: char) -> Span { - if let Ok(snippet) = self.span_to_snippet(sp) { - if let Some(offset) = snippet.find(c) { - return sp.with_hi(BytePos(sp.lo().0 + (offset + c.len_utf8()) as u32)); - } + if let Ok(snippet) = self.span_to_snippet(sp) + && let Some(offset) = snippet.find(c) + { + return sp.with_hi(BytePos(sp.lo().0 + (offset + c.len_utf8()) as u32)); } sp } @@ -911,12 +917,13 @@ impl SourceMap { /// Returns a new span representing just the last character of this span. pub fn end_point(&self, sp: Span) -> Span { - let pos = sp.hi().0; + let sp = sp.data(); + let pos = sp.hi.0; let width = self.find_width_of_character_at_span(sp, false); let corrected_end_position = pos.checked_sub(width).unwrap_or(pos); - let end_point = BytePos(cmp::max(corrected_end_position, sp.lo().0)); + let end_point = BytePos(cmp::max(corrected_end_position, sp.lo.0)); sp.with_lo(end_point) } @@ -930,8 +937,9 @@ impl SourceMap { if sp.is_dummy() { return sp; } - let start_of_next_point = sp.hi().0; + let sp = sp.data(); + let start_of_next_point = sp.hi.0; let width = self.find_width_of_character_at_span(sp, true); // If the width is 1, then the next span should only contain the next char besides current ending. // However, in the case of a multibyte character, where the width != 1, the next span should @@ -940,7 +948,7 @@ impl SourceMap { start_of_next_point.checked_add(width).unwrap_or(start_of_next_point); let end_of_next_point = BytePos(cmp::max(start_of_next_point + 1, end_of_next_point)); - Span::new(BytePos(start_of_next_point), end_of_next_point, sp.ctxt(), None) + Span::new(BytePos(start_of_next_point), end_of_next_point, sp.ctxt, None) } /// Check whether span is followed by some specified expected string in limit scope @@ -963,9 +971,7 @@ impl SourceMap { /// Finds the width of the character, either before or after the end of provided span, /// depending on the `forwards` parameter. #[instrument(skip(self, sp))] - fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 { - let sp = sp.data(); - + fn find_width_of_character_at_span(&self, sp: SpanData, forwards: bool) -> u32 { if sp.lo == sp.hi && !forwards { debug!("early return empty span"); return 1; diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs index a4a47dc99b0..e34efa784de 100644 --- a/compiler/rustc_span/src/span_encoding.rs +++ b/compiler/rustc_span/src/span_encoding.rs @@ -74,9 +74,8 @@ use crate::{BytePos, SPAN_TRACK, SpanData}; /// because `parent` isn't currently used by default. /// /// In order to reliably use parented spans in incremental compilation, -/// the dependency to the parent definition's span. This is performed -/// using the callback `SPAN_TRACK` to access the query engine. -/// +/// accesses to `lo` and `hi` must introduce a dependency to the parent definition's span. +/// This is performed using the callback `SPAN_TRACK` to access the query engine. #[derive(Clone, Copy, Eq, PartialEq, Hash)] #[rustc_pass_by_value] pub struct Span { diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index d66f98871b9..aedeb0e14ed 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -3,6 +3,7 @@ //! type, and vice versa. use std::hash::{Hash, Hasher}; +use std::ops::Deref; use std::{fmt, str}; use rustc_arena::DroplessArena; @@ -20,18 +21,17 @@ mod tests; // The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`. symbols! { - // This list includes things that are definitely keywords (e.g. `if`), - // a few things that are definitely not keywords (e.g. the empty symbol, - // `{{root}}`) and things where there is disagreement between people and/or - // documents (such as the Rust Reference) about whether it is a keyword - // (e.g. `_`). + // This list includes things that are definitely keywords (e.g. `if`), a + // few things that are definitely not keywords (e.g. `{{root}}`) and things + // where there is disagreement between people and/or documents (such as the + // Rust Reference) about whether it is a keyword (e.g. `_`). // // If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` // predicates and `used_keywords`. Also consider adding new keywords to the // `ui/parser/raw/raw-idents.rs` test. Keywords { - // Special reserved identifiers used internally for elided lifetimes, - // unnamed method parameters, crate root module, error recovery etc. + // Special reserved identifiers used internally for unnamed method + // parameters, crate root module, etc. // Matching predicates: `is_special`/`is_reserved` // // tidy-alphabetical-start @@ -150,14 +150,10 @@ symbols! { // As well as the symbols listed, there are symbols for the strings // "0", "1", ..., "9", which are accessible via `sym::integer`. // - // The proc macro will abort if symbols are not in alphabetical order (as - // defined by `impl Ord for str`) or if any symbols are duplicated. Vim - // users can sort the list by selecting it and executing the command - // `:'<,'>!LC_ALL=C sort`. - // // There is currently no checking that all symbols are used; that would be // nice to have. Symbols { + // tidy-alphabetical-start Abi, AcqRel, Acquire, @@ -175,18 +171,18 @@ symbols! { AsyncGenPending, AsyncGenReady, AtomicBool, - AtomicI128, + AtomicI8, AtomicI16, AtomicI32, AtomicI64, - AtomicI8, + AtomicI128, AtomicIsize, AtomicPtr, - AtomicU128, + AtomicU8, AtomicU16, AtomicU32, AtomicU64, - AtomicU8, + AtomicU128, AtomicUsize, BTreeEntry, BTreeMap, @@ -283,6 +279,7 @@ symbols! { IterPeekable, Iterator, IteratorItem, + IteratorMap, Layout, Left, LinkedList, @@ -328,6 +325,7 @@ symbols! { RangeSub, RangeTo, RangeToInclusive, + RangeToInclusiveCopy, Rc, RcWeak, Ready, @@ -394,10 +392,10 @@ symbols! { __D, __H, __S, + __T, __awaitee, __try_var, - _d, - _e, + _t, _task_context, a32, aarch64_target_feature, @@ -407,6 +405,8 @@ symbols! { abi_amdgpu_kernel, abi_avr_interrupt, abi_c_cmse_nonsecure_call, + abi_cmse_nonsecure_call, + abi_custom, abi_efiapi, abi_gpu_kernel, abi_msp430_interrupt, @@ -429,6 +429,8 @@ symbols! { aggregate_raw_ptr, alias, align, + align_of, + align_of_val, alignment, all, alloc, @@ -445,6 +447,7 @@ symbols! { altivec, alu32, always, + analysis, and, and_then, anon, @@ -463,6 +466,7 @@ symbols! { arm, arm_target_feature, array, + as_dash_needed: "as-needed", as_ptr, as_ref, as_str, @@ -537,13 +541,16 @@ symbols! { att_syntax, attr, attr_literals, + attribute, attributes, audit_that, augmented_assignments, auto_traits, + autodiff, autodiff_forward, autodiff_reverse, automatically_derived, + available_externally, avx, avx10_target_feature, avx512_target_feature, @@ -575,6 +582,7 @@ symbols! { box_new, box_patterns, box_syntax, + boxed_slice, bpf_target_feature, braced_empty_structs, branch, @@ -584,7 +592,9 @@ symbols! { btreemap_contains_key, btreemap_insert, btreeset_iter, + built, builtin_syntax, + bundle, c, c_dash_variadic, c_str, @@ -603,10 +613,10 @@ symbols! { catch_unwind, cause, cdylib, - ceilf128, ceilf16, ceilf32, ceilf64, + ceilf128, cfg, cfg_accessible, cfg_attr, @@ -624,6 +634,7 @@ symbols! { cfg_relocation_model, cfg_sanitize, cfg_sanitizer_cfi, + cfg_select, cfg_target_abi, cfg_target_compact, cfg_target_feature, @@ -639,6 +650,7 @@ symbols! { cfi_encoding, char, char_is_ascii, + char_to_digit, child_id, child_kill, client, @@ -672,6 +684,7 @@ symbols! { cold_path, collapse_debuginfo, column, + common, compare_bytes, compare_exchange, compare_exchange_weak, @@ -689,6 +702,7 @@ symbols! { const_closures, const_compare_raw_pointers, const_constructor, + const_continue, const_deallocate, const_destruct, const_eval_limit, @@ -712,6 +726,7 @@ symbols! { const_indexing, const_let, const_loop, + const_make_global, const_mut_refs, const_panic, const_panic_fmt, @@ -737,14 +752,15 @@ symbols! { contracts_ensures, contracts_internals, contracts_requires, + convert, convert_identity, copy, copy_closures, copy_nonoverlapping, - copysignf128, copysignf16, copysignf32, copysignf64, + copysignf128, core, core_panic, core_panic_2015_macro, @@ -757,10 +773,10 @@ symbols! { coroutine_state, coroutine_yield, coroutines, - cosf128, cosf16, cosf32, cosf64, + cosf128, count, coverage, coverage_attribute, @@ -804,6 +820,7 @@ symbols! { decl_macro, declare_lint_pass, decode, + decorated, default_alloc_error_handler, default_field_values, default_fn, @@ -836,12 +853,15 @@ symbols! { derive, derive_coerce_pointee, derive_const, + derive_const_issue: "118304", derive_default_enum, + derive_from, derive_smart_pointer, destruct, destructuring_assignment, diagnostic, diagnostic_namespace, + dialect, direct, discriminant_kind, discriminant_type, @@ -868,8 +888,8 @@ symbols! { dotdot_in_tuple_patterns, dotdoteq_in_patterns, dreg, - dreg_low16, dreg_low8, + dreg_low16, drop, drop_in_place, drop_types_in_const, @@ -885,6 +905,7 @@ symbols! { dynamic_no_pic: "dynamic-no-pic", e, edition_panic, + effective_target_features, effects, eh_catch_typeinfo, eh_personality, @@ -922,16 +943,16 @@ symbols! { exhaustive_integer_patterns, exhaustive_patterns, existential_type, - exp2f128, exp2f16, exp2f32, exp2f64, + exp2f128, expect, expected, - expf128, expf16, expf32, expf64, + expf128, explicit_extern_abis, explicit_generic_args_with_impl_trait, explicit_tail_calls, @@ -949,12 +970,10 @@ symbols! { extern_prelude, extern_system_varargs, extern_types, + extern_weak, external, external_doc, f, - f128, - f128_epsilon, - f128_nan, f16, f16_epsilon, f16_nan, @@ -993,10 +1012,13 @@ symbols! { f64_legacy_const_neg_infinity, f64_legacy_const_radix, f64_nan, - fabsf128, + f128, + f128_epsilon, + f128_nan, fabsf16, fabsf32, fabsf64, + fabsf128, fadd_algebraic, fadd_fast, fake_variadic, @@ -1018,22 +1040,22 @@ symbols! { flags, float, float_to_int_unchecked, - floorf128, floorf16, floorf32, floorf64, - fmaf128, + floorf128, fmaf16, fmaf32, fmaf64, + fmaf128, fmt, fmt_debug, fmul_algebraic, fmul_fast, - fmuladdf128, fmuladdf16, fmuladdf32, fmuladdf64, + fmuladdf128, fn_align, fn_body, fn_delegation, @@ -1044,6 +1066,7 @@ symbols! { fn_ptr_addr, fn_ptr_trait, forbid, + force_target_feature, forget, format, format_args, @@ -1056,6 +1079,7 @@ symbols! { format_macro, format_placeholder, format_unsafe_arg, + framework, freeze, freeze_impls, freg, @@ -1077,7 +1101,6 @@ symbols! { fs_create_dir, fsub_algebraic, fsub_fast, - fsxr, full, fundamental, fused_iterator, @@ -1085,6 +1108,7 @@ symbols! { future_drop_poll, future_output, future_trait, + fxsr, gdb_script_file, ge, gen_blocks, @@ -1134,13 +1158,12 @@ symbols! { html_root_url, hwaddress, i, - i128, - i128_legacy_const_max, - i128_legacy_const_min, - i128_legacy_fn_max_value, - i128_legacy_fn_min_value, - i128_legacy_mod, - i128_type, + i8, + i8_legacy_const_max, + i8_legacy_const_min, + i8_legacy_fn_max_value, + i8_legacy_fn_min_value, + i8_legacy_mod, i16, i16_legacy_const_max, i16_legacy_const_min, @@ -1159,12 +1182,13 @@ symbols! { i64_legacy_fn_max_value, i64_legacy_fn_min_value, i64_legacy_mod, - i8, - i8_legacy_const_max, - i8_legacy_const_min, - i8_legacy_fn_max_value, - i8_legacy_fn_min_value, - i8_legacy_mod, + i128, + i128_legacy_const_max, + i128_legacy_const_min, + i128_legacy_fn_max_value, + i128_legacy_fn_min_value, + i128_legacy_mod, + i128_type, ident, if_let, if_let_guard, @@ -1197,6 +1221,7 @@ symbols! { infer_static_outlives_requirements, inherent_associated_types, inherit, + initial, inlateout, inline, inline_const, @@ -1206,6 +1231,7 @@ symbols! { instruction_set, integer_: "integer", // underscore to avoid clashing with the function `sym::integer` below integral, + internal, internal_features, into_async_iter_into_iter, into_future, @@ -1214,6 +1240,8 @@ symbols! { intrinsics, intrinsics_unaligned_volatile_load, intrinsics_unaligned_volatile_store, + io_error_new, + io_errorkind, io_stderr, io_stdout, irrefutable_let_patterns, @@ -1240,6 +1268,7 @@ symbols! { iterator, iterator_collect_fn, kcfi, + kernel_address, keylocker_x86, keyword, kind, @@ -1251,6 +1280,7 @@ symbols! { lang, lang_items, large_assignments, + last, lateout, lazy_normalization_consts, lazy_type_alias, @@ -1271,6 +1301,7 @@ symbols! { link_arg_attribute, link_args, link_cfg, + link_dash_arg: "link-arg", link_llvm_intrinsics, link_name, link_ordinal, @@ -1278,31 +1309,37 @@ symbols! { linkage, linker, linker_messages, + linkonce, + linkonce_odr, lint_reasons, literal, load, loaded_from_disk, local, local_inner_macros, - log10f128, - log10f16, - log10f32, - log10f64, - log2f128, log2f16, log2f32, log2f64, + log2f128, + log10f16, + log10f32, + log10f64, + log10f128, log_syntax, - logf128, logf16, logf32, logf64, + logf128, loongarch_target_feature, loop_break_value, + loop_match, lt, m68k_target_feature, macro_at_most_once_rep, + macro_attr, macro_attributes_in_derive_output, + macro_concat, + macro_derive, macro_escape, macro_export, macro_lifetime_matcher, @@ -1324,19 +1361,20 @@ symbols! { match_beginning_vert, match_default_bindings, matches_macro, - maximumf128, maximumf16, maximumf32, maximumf64, - maxnumf128, + maximumf128, maxnumf16, maxnumf32, maxnumf64, + maxnumf128, may_dangle, may_unwind, maybe_uninit, maybe_uninit_uninit, maybe_uninit_zeroed, + mem_align_of, mem_discriminant, mem_drop, mem_forget, @@ -1352,9 +1390,8 @@ symbols! { memtag, message, meta, + meta_sized, metadata_type, - min_align_of, - min_align_of_val, min_const_fn, min_const_generics, min_const_unsafe_fn, @@ -1362,14 +1399,14 @@ symbols! { min_generic_const_args, min_specialization, min_type_alias_impl_trait, - minimumf128, minimumf16, minimumf32, minimumf64, - minnumf128, + minimumf128, minnumf16, minnumf32, minnumf64, + minnumf128, mips_target_feature, mir_assume, mir_basic_block, @@ -1498,11 +1535,13 @@ symbols! { noop_method_borrow, noop_method_clone, noop_method_deref, + noprefix, noreturn, nostack, not, notable_trait, note, + nvptx_target_feature, object_safe_for_dispatch, of, off, @@ -1512,6 +1551,7 @@ symbols! { offset_of_nested, offset_of_slice, ok_or_else, + old_name, omit_gdb_pretty_printer_section, on, on_unimplemented, @@ -1522,6 +1562,7 @@ symbols! { opt_out_copy, optimize, optimize_attribute, + optimized, optin_builtin_traits, option, option_env, @@ -1569,12 +1610,14 @@ symbols! { panic_const_shl_overflow, panic_const_shr_overflow, panic_const_sub_overflow, + panic_display, panic_fmt, panic_handler, panic_impl, panic_implementation, panic_in_cleanup, panic_info, + panic_invalid_enum_construction, panic_location, panic_misaligned_pointer_dereference, panic_nounwind, @@ -1602,32 +1645,35 @@ symbols! { pattern_types, permissions_from_mode, phantom_data, + phase, pic, pie, pin, pin_ergonomics, + pin_macro, platform_intrinsics, plugin, plugin_registrar, plugins, pointee, + pointee_sized, pointee_trait, pointer, - pointer_like, poll, poll_next, position, + post_cleanup: "post-cleanup", post_dash_lto: "post-lto", postfix_match, powerpc_target_feature, - powf128, powf16, powf32, powf64, - powif128, + powf128, powif16, powif32, powif64, + powif128, pre_dash_lto: "pre-lto", precise_capturing, precise_capturing_in_traits, @@ -1686,7 +1732,6 @@ symbols! { ptr_slice_from_raw_parts_mut, ptr_swap, ptr_swap_nonoverlapping, - ptr_unique, ptr_write, ptr_write_bytes, ptr_write_unaligned, @@ -1703,6 +1748,8 @@ symbols! { question_mark, quote, range_inclusive_new, + range_step, + raw_dash_dylib: "raw-dylib", raw_dylib, raw_dylib_elf, raw_eq, @@ -1718,6 +1765,7 @@ symbols! { readonly, realloc, reason, + reborrow, receiver, receiver_target, recursion_limit, @@ -1764,23 +1812,23 @@ symbols! { resume, return_position_impl_trait_in_trait, return_type_notation, - rhs, riscv_target_feature, rlib, ropi, ropi_rwpi: "ropi-rwpi", rotate_left, rotate_right, - round_ties_even_f128, round_ties_even_f16, round_ties_even_f32, round_ties_even_f64, - roundf128, + round_ties_even_f128, roundf16, roundf32, roundf64, + roundf128, rt, rtm_target_feature, + runtime, rust, rust_2015, rust_2018, @@ -1797,8 +1845,12 @@ symbols! { rust_out, rustc, rustc_abi, + // FIXME(#82232, #143834): temporary name to mitigate `#[align]` nameres ambiguity + rustc_align, + rustc_align_static, rustc_allocator, rustc_allocator_zeroed, + rustc_allocator_zeroed_variant, rustc_allow_const_fn_unstable, rustc_allow_incoherent_impl, rustc_allowed_through_unstable_modules, @@ -1811,7 +1863,6 @@ symbols! { rustc_coherence_is_core, rustc_coinductive, rustc_confusables, - rustc_const_panic_str, rustc_const_stable, rustc_const_stable_indirect, rustc_const_unstable, @@ -1862,6 +1913,7 @@ symbols! { rustc_never_returns_null_ptr, rustc_never_type_options, rustc_no_implicit_autorefs, + rustc_no_implicit_bounds, rustc_no_mir_inline, rustc_nonnull_optimization_guaranteed, rustc_nounwind, @@ -1957,14 +2009,16 @@ symbols! { simd_fexp2, simd_ffi, simd_flog, - simd_flog10, simd_flog2, + simd_flog10, simd_floor, simd_fma, simd_fmax, simd_fmin, simd_fsin, simd_fsqrt, + simd_funnel_shl, + simd_funnel_shr, simd_gather, simd_ge, simd_gt, @@ -1992,6 +2046,7 @@ symbols! { simd_relaxed_fma, simd_rem, simd_round, + simd_round_ties_even, simd_saturating_add, simd_saturating_sub, simd_scatter, @@ -2006,18 +2061,20 @@ symbols! { simd_with_exposed_provenance, simd_xor, since, - sinf128, sinf16, sinf32, sinf64, + sinf128, size, size_of, size_of_val, sized, + sized_hierarchy, skip, slice, slice_from_raw_parts, slice_from_raw_parts_mut, + slice_from_ref, slice_get_unchecked, slice_into_vec, slice_iter, @@ -2029,10 +2086,10 @@ symbols! { specialization, speed, spotlight, - sqrtf128, sqrtf16, sqrtf32, sqrtf64, + sqrtf128, sreg, sreg_low16, sse, @@ -2042,11 +2099,13 @@ symbols! { staged_api, start, state, + static_align, static_in_const, static_nobundle, static_recursion, staticlib, std, + std_lib_injection, std_panic, std_panic_2015_macro, std_panic_macro, @@ -2106,13 +2165,14 @@ symbols! { target_family, target_feature, target_feature_11, + target_feature_inline_always, target_has_atomic, target_has_atomic_equal_alignment, target_has_atomic_load_store, - target_has_reliable_f128, - target_has_reliable_f128_math, target_has_reliable_f16, target_has_reliable_f16_math, + target_has_reliable_f128, + target_has_reliable_f128_math, target_os, target_pointer_width, target_thread_local, @@ -2155,10 +2215,10 @@ symbols! { transparent_enums, transparent_unions, trivial_bounds, - truncf128, truncf16, truncf32, truncf64, + truncf128, try_blocks, try_capture, try_from, @@ -2178,6 +2238,8 @@ symbols! { type_changing_struct_update, type_const, type_id, + type_id_eq, + type_ir, type_ir_infer_ctxt_like, type_ir_inherent, type_ir_interner, @@ -2186,12 +2248,12 @@ symbols! { type_name, type_privacy_lints, typed_swap_nonoverlapping, - u128, - u128_legacy_const_max, - u128_legacy_const_min, - u128_legacy_fn_max_value, - u128_legacy_fn_min_value, - u128_legacy_mod, + u8, + u8_legacy_const_max, + u8_legacy_const_min, + u8_legacy_fn_max_value, + u8_legacy_fn_min_value, + u8_legacy_mod, u16, u16_legacy_const_max, u16_legacy_const_min, @@ -2210,23 +2272,26 @@ symbols! { u64_legacy_fn_max_value, u64_legacy_fn_min_value, u64_legacy_mod, - u8, - u8_legacy_const_max, - u8_legacy_const_min, - u8_legacy_fn_max_value, - u8_legacy_fn_min_value, - u8_legacy_mod, + u128, + u128_legacy_const_max, + u128_legacy_const_min, + u128_legacy_fn_max_value, + u128_legacy_fn_min_value, + u128_legacy_mod, ub_checks, unaligned_volatile_load, unaligned_volatile_store, unboxed_closures, unchecked_add, unchecked_div, + unchecked_funnel_shl, + unchecked_funnel_shr, unchecked_mul, unchecked_rem, unchecked_shl, unchecked_shr, unchecked_sub, + undecorated, underscore_const_names, underscore_imports, underscore_lifetimes, @@ -2265,6 +2330,7 @@ symbols! { unsized_locals, unsized_tuple_coercion, unstable, + unstable_feature_bound, unstable_location_reason_default: "this crate is being loaded from the sysroot, an \ unstable location; did you mean to load this crate \ from crates.io via `Cargo.toml` instead?", @@ -2288,6 +2354,7 @@ symbols! { usize_legacy_fn_max_value, usize_legacy_fn_min_value, usize_legacy_mod, + v1, v8plus, va_arg, va_copy, @@ -2296,6 +2363,7 @@ symbols! { va_start, val, validity, + value, values, var, variant_count, @@ -2312,6 +2380,7 @@ symbols! { vecdeque_iter, vecdeque_reserve, vector, + verbatim, version, vfp2, vis, @@ -2332,8 +2401,11 @@ symbols! { wasm_abi, wasm_import_module, wasm_target_feature, + weak, + weak_odr, where_clause_attrs, while_let, + whole_dash_archive: "whole-archive", width, windows, windows_subsystem, @@ -2364,9 +2436,12 @@ symbols! { yield_expr, ymm_reg, yreg, + zca, zfh, zfhmin, zmm_reg, + ztso, + // tidy-alphabetical-end } } @@ -2377,7 +2452,7 @@ pub const STDLIB_STABLE_CRATES: &[Symbol] = &[sym::std, sym::core, sym::alloc, s #[derive(Copy, Clone, Eq, HashStable_Generic, Encodable, Decodable)] pub struct Ident { // `name` should never be the empty symbol. If you are considering that, - // you are probably conflating "empty identifer with "no identifier" and + // you are probably conflating "empty identifier with "no identifier" and // you should use `Option<Ident>` instead. pub name: Symbol, pub span: Span, @@ -2480,10 +2555,16 @@ impl fmt::Debug for Ident { /// except that AST identifiers don't keep the rawness flag, so we have to guess it. impl fmt::Display for Ident { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&IdentPrinter::new(self.name, self.is_raw_guess(), None), f) + fmt::Display::fmt(&IdentPrinter::new(self.name, self.guess_print_mode(), None), f) } } +pub enum IdentPrintMode { + Normal, + RawIdent, + RawLifetime, +} + /// The most general type to print identifiers. /// /// AST pretty-printer is used as a fallback for turning AST structures into token streams for @@ -2499,7 +2580,7 @@ impl fmt::Display for Ident { /// done for a token stream or a single token. pub struct IdentPrinter { symbol: Symbol, - is_raw: bool, + mode: IdentPrintMode, /// Span used for retrieving the crate name to which `$crate` refers to, /// if this field is `None` then the `$crate` conversion doesn't happen. convert_dollar_crate: Option<Span>, @@ -2507,46 +2588,66 @@ pub struct IdentPrinter { impl IdentPrinter { /// The most general `IdentPrinter` constructor. Do not use this. - pub fn new(symbol: Symbol, is_raw: bool, convert_dollar_crate: Option<Span>) -> IdentPrinter { - IdentPrinter { symbol, is_raw, convert_dollar_crate } + pub fn new( + symbol: Symbol, + mode: IdentPrintMode, + convert_dollar_crate: Option<Span>, + ) -> IdentPrinter { + IdentPrinter { symbol, mode, convert_dollar_crate } } /// This implementation is supposed to be used when printing identifiers /// as a part of pretty-printing for larger AST pieces. /// Do not use this either. - pub fn for_ast_ident(ident: Ident, is_raw: bool) -> IdentPrinter { - IdentPrinter::new(ident.name, is_raw, Some(ident.span)) + pub fn for_ast_ident(ident: Ident, mode: IdentPrintMode) -> IdentPrinter { + IdentPrinter::new(ident.name, mode, Some(ident.span)) } } impl fmt::Display for IdentPrinter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_raw { - f.write_str("r#")?; - } else if self.symbol == kw::DollarCrate { - if let Some(span) = self.convert_dollar_crate { + let s = match self.mode { + IdentPrintMode::Normal + if self.symbol == kw::DollarCrate + && let Some(span) = self.convert_dollar_crate => + { let converted = span.ctxt().dollar_crate_name(); if !converted.is_path_segment_keyword() { f.write_str("::")?; } - return fmt::Display::fmt(&converted, f); + converted } - } - fmt::Display::fmt(&self.symbol, f) + IdentPrintMode::Normal => self.symbol, + IdentPrintMode::RawIdent => { + f.write_str("r#")?; + self.symbol + } + IdentPrintMode::RawLifetime => { + f.write_str("'r#")?; + let s = self + .symbol + .as_str() + .strip_prefix("'") + .expect("only lifetime idents should be passed with RawLifetime mode"); + Symbol::intern(s) + } + }; + s.fmt(f) } } /// An newtype around `Ident` that calls [Ident::normalize_to_macro_rules] on -/// construction. -// FIXME(matthewj, petrochenkov) Use this more often, add a similar -// `ModernIdent` struct and use that as well. +/// construction for "local variable hygiene" comparisons. +/// +/// Use this type when you need to compare identifiers according to macro_rules hygiene. +/// This ensures compile-time safety and avoids manual normalization calls. #[derive(Copy, Clone, Eq, PartialEq, Hash)] pub struct MacroRulesNormalizedIdent(Ident); impl MacroRulesNormalizedIdent { #[inline] pub fn new(ident: Ident) -> Self { - Self(ident.normalize_to_macro_rules()) + MacroRulesNormalizedIdent(ident.normalize_to_macro_rules()) } } @@ -2562,7 +2663,49 @@ impl fmt::Display for MacroRulesNormalizedIdent { } } -/// An interned string. +/// An newtype around `Ident` that calls [Ident::normalize_to_macros_2_0] on +/// construction for "item hygiene" comparisons. +/// +/// Identifiers with same string value become same if they came from the same macro 2.0 macro +/// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from +/// different macro 2.0 macros. +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub struct Macros20NormalizedIdent(pub Ident); + +impl Macros20NormalizedIdent { + #[inline] + pub fn new(ident: Ident) -> Self { + Macros20NormalizedIdent(ident.normalize_to_macros_2_0()) + } + + // dummy_span does not need to be normalized, so we can use `Ident` directly + pub fn with_dummy_span(name: Symbol) -> Self { + Macros20NormalizedIdent(Ident::with_dummy_span(name)) + } +} + +impl fmt::Debug for Macros20NormalizedIdent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.0, f) + } +} + +impl fmt::Display for Macros20NormalizedIdent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} + +/// By impl Deref, we can access the wrapped Ident as if it were a normal Ident +/// such as `norm_ident.name` instead of `norm_ident.0.name`. +impl Deref for Macros20NormalizedIdent { + type Target = Ident; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// An interned UTF-8 string. /// /// Internally, a `Symbol` is implemented as an index, and all operations /// (including hashing, equality, and ordering) operate on that index. The use @@ -2574,20 +2717,23 @@ impl fmt::Display for MacroRulesNormalizedIdent { #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Symbol(SymbolIndex); +// Used within both `Symbol` and `ByteSymbol`. rustc_index::newtype_index! { #[orderable] struct SymbolIndex {} } impl Symbol { + /// Avoid this except for things like deserialization of previously + /// serialized symbols, and testing. Use `intern` instead. pub const fn new(n: u32) -> Self { Symbol(SymbolIndex::from_u32(n)) } /// Maps a string to its interned representation. #[rustc_diagnostic_item = "SymbolIntern"] - pub fn intern(string: &str) -> Self { - with_session_globals(|session_globals| session_globals.symbol_interner.intern(string)) + pub fn intern(str: &str) -> Self { + with_session_globals(|session_globals| session_globals.symbol_interner.intern_str(str)) } /// Access the underlying string. This is a slowish operation because it @@ -2600,7 +2746,7 @@ impl Symbol { /// it works out ok. pub fn as_str(&self) -> &str { with_session_globals(|session_globals| unsafe { - std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get(*self)) + std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get_str(*self)) }) } @@ -2657,56 +2803,130 @@ impl StableCompare for Symbol { } } +/// Like `Symbol`, but for byte strings. `ByteSymbol` is used less widely, so +/// it has fewer operations defined than `Symbol`. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ByteSymbol(SymbolIndex); + +impl ByteSymbol { + /// Avoid this except for things like deserialization of previously + /// serialized symbols, and testing. Use `intern` instead. + pub const fn new(n: u32) -> Self { + ByteSymbol(SymbolIndex::from_u32(n)) + } + + /// Maps a string to its interned representation. + pub fn intern(byte_str: &[u8]) -> Self { + with_session_globals(|session_globals| { + session_globals.symbol_interner.intern_byte_str(byte_str) + }) + } + + /// Like `Symbol::as_str`. + pub fn as_byte_str(&self) -> &[u8] { + with_session_globals(|session_globals| unsafe { + std::mem::transmute::<&[u8], &[u8]>(session_globals.symbol_interner.get_byte_str(*self)) + }) + } + + pub fn as_u32(self) -> u32 { + self.0.as_u32() + } +} + +impl fmt::Debug for ByteSymbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_byte_str(), f) + } +} + +impl<CTX> HashStable<CTX> for ByteSymbol { + #[inline] + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.as_byte_str().hash_stable(hcx, hasher); + } +} + +// Interner used for both `Symbol`s and `ByteSymbol`s. If a string and a byte +// string with identical contents (e.g. "foo" and b"foo") are both interned, +// only one copy will be stored and the resulting `Symbol` and `ByteSymbol` +// will have the same index. pub(crate) struct Interner(Lock<InternerInner>); -// The `&'static str`s in this type actually point into the arena. +// The `&'static [u8]`s in this type actually point into the arena. // // This type is private to prevent accidentally constructing more than one // `Interner` on the same thread, which makes it easy to mix up `Symbol`s // between `Interner`s. struct InternerInner { arena: DroplessArena, - strings: FxIndexSet<&'static str>, + byte_strs: FxIndexSet<&'static [u8]>, } impl Interner { + // These arguments are `&str`, but because of the sharing, we are + // effectively pre-interning all these strings for both `Symbol` and + // `ByteSymbol`. fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self { - let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied())); + let byte_strs = FxIndexSet::from_iter( + init.iter().copied().chain(extra.iter().copied()).map(|str| str.as_bytes()), + ); assert_eq!( - strings.len(), + byte_strs.len(), init.len() + extra.len(), - "`init` or `extra` contain duplicate symbols", + "duplicate symbols in the rustc symbol list and the extra symbols added by the driver", ); - Interner(Lock::new(InternerInner { arena: Default::default(), strings })) + Interner(Lock::new(InternerInner { arena: Default::default(), byte_strs })) + } + + fn intern_str(&self, str: &str) -> Symbol { + Symbol::new(self.intern_inner(str.as_bytes())) + } + + fn intern_byte_str(&self, byte_str: &[u8]) -> ByteSymbol { + ByteSymbol::new(self.intern_inner(byte_str)) } #[inline] - fn intern(&self, string: &str) -> Symbol { + fn intern_inner(&self, byte_str: &[u8]) -> u32 { let mut inner = self.0.lock(); - if let Some(idx) = inner.strings.get_index_of(string) { - return Symbol::new(idx as u32); + if let Some(idx) = inner.byte_strs.get_index_of(byte_str) { + return idx as u32; } - let string: &str = inner.arena.alloc_str(string); + let byte_str: &[u8] = inner.arena.alloc_slice(byte_str); // SAFETY: we can extend the arena allocation to `'static` because we // only access these while the arena is still alive. - let string: &'static str = unsafe { &*(string as *const str) }; + let byte_str: &'static [u8] = unsafe { &*(byte_str as *const [u8]) }; // This second hash table lookup can be avoided by using `RawEntryMut`, // but this code path isn't hot enough for it to be worth it. See // #91445 for details. - let (idx, is_new) = inner.strings.insert_full(string); + let (idx, is_new) = inner.byte_strs.insert_full(byte_str); debug_assert!(is_new); // due to the get_index_of check above - Symbol::new(idx as u32) + idx as u32 } /// Get the symbol as a string. /// /// [`Symbol::as_str()`] should be used in preference to this function. - fn get(&self, symbol: Symbol) -> &str { - self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap() + fn get_str(&self, symbol: Symbol) -> &str { + let byte_str = self.get_inner(symbol.0.as_usize()); + // SAFETY: known to be a UTF8 string because it's a `Symbol`. + unsafe { str::from_utf8_unchecked(byte_str) } + } + + /// Get the symbol as a string. + /// + /// [`ByteSymbol::as_byte_str()`] should be used in preference to this function. + fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] { + self.get_inner(symbol.0.as_usize()) + } + + fn get_inner(&self, index: usize) -> &[u8] { + self.0.lock().byte_strs.get_index(index).unwrap() } } @@ -2801,9 +3021,11 @@ impl Symbol { self != sym::empty && self != kw::Underscore && !self.is_path_segment_keyword() } - /// Was this symbol predefined in the compiler's `symbols!` macro - pub fn is_predefined(self) -> bool { - self.as_u32() < PREDEFINED_SYMBOLS_COUNT + /// Was this symbol index predefined in the compiler's `symbols!` macro? + /// Note: this applies to both `Symbol`s and `ByteSymbol`s, which is why it + /// takes a `u32` argument instead of a `&self` argument. Use with care. + pub fn is_predefined(index: u32) -> bool { + index < PREDEFINED_SYMBOLS_COUNT } } @@ -2845,6 +3067,29 @@ impl Ident { self.name.can_be_raw() && self.is_reserved() } + /// Given the name of a lifetime without the first quote (`'`), + /// returns whether the lifetime name is reserved (therefore invalid) + pub fn is_reserved_lifetime(self) -> bool { + self.is_reserved() && ![kw::Underscore, kw::Static].contains(&self.name) + } + + pub fn is_raw_lifetime_guess(self) -> bool { + let name_without_apostrophe = self.without_first_quote(); + name_without_apostrophe.name != self.name + && name_without_apostrophe.name.can_be_raw() + && name_without_apostrophe.is_reserved_lifetime() + } + + pub fn guess_print_mode(self) -> IdentPrintMode { + if self.is_raw_lifetime_guess() { + IdentPrintMode::RawLifetime + } else if self.is_raw_guess() { + IdentPrintMode::RawIdent + } else { + IdentPrintMode::Normal + } + } + /// Whether this would be the identifier for a tuple field like `self.0`, as /// opposed to a named field like `self.thing`. pub fn is_numeric(self) -> bool { diff --git a/compiler/rustc_span/src/symbol/tests.rs b/compiler/rustc_span/src/symbol/tests.rs index 660d0d7179a..bf0660aa510 100644 --- a/compiler/rustc_span/src/symbol/tests.rs +++ b/compiler/rustc_span/src/symbol/tests.rs @@ -5,14 +5,14 @@ use crate::create_default_session_globals_then; fn interner_tests() { let i = Interner::prefill(&[], &[]); // first one is zero: - assert_eq!(i.intern("dog"), Symbol::new(0)); - // re-use gets the same entry: - assert_eq!(i.intern("dog"), Symbol::new(0)); + assert_eq!(i.intern_str("dog"), Symbol::new(0)); + // re-use gets the same entry, even with a `ByteSymbol` + assert_eq!(i.intern_byte_str(b"dog"), ByteSymbol::new(0)); // different string gets a different #: - assert_eq!(i.intern("cat"), Symbol::new(1)); - assert_eq!(i.intern("cat"), Symbol::new(1)); + assert_eq!(i.intern_byte_str(b"cat"), ByteSymbol::new(1)); + assert_eq!(i.intern_str("cat"), Symbol::new(1)); // dog is still at zero - assert_eq!(i.intern("dog"), Symbol::new(0)); + assert_eq!(i.intern_str("dog"), Symbol::new(0)); } #[test] |
