diff options
Diffstat (limited to 'compiler/rustc_span/src')
| -rw-r--r-- | compiler/rustc_span/src/def_id.rs | 106 | ||||
| -rw-r--r-- | compiler/rustc_span/src/hygiene.rs | 88 | ||||
| -rw-r--r-- | compiler/rustc_span/src/lev_distance.rs | 72 | ||||
| -rw-r--r-- | compiler/rustc_span/src/lev_distance/tests.rs | 22 | ||||
| -rw-r--r-- | compiler/rustc_span/src/lib.rs | 147 | ||||
| -rw-r--r-- | compiler/rustc_span/src/source_map.rs | 19 | ||||
| -rw-r--r-- | compiler/rustc_span/src/span_encoding.rs | 9 | ||||
| -rw-r--r-- | compiler/rustc_span/src/symbol.rs | 206 |
8 files changed, 422 insertions, 247 deletions
diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs index 64baf94cc00..147c1f9e043 100644 --- a/compiler/rustc_span/src/def_id.rs +++ b/compiler/rustc_span/src/def_id.rs @@ -7,6 +7,7 @@ use rustc_macros::HashStable_Generic; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use std::borrow::Borrow; use std::fmt; +use std::hash::{Hash, Hasher}; rustc_index::newtype_index! { pub struct CrateNum { @@ -46,8 +47,8 @@ impl<E: Encoder> Encodable<E> for CrateNum { } impl<D: Decoder> Decodable<D> for CrateNum { - default fn decode(d: &mut D) -> Result<CrateNum, D::Error> { - Ok(CrateNum::from_u32(d.read_u32()?)) + default fn decode(d: &mut D) -> CrateNum { + CrateNum::from_u32(d.read_u32()) } } @@ -126,14 +127,17 @@ impl Borrow<Fingerprint> for DefPathHash { } } -/// A [StableCrateId] is a 64 bit hash of the crate name combined with all -/// `-Cmetadata` arguments. It is to [CrateNum] what [DefPathHash] is to -/// [DefId]. It is stable across compilation sessions. +/// A [`StableCrateId`] is a 64-bit hash of a crate name, together with all +/// `-Cmetadata` arguments, and some other data. It is to [`CrateNum`] what [`DefPathHash`] is to +/// [`DefId`]. It is stable across compilation sessions. /// -/// Since the ID is a hash value there is a (very small) chance that two crates -/// end up with the same [StableCrateId]. The compiler will check for such +/// Since the ID is a hash value, there is a small chance that two crates +/// end up with the same [`StableCrateId`]. The compiler will check for such /// collisions when loading crates and abort compilation in order to avoid /// further trouble. +/// +/// For more information on the possibility of hash collisions in rustc, +/// see the discussion in [`DefId`]. #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] #[derive(HashStable_Generic, Encodable, Decodable)] pub struct StableCrateId(pub(crate) u64); @@ -146,13 +150,10 @@ impl StableCrateId { /// Computes the stable ID for a crate with the given name and /// `-Cmetadata` arguments. pub fn new(crate_name: &str, is_exe: bool, mut metadata: Vec<String>) -> StableCrateId { - use std::hash::Hash; - use std::hash::Hasher; - let mut hasher = StableHasher::new(); crate_name.hash(&mut hasher); - // We don't want the stable crate id to dependent on the order + // We don't want the stable crate ID to depend on the order of // -C metadata arguments, so sort them: metadata.sort(); // Every distinct -C metadata value is only incorporated once: @@ -171,6 +172,18 @@ impl StableCrateId { // linking against a library of the same name, if this is an executable. hasher.write(if is_exe { b"exe" } else { b"lib" }); + // Also incorporate the rustc version. Otherwise, with -Zsymbol-mangling-version=v0 + // and no -Cmetadata, symbols from the same crate compiled with different versions of + // rustc are named the same. + // + // RUSTC_FORCE_RUSTC_VERSION is used to inject rustc version information + // during testing. + if let Some(val) = std::env::var_os("RUSTC_FORCE_RUSTC_VERSION") { + hasher.write(val.to_string_lossy().into_owned().as_bytes()) + } else { + hasher.write(option_env!("CFG_VERSION").unwrap_or("unknown version").as_bytes()); + } + StableCrateId(hasher.finish()) } } @@ -196,7 +209,7 @@ impl<E: Encoder> Encodable<E> for DefIndex { } impl<D: Decoder> Decodable<D> for DefIndex { - default fn decode(_: &mut D) -> Result<DefIndex, D::Error> { + default fn decode(_: &mut D) -> DefIndex { panic!("cannot decode `DefIndex` with `{}`", std::any::type_name::<D>()); } } @@ -205,12 +218,47 @@ impl<D: Decoder> Decodable<D> for DefIndex { /// index and a def index. /// /// You can create a `DefId` from a `LocalDefId` using `local_def_id.to_def_id()`. -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Copy)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Copy)] +// On below-64 bit systems we can simply use the derived `Hash` impl +#[cfg_attr(not(target_pointer_width = "64"), derive(Hash))] +#[repr(C)] +// We guarantee field order. Note that the order is essential here, see below why. pub struct DefId { + // cfg-ing the order of fields so that the `DefIndex` which is high entropy always ends up in + // the lower bits no matter the endianness. This allows the compiler to turn that `Hash` impl + // into a direct call to 'u64::hash(_)`. + #[cfg(not(all(target_pointer_width = "64", target_endian = "big")))] + pub index: DefIndex, pub krate: CrateNum, + #[cfg(all(target_pointer_width = "64", target_endian = "big"))] pub index: DefIndex, } +// On 64-bit systems, we can hash the whole `DefId` as one `u64` instead of two `u32`s. This +// improves performance without impairing `FxHash` quality. So the below code gets compiled to a +// noop on little endian systems because the memory layout of `DefId` is as follows: +// +// ``` +// +-1--------------31-+-32-------------63-+ +// ! index ! krate ! +// +-------------------+-------------------+ +// ``` +// +// The order here has direct impact on `FxHash` quality because we have far more `DefIndex` per +// crate than we have `Crate`s within one compilation. Or in other words, this arrangement puts +// more entropy in the low bits than the high bits. The reason this matters is that `FxHash`, which +// is used throughout rustc, has problems distributing the entropy from the high bits, so reversing +// the order would lead to a large number of collisions and thus far worse performance. +// +// On 64-bit big-endian systems, this compiles to a 64-bit rotation by 32 bits, which is still +// faster than another `FxHash` round. +#[cfg(target_pointer_width = "64")] +impl Hash for DefId { + fn hash<H: Hasher>(&self, h: &mut H) { + (((self.krate.as_u32() as u64) << 32) | (self.index.as_u32() as u64)).hash(h) + } +} + impl DefId { /// Makes a local `DefId` from the given `DefIndex`. #[inline] @@ -250,12 +298,10 @@ impl<E: Encoder> Encodable<E> for DefId { } impl<D: Decoder> Decodable<D> for DefId { - default fn decode(d: &mut D) -> Result<DefId, D::Error> { - d.read_struct(|d| { - Ok(DefId { - krate: d.read_struct_field("krate", Decodable::decode)?, - index: d.read_struct_field("index", Decodable::decode)?, - }) + default fn decode(d: &mut D) -> DefId { + d.read_struct(|d| DefId { + krate: d.read_struct_field("krate", Decodable::decode), + index: d.read_struct_field("index", Decodable::decode), }) } } @@ -275,17 +321,23 @@ impl fmt::Debug for DefId { rustc_data_structures::define_id_collections!(DefIdMap, DefIdSet, DefId); -/// A LocalDefId is equivalent to a DefId with `krate == LOCAL_CRATE`. Since +/// A `LocalDefId` is equivalent to a `DefId` with `krate == LOCAL_CRATE`. Since /// we encode this information in the type, we can ensure at compile time that -/// no DefIds from upstream crates get thrown into the mix. There are quite a -/// few cases where we know that only DefIds from the local crate are expected -/// and a DefId from a different crate would signify a bug somewhere. This -/// is when LocalDefId comes in handy. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +/// no `DefId`s from upstream crates get thrown into the mix. There are quite a +/// few cases where we know that only `DefId`s from the local crate are expected; +/// a `DefId` from a different crate would signify a bug somewhere. This +/// is when `LocalDefId` comes in handy. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct LocalDefId { pub local_def_index: DefIndex, } +// To ensure correctness of incremental compilation, +// `LocalDefId` must not implement `Ord` or `PartialOrd`. +// See https://github.com/rust-lang/rust/issues/90317. +impl !Ord for LocalDefId {} +impl !PartialOrd for LocalDefId {} + pub const CRATE_DEF_ID: LocalDefId = LocalDefId { local_def_index: CRATE_DEF_INDEX }; impl Idx for LocalDefId { @@ -324,8 +376,8 @@ impl<E: Encoder> Encodable<E> for LocalDefId { } impl<D: Decoder> Decodable<D> for LocalDefId { - fn decode(d: &mut D) -> Result<LocalDefId, D::Error> { - DefId::decode(d).map(|d| d.expect_local()) + fn decode(d: &mut D) -> LocalDefId { + DefId::decode(d).expect_local() } } diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs index 724d1904dc3..e0d6bd8cb7b 100644 --- a/compiler/rustc_span/src/hygiene.rs +++ b/compiler/rustc_span/src/hygiene.rs @@ -32,6 +32,7 @@ use crate::{HashStableContext, Span, DUMMY_SP}; use crate::def_id::{CrateNum, DefId, StableCrateId, CRATE_DEF_ID, LOCAL_CRATE}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::stable_hasher::HashingControls; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::sync::{Lock, Lrc}; use rustc_data_structures::unhash::UnhashMap; @@ -88,6 +89,33 @@ rustc_index::newtype_index! { } } +/// Assert that the provided `HashStableContext` is configured with the 'default' +/// `HashingControls`. We should always have bailed out before getting to here +/// with a non-default mode. With this check in place, we can avoid the need +/// to maintain separate versions of `ExpnData` hashes for each permutation +/// of `HashingControls` settings. +fn assert_default_hashing_controls<CTX: HashStableContext>(ctx: &CTX, msg: &str) { + match ctx.hashing_controls() { + // Ideally, we would also check that `node_id_hashing_mode` was always + // `NodeIdHashingMode::HashDefPath`. However, we currently end up hashing + // `Span`s in this mode, and there's not an easy way to change that. + // All of the span-related data that we hash is pretty self-contained + // (in particular, we don't hash any `HirId`s), so this shouldn't result + // in any caching problems. + // FIXME: Enforce that we don't end up transitively hashing any `HirId`s, + // or ensure that this method is always invoked with the same + // `NodeIdHashingMode` + // + // Note that we require that `hash_spans` be set according to the global + // `-Z incremental-ignore-spans` option. Normally, this option is disabled, + // which will cause us to require that this method always be called with `Span` hashing + // enabled. + HashingControls { hash_spans, node_id_hashing_mode: _ } + if hash_spans == !ctx.debug_opts_incremental_ignore_spans() => {} + other => panic!("Attempted hashing of {msg} with non-default HashingControls: {:?}", other), + } +} + /// A unique hash value associated to an expansion. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Encodable, Decodable, HashStable_Generic)] pub struct ExpnHash(Fingerprint); @@ -264,7 +292,15 @@ impl ExpnId { HygieneData::with(|data| data.expn_data(self).clone()) } + #[inline] pub fn is_descendant_of(self, ancestor: ExpnId) -> bool { + // a few "fast path" cases to avoid locking HygieneData + if ancestor == ExpnId::root() || ancestor == self { + return true; + } + if ancestor.krate != self.krate { + return false; + } HygieneData::with(|data| data.is_descendant_of(self, ancestor)) } @@ -376,13 +412,22 @@ impl HygieneData { } fn is_descendant_of(&self, mut expn_id: ExpnId, ancestor: ExpnId) -> bool { - while expn_id != ancestor { + // a couple "fast path" cases to avoid traversing parents in the loop below + if ancestor == ExpnId::root() { + return true; + } + if expn_id.krate != ancestor.krate { + return false; + } + loop { + if expn_id == ancestor { + return true; + } if expn_id == ExpnId::root() { return false; } expn_id = self.expn_data(expn_id).parent; } - true } fn normalize_to_macros_2_0(&self, ctxt: SyntaxContext) -> SyntaxContext { @@ -1099,18 +1144,11 @@ pub enum DesugaringKind { OpaqueTy, Async, Await, - ForLoop(ForLoopLoc), + ForLoop, LetElse, WhileLoop, } -/// A location in the desugaring of a `for` loop -#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable, HashStable_Generic)] -pub enum ForLoopLoc { - Head, - IntoIter, -} - impl DesugaringKind { /// The description wording should combine well with "desugaring of {}". pub fn descr(self) -> &'static str { @@ -1121,7 +1159,7 @@ impl DesugaringKind { DesugaringKind::QuestionMark => "operator `?`", DesugaringKind::TryBlock => "`try` block", DesugaringKind::OpaqueTy => "`impl Trait`", - DesugaringKind::ForLoop(_) => "`for` loop", + DesugaringKind::ForLoop => "`for` loop", DesugaringKind::LetElse => "`let...else`", DesugaringKind::WhileLoop => "`while` loop", } @@ -1230,6 +1268,7 @@ pub fn register_expn_id( data: ExpnData, hash: ExpnHash, ) -> ExpnId { + debug_assert!(data.parent == ExpnId::root() || krate == data.parent.krate); let expn_id = ExpnId { krate, local_id }; HygieneData::with(|hygiene_data| { let _old_data = hygiene_data.foreign_expn_data.insert(expn_id, data); @@ -1275,19 +1314,16 @@ pub fn decode_expn_id( // to track which `SyntaxContext`s we have already decoded. // The provided closure will be invoked to deserialize a `SyntaxContextData` // if we haven't already seen the id of the `SyntaxContext` we are deserializing. -pub fn decode_syntax_context< - D: Decoder, - F: FnOnce(&mut D, u32) -> Result<SyntaxContextData, D::Error>, ->( +pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextData>( d: &mut D, context: &HygieneDecodeContext, decode_data: F, -) -> Result<SyntaxContext, D::Error> { - let raw_id: u32 = Decodable::decode(d)?; +) -> SyntaxContext { + let raw_id: u32 = Decodable::decode(d); if raw_id == 0 { debug!("decode_syntax_context: deserialized root"); // The root is special - return Ok(SyntaxContext::root()); + return SyntaxContext::root(); } let outer_ctxts = &context.remapped_ctxts; @@ -1295,7 +1331,7 @@ pub fn decode_syntax_context< // Ensure that the lock() temporary is dropped early { if let Some(ctxt) = outer_ctxts.lock().get(raw_id as usize).copied().flatten() { - return Ok(ctxt); + return ctxt; } } @@ -1325,7 +1361,7 @@ pub fn decode_syntax_context< // Don't try to decode data while holding the lock, since we need to // be able to recursively decode a SyntaxContext - let mut ctxt_data = decode_data(d, raw_id)?; + let mut ctxt_data = decode_data(d, raw_id); // Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names` // We don't care what the encoding crate set this to - we want to resolve it // from the perspective of the current compilation session @@ -1341,7 +1377,7 @@ pub fn decode_syntax_context< assert_eq!(dummy.dollar_crate_name, kw::Empty); }); - Ok(new_ctxt) + new_ctxt } fn for_all_ctxts_in<E, F: FnMut(u32, SyntaxContext, &SyntaxContextData) -> Result<(), E>>( @@ -1383,13 +1419,13 @@ impl<E: Encoder> Encodable<E> for ExpnId { } impl<D: Decoder> Decodable<D> for LocalExpnId { - fn decode(d: &mut D) -> Result<Self, D::Error> { - ExpnId::decode(d).map(ExpnId::expect_local) + fn decode(d: &mut D) -> Self { + ExpnId::expect_local(ExpnId::decode(d)) } } impl<D: Decoder> Decodable<D> for ExpnId { - default fn decode(_: &mut D) -> Result<Self, D::Error> { + default fn decode(_: &mut D) -> Self { panic!("cannot decode `ExpnId` with `{}`", std::any::type_name::<D>()); } } @@ -1412,7 +1448,7 @@ impl<E: Encoder> Encodable<E> for SyntaxContext { } impl<D: Decoder> Decodable<D> for SyntaxContext { - default fn decode(_: &mut D) -> Result<Self, D::Error> { + default fn decode(_: &mut D) -> Self { panic!("cannot decode `SyntaxContext` with `{}`", std::any::type_name::<D>()); } } @@ -1433,6 +1469,7 @@ fn update_disambiguator(expn_data: &mut ExpnData, mut ctx: impl HashStableContex "Already set disambiguator for ExpnData: {:?}", expn_data ); + assert_default_hashing_controls(&ctx, "ExpnData (disambiguator)"); let mut expn_hash = expn_data.hash_expn(&mut ctx); let disambiguator = HygieneData::with(|data| { @@ -1482,6 +1519,7 @@ impl<CTX: HashStableContext> HashStable<CTX> for SyntaxContext { impl<CTX: HashStableContext> HashStable<CTX> for ExpnId { fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + assert_default_hashing_controls(ctx, "ExpnId"); let hash = if *self == ExpnId::root() { // Avoid fetching TLS storage for a trivial often-used value. Fingerprint::ZERO diff --git a/compiler/rustc_span/src/lev_distance.rs b/compiler/rustc_span/src/lev_distance.rs index c10968e06d7..93cf965f105 100644 --- a/compiler/rustc_span/src/lev_distance.rs +++ b/compiler/rustc_span/src/lev_distance.rs @@ -11,16 +11,21 @@ use std::cmp; mod tests; /// Finds the Levenshtein distance between two strings. -pub fn lev_distance(a: &str, b: &str) -> usize { - // cases which don't require further computation - if a.is_empty() { - return b.chars().count(); - } else if b.is_empty() { - return a.chars().count(); +/// +/// Returns None if the distance exceeds the limit. +pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> { + let n = a.chars().count(); + let m = b.chars().count(); + let min_dist = if n < m { m - n } else { n - m }; + + if min_dist > limit { + return None; + } + if n == 0 || m == 0 { + return (min_dist <= limit).then_some(min_dist); } - let mut dcol: Vec<_> = (0..=b.len()).collect(); - let mut t_last = 0; + let mut dcol: Vec<_> = (0..=m).collect(); for (i, sc) in a.chars().enumerate() { let mut current = i; @@ -35,10 +40,10 @@ pub fn lev_distance(a: &str, b: &str) -> usize { dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1; } current = next; - t_last = j; } } - dcol[t_last + 1] + + (dcol[m] <= limit).then_some(dcol[m]) } /// Finds the best match for a given word in the given iterator. @@ -51,44 +56,43 @@ pub fn lev_distance(a: &str, b: &str) -> usize { /// on an edge case with a lower(upper)case letters mismatch. #[cold] pub fn find_best_match_for_name( - name_vec: &[Symbol], + candidates: &[Symbol], lookup: Symbol, dist: Option<usize>, ) -> Option<Symbol> { - let lookup = &lookup.as_str(); - let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3); + let lookup = lookup.as_str(); + let lookup_uppercase = lookup.to_uppercase(); // Priority of matches: // 1. Exact case insensitive match // 2. Levenshtein distance match // 3. Sorted word match - if let Some(case_insensitive_match) = - name_vec.iter().find(|candidate| candidate.as_str().to_uppercase() == lookup.to_uppercase()) - { - return Some(*case_insensitive_match); + if let Some(c) = candidates.iter().find(|c| c.as_str().to_uppercase() == lookup_uppercase) { + return Some(*c); } - let levenshtein_match = name_vec - .iter() - .filter_map(|&name| { - let dist = lev_distance(lookup, &name.as_str()); - if dist <= max_dist { Some((name, dist)) } else { None } - }) - // Here we are collecting the next structure: - // (levenshtein_match, levenshtein_distance) - .fold(None, |result, (candidate, dist)| match result { - None => Some((candidate, dist)), - Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }), - }); - if levenshtein_match.is_some() { - levenshtein_match.map(|(candidate, _)| candidate) - } else { - find_match_by_sorted_words(name_vec, lookup) + + let mut dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3); + let mut best = None; + for c in candidates { + match lev_distance(lookup, c.as_str(), dist) { + Some(0) => return Some(*c), + Some(d) => { + dist = d - 1; + best = Some(*c); + } + None => {} + } } + if best.is_some() { + return best; + } + + find_match_by_sorted_words(candidates, lookup) } fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> { iter_names.iter().fold(None, |result, candidate| { - if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) { + if sort_by_words(candidate.as_str()) == sort_by_words(lookup) { Some(*candidate) } else { result diff --git a/compiler/rustc_span/src/lev_distance/tests.rs b/compiler/rustc_span/src/lev_distance/tests.rs index b32f8d32c13..4e34219248d 100644 --- a/compiler/rustc_span/src/lev_distance/tests.rs +++ b/compiler/rustc_span/src/lev_distance/tests.rs @@ -5,18 +5,26 @@ fn test_lev_distance() { use std::char::{from_u32, MAX}; // Test bytelength agnosticity for c in (0..MAX as u32).filter_map(from_u32).map(|i| i.to_string()) { - assert_eq!(lev_distance(&c[..], &c[..]), 0); + assert_eq!(lev_distance(&c[..], &c[..], usize::MAX), Some(0)); } let a = "\nMäry häd ä little lämb\n\nLittle lämb\n"; let b = "\nMary häd ä little lämb\n\nLittle lämb\n"; let c = "Mary häd ä little lämb\n\nLittle lämb\n"; - assert_eq!(lev_distance(a, b), 1); - assert_eq!(lev_distance(b, a), 1); - assert_eq!(lev_distance(a, c), 2); - assert_eq!(lev_distance(c, a), 2); - assert_eq!(lev_distance(b, c), 1); - assert_eq!(lev_distance(c, b), 1); + assert_eq!(lev_distance(a, b, usize::MAX), Some(1)); + assert_eq!(lev_distance(b, a, usize::MAX), Some(1)); + assert_eq!(lev_distance(a, c, usize::MAX), Some(2)); + assert_eq!(lev_distance(c, a, usize::MAX), Some(2)); + assert_eq!(lev_distance(b, c, usize::MAX), Some(1)); + assert_eq!(lev_distance(c, b, usize::MAX), Some(1)); +} + +#[test] +fn test_lev_distance_limit() { + assert_eq!(lev_distance("abc", "abcd", 1), Some(1)); + assert_eq!(lev_distance("abc", "abcd", 0), None); + assert_eq!(lev_distance("abc", "xyz", 3), Some(3)); + assert_eq!(lev_distance("abc", "xyz", 2), None); } #[test] diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 032ae73bbf3..2c3db35bb66 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -15,12 +15,12 @@ #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] #![feature(array_windows)] +#![feature(bool_to_option)] #![feature(crate_visibility_modifier)] #![feature(if_let_guard)] #![feature(negative_impls)] #![feature(nll)] #![feature(min_specialization)] -#![feature(thread_local_const_init)] #[macro_use] extern crate rustc_macros; @@ -41,8 +41,9 @@ pub mod edition; use edition::Edition; pub mod hygiene; use hygiene::Transparency; -pub use hygiene::{DesugaringKind, ExpnKind, ForLoopLoc, MacroKind}; +pub use hygiene::{DesugaringKind, ExpnKind, MacroKind}; pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext}; +use rustc_data_structures::stable_hasher::HashingControls; pub mod def_id; use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE}; pub mod lev_distance; @@ -194,10 +195,8 @@ impl<S: Encoder> Encodable<S> for RealFileName { encoder.emit_enum(|encoder| match *self { RealFileName::LocalPath(ref local_path) => { encoder.emit_enum_variant("LocalPath", 0, 1, |encoder| { - Ok({ - encoder - .emit_enum_variant_arg(true, |encoder| local_path.encode(encoder))?; - }) + encoder.emit_enum_variant_arg(true, |encoder| local_path.encode(encoder))?; + Ok(()) }) } @@ -206,12 +205,9 @@ impl<S: Encoder> Encodable<S> for RealFileName { // For privacy and build reproducibility, we must not embed host-dependant path in artifacts // if they have been remapped by --remap-path-prefix assert!(local_path.is_none()); - Ok({ - encoder - .emit_enum_variant_arg(true, |encoder| local_path.encode(encoder))?; - encoder - .emit_enum_variant_arg(false, |encoder| virtual_name.encode(encoder))?; - }) + encoder.emit_enum_variant_arg(true, |encoder| local_path.encode(encoder))?; + encoder.emit_enum_variant_arg(false, |encoder| virtual_name.encode(encoder))?; + Ok(()) }), }) } @@ -430,7 +426,7 @@ impl FileName { /// `SpanData` is public because `Span` uses a thread-local interner and can't be /// sent to other threads, but some pieces of performance infra run in a separate thread. /// Using `Span` is generally preferred. -#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd)] +#[derive(Clone, Copy, Hash, PartialEq, Eq)] pub struct SpanData { pub lo: BytePos, pub hi: BytePos, @@ -440,6 +436,36 @@ pub struct SpanData { pub parent: Option<LocalDefId>, } +// Order spans by position in the file. +impl Ord for SpanData { + fn cmp(&self, other: &Self) -> Ordering { + let SpanData { + lo: s_lo, + hi: s_hi, + ctxt: s_ctxt, + // `LocalDefId` does not implement `Ord`. + // The other fields are enough to determine in-file order. + parent: _, + } = self; + let SpanData { + lo: o_lo, + hi: o_hi, + ctxt: o_ctxt, + // `LocalDefId` does not implement `Ord`. + // The other fields are enough to determine in-file order. + parent: _, + } = other; + + (s_lo, s_hi, s_ctxt).cmp(&(o_lo, o_hi, o_ctxt)) + } +} + +impl PartialOrd for SpanData { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + impl SpanData { #[inline] pub fn span(&self) -> Span { @@ -556,6 +582,16 @@ impl Span { matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _)) } + /// Gate suggestions that would not be appropriate in a context the user didn't write. + pub fn can_be_used_for_suggestions(self) -> bool { + !self.from_expansion() + // FIXME: If this span comes from a `derive` macro but it points at code the user wrote, + // the callsite span and the span will be pointing at different places. It also means that + // we can safely provide suggestions on this span. + || (matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _)) + && self.parent_callsite().map(|p| (p.lo(), p.hi())) != Some((self.lo(), self.hi()))) + } + #[inline] pub fn with_root_ctxt(lo: BytePos, hi: BytePos) -> Span { Span::new(lo, hi, SyntaxContext::root(), None) @@ -576,7 +612,7 @@ impl Span { #[inline] /// Returns `true` if `hi == lo`. - pub fn is_empty(&self) -> bool { + pub fn is_empty(self) -> bool { let span = self.data_untracked(); span.hi == span.lo } @@ -604,7 +640,7 @@ impl Span { /// /// Use this instead of `==` when either span could be generated code, /// and you only care that they point to the same bytes of source text. - pub fn source_equal(&self, other: &Span) -> bool { + pub fn source_equal(self, other: Span) -> bool { let span = self.data(); let other = other.data(); span.lo == other.lo && span.hi == other.hi @@ -645,17 +681,17 @@ impl Span { } #[inline] - pub fn rust_2015(&self) -> bool { + pub fn rust_2015(self) -> bool { self.edition() == edition::Edition::Edition2015 } #[inline] - pub fn rust_2018(&self) -> bool { + pub fn rust_2018(self) -> bool { self.edition() >= edition::Edition::Edition2018 } #[inline] - pub fn rust_2021(&self) -> bool { + pub fn rust_2021(self) -> bool { self.edition() >= edition::Edition::Edition2021 } @@ -676,7 +712,7 @@ impl Span { /// Checks if a span is "internal" to a macro in which `#[unstable]` /// items can be used (that is, a macro marked with /// `#[allow_internal_unstable]`). - pub fn allows_unstable(&self, feature: Symbol) -> bool { + pub fn allows_unstable(self, feature: Symbol) -> bool { self.ctxt() .outer_expn_data() .allow_internal_unstable @@ -684,7 +720,7 @@ impl Span { } /// Checks if this span arises from a compiler desugaring of kind `kind`. - pub fn is_desugaring(&self, kind: DesugaringKind) -> bool { + pub fn is_desugaring(self, kind: DesugaringKind) -> bool { match self.ctxt().outer_expn_data().kind { ExpnKind::Desugaring(k) => k == kind, _ => false, @@ -693,7 +729,7 @@ impl Span { /// Returns the compiler desugaring that created this span, or `None` /// if this span is not from a desugaring. - pub fn desugaring_kind(&self) -> Option<DesugaringKind> { + pub fn desugaring_kind(self) -> Option<DesugaringKind> { match self.ctxt().outer_expn_data().kind { ExpnKind::Desugaring(k) => Some(k), _ => None, @@ -703,7 +739,7 @@ impl Span { /// Checks if a span is "internal" to a macro in which `unsafe` /// can be used without triggering the `unsafe_code` lint. // (that is, a macro marked with `#[allow_internal_unsafe]`). - pub fn allows_unsafe(&self) -> bool { + pub fn allows_unsafe(self) -> bool { self.ctxt().outer_expn_data().allow_internal_unsafe } @@ -716,7 +752,7 @@ impl Span { return None; } - let is_recursive = expn_data.call_site.source_equal(&prev_span); + let is_recursive = expn_data.call_site.source_equal(prev_span); prev_span = self; self = expn_data.call_site; @@ -830,13 +866,13 @@ impl Span { /// Equivalent of `Span::call_site` from the proc macro API, /// except that the location is taken from the `self` span. - pub fn with_call_site_ctxt(&self, expn_id: ExpnId) -> Span { + pub fn with_call_site_ctxt(self, expn_id: ExpnId) -> Span { self.with_ctxt_from_mark(expn_id, Transparency::Transparent) } /// Equivalent of `Span::mixed_site` from the proc macro API, /// except that the location is taken from the `self` span. - pub fn with_mixed_site_ctxt(&self, expn_id: ExpnId) -> Span { + pub fn with_mixed_site_ctxt(self, expn_id: ExpnId) -> Span { self.with_ctxt_from_mark(expn_id, Transparency::SemiTransparent) } @@ -940,12 +976,12 @@ impl<E: Encoder> Encodable<E> for Span { } } impl<D: Decoder> Decodable<D> for Span { - default fn decode(s: &mut D) -> Result<Span, D::Error> { + default fn decode(s: &mut D) -> Span { s.read_struct(|d| { - let lo = d.read_struct_field("lo", Decodable::decode)?; - let hi = d.read_struct_field("hi", Decodable::decode)?; + let lo = d.read_struct_field("lo", Decodable::decode); + let hi = d.read_struct_field("hi", Decodable::decode); - Ok(Span::new(lo, hi, SyntaxContext::root(), None)) + Span::new(lo, hi, SyntaxContext::root(), None) }) } } @@ -1379,7 +1415,7 @@ impl<S: Encoder> Encodable<S> for SourceFile { // Encode the first element. lines[0].encode(s)?; - let diff_iter = lines[..].array_windows().map(|&[fst, snd]| snd - fst); + let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst); match bytes_per_diff { 1 => { @@ -1413,30 +1449,30 @@ impl<S: Encoder> Encodable<S> for SourceFile { } impl<D: Decoder> Decodable<D> for SourceFile { - fn decode(d: &mut D) -> Result<SourceFile, D::Error> { + fn decode(d: &mut D) -> SourceFile { d.read_struct(|d| { - let name: FileName = d.read_struct_field("name", |d| Decodable::decode(d))?; + let name: FileName = d.read_struct_field("name", |d| Decodable::decode(d)); let src_hash: SourceFileHash = - d.read_struct_field("src_hash", |d| Decodable::decode(d))?; - let start_pos: BytePos = d.read_struct_field("start_pos", |d| Decodable::decode(d))?; - let end_pos: BytePos = d.read_struct_field("end_pos", |d| Decodable::decode(d))?; + d.read_struct_field("src_hash", |d| Decodable::decode(d)); + let start_pos: BytePos = d.read_struct_field("start_pos", |d| Decodable::decode(d)); + let end_pos: BytePos = d.read_struct_field("end_pos", |d| Decodable::decode(d)); let lines: Vec<BytePos> = d.read_struct_field("lines", |d| { - let num_lines: u32 = Decodable::decode(d)?; + let num_lines: u32 = Decodable::decode(d); let mut lines = Vec::with_capacity(num_lines as usize); if num_lines > 0 { // Read the number of bytes used per diff. - let bytes_per_diff: u8 = Decodable::decode(d)?; + let bytes_per_diff: u8 = Decodable::decode(d); // Read the first element. - let mut line_start: BytePos = Decodable::decode(d)?; + let mut line_start: BytePos = Decodable::decode(d); lines.push(line_start); for _ in 1..num_lines { let diff = match bytes_per_diff { - 1 => d.read_u8()? as u32, - 2 => d.read_u16()? as u32, - 4 => d.read_u32()?, + 1 => d.read_u8() as u32, + 2 => d.read_u16() as u32, + 4 => d.read_u32(), _ => unreachable!(), }; @@ -1446,17 +1482,17 @@ impl<D: Decoder> Decodable<D> for SourceFile { } } - Ok(lines) - })?; + lines + }); let multibyte_chars: Vec<MultiByteChar> = - d.read_struct_field("multibyte_chars", |d| Decodable::decode(d))?; + d.read_struct_field("multibyte_chars", |d| Decodable::decode(d)); let non_narrow_chars: Vec<NonNarrowChar> = - d.read_struct_field("non_narrow_chars", |d| Decodable::decode(d))?; - let name_hash: u128 = d.read_struct_field("name_hash", |d| Decodable::decode(d))?; + d.read_struct_field("non_narrow_chars", |d| Decodable::decode(d)); + let name_hash: u128 = d.read_struct_field("name_hash", |d| Decodable::decode(d)); let normalized_pos: Vec<NormalizedPos> = - d.read_struct_field("normalized_pos", |d| Decodable::decode(d))?; - let cnum: CrateNum = d.read_struct_field("cnum", |d| Decodable::decode(d))?; - Ok(SourceFile { + d.read_struct_field("normalized_pos", |d| Decodable::decode(d)); + let cnum: CrateNum = d.read_struct_field("cnum", |d| Decodable::decode(d)); + SourceFile { name, start_pos, end_pos, @@ -1471,7 +1507,7 @@ impl<D: Decoder> Decodable<D> for SourceFile { normalized_pos, name_hash, cnum, - }) + } }) } } @@ -1502,7 +1538,7 @@ impl SourceFile { assert!(end_pos <= u32::MAX as usize); let (lines, multibyte_chars, non_narrow_chars) = - analyze_source_file::analyze_source_file(&src[..], start_pos); + analyze_source_file::analyze_source_file(&src, start_pos); SourceFile { name, @@ -1914,8 +1950,8 @@ impl<S: rustc_serialize::Encoder> Encodable<S> for BytePos { } impl<D: rustc_serialize::Decoder> Decodable<D> for BytePos { - fn decode(d: &mut D) -> Result<BytePos, D::Error> { - Ok(BytePos(d.read_u32()?)) + fn decode(d: &mut D) -> BytePos { + BytePos(d.read_u32()) } } @@ -1940,6 +1976,7 @@ pub struct Loc { #[derive(Debug)] pub struct SourceFileAndLine { pub sf: Lrc<SourceFile>, + /// Index of line, starting from 0. pub line: usize, } #[derive(Debug)] @@ -2022,11 +2059,15 @@ impl InnerSpan { pub trait HashStableContext { fn def_path_hash(&self, def_id: DefId) -> DefPathHash; fn hash_spans(&self) -> bool; + /// Accesses `sess.opts.debugging_opts.incremental_ignore_spans` since + /// we don't have easy access to a `Session` + fn debug_opts_incremental_ignore_spans(&self) -> bool; fn def_span(&self, def_id: LocalDefId) -> Span; fn span_data_to_lines_and_cols( &mut self, span: &SpanData, ) -> Option<(Lrc<SourceFile>, usize, BytePos, usize, BytePos)>; + fn hashing_controls(&self) -> HashingControls; } impl<CTX> HashStable<CTX> for Span diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 74958c49849..7414d201f51 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -593,14 +593,19 @@ impl SourceMap { } pub fn span_to_margin(&self, sp: Span) -> Option<usize> { - match self.span_to_prev_source(sp) { - Err(_) => None, - Ok(source) => { - let last_line = source.rsplit_once('\n').unwrap_or(("", &source)).1; + Some(self.indentation_before(sp)?.len()) + } - Some(last_line.len() - last_line.trim_start().len()) - } - } + pub fn indentation_before(&self, sp: Span) -> Option<String> { + self.span_to_source(sp, |src, start_index, _| { + let before = &src[..start_index]; + let last_line = before.rsplit_once('\n').map_or(before, |(_, last)| last); + Ok(last_line + .split_once(|c: char| !c.is_whitespace()) + .map_or(last_line, |(indent, _)| indent) + .to_string()) + }) + .ok() } /// Returns the source snippet as `String` before the given `Span`. diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs index e9120b98aab..61e4074a7c8 100644 --- a/compiler/rustc_span/src/span_encoding.rs +++ b/compiler/rustc_span/src/span_encoding.rs @@ -61,6 +61,15 @@ use rustc_data_structures::fx::FxIndexSet; /// using the callback `SPAN_TRACK` to access the query engine. /// #[derive(Clone, Copy, Eq, PartialEq, Hash)] +// FIXME(@lcnr): Enable this attribute once the bootstrap +// compiler knows of `rustc_pass_by_value`. +// +// Right now, this lint would only trigger when compiling the +// stage 2 compiler, which is fairly annoying as there are +// a lot of places using `&Span` right now. After the next bootstrap bump, +// the lint will already trigger when using stage 1, which is a lot less annoying. +// +// #[cfg_attr(not(bootstrap), rustc_pass_by_value)] pub struct Span { base_or_index: u32, len_or_tag: u16, diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 52e2a8f48e2..757c430e799 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -196,6 +196,7 @@ symbols! { Implied, Input, Into, + IntoFuture, IntoIterator, IoRead, IoWrite, @@ -207,6 +208,7 @@ symbols! { LinkedList, LintPass, Mutex, + N, None, Ok, Option, @@ -269,7 +271,6 @@ symbols! { __D, __H, __S, - __next, __try_var, _d, _e, @@ -307,32 +308,44 @@ symbols! { alloc_layout, alloc_zeroed, allocator, + allocator_api, allocator_internals, allow, allow_fail, allow_internal_unsafe, allow_internal_unstable, allowed, + alu32, always, and, and_then, any, + append_const_msg, arbitrary_enum_discriminant, arbitrary_self_types, + args, arith_offset, arm, arm_target_feature, array, arrays, as_ptr, + as_ref, as_str, asm, + asm_const, + asm_experimental_arch, + asm_sym, + asm_unwind, assert, + assert_eq_macro, assert_inhabited, assert_macro, + assert_ne_macro, assert_receiver_is_total_eq, assert_uninit_valid, assert_zero_valid, + associated_const_equality, associated_consts, associated_type_bounds, associated_type_defaults, @@ -351,7 +364,10 @@ symbols! { augmented_assignments, auto_traits, automatically_derived, + avx, avx512_target_feature, + avx512bw, + avx512f, await_macro, bang, begin_panic, @@ -399,6 +415,7 @@ symbols! { cfg_doctest, cfg_eval, cfg_hide, + cfg_macro, cfg_panic, cfg_sanitize, cfg_target_abi, @@ -426,20 +443,25 @@ symbols! { coerce_unsized, cold, column, + column_macro, compare_and_swap, compare_exchange, compare_exchange_weak, compile_error, + compile_error_macro, compiler_builtins, compiler_fence, concat, + concat_bytes, concat_idents, + concat_macro, conservative_impl_trait, console, const_allocate, const_async_blocks, const_compare_raw_pointers, const_constructor, + const_deallocate, const_eval_limit, const_eval_select, const_eval_select_ct, @@ -493,6 +515,7 @@ symbols! { core_panic_macro, cosf32, cosf64, + count, cr, crate_id, crate_in_paths, @@ -512,10 +535,13 @@ symbols! { custom_inner_attributes, custom_test_frameworks, d, + dbg_macro, dead_code, dealloc, debug, + debug_assert_eq_macro, debug_assert_macro, + debug_assert_ne_macro, debug_assertions, debug_struct, debug_trait_builder, @@ -559,6 +585,7 @@ symbols! { doc_spotlight, doctest, document_private_items, + dotdot: "..", dotdot_in_tuple_patterns, dotdoteq_in_patterns, dreg, @@ -572,6 +599,7 @@ symbols! { dylib, dyn_metadata, dyn_trait, + e, edition_macro_pats, edition_panic, eh_catch_typeinfo, @@ -584,7 +612,11 @@ symbols! { enable, enclosing_scope, encode, + end, env, + env_macro, + eprint_macro, + eprintln_macro, eq, ermsb_target_feature, exact_div, @@ -623,6 +655,7 @@ symbols! { fdiv_fast, feature, fence, + ferris: "🦀", fetch_update, ffi, ffi_const, @@ -631,6 +664,7 @@ symbols! { field, field_init_shorthand, file, + file_macro, fill, finish, flags, @@ -653,8 +687,10 @@ symbols! { format, format_args, format_args_capture, + format_args_macro, format_args_nl, format_macro, + fp, freeze, freg, frem_fast, @@ -675,6 +711,7 @@ symbols! { gen_future, gen_kill, generator, + generator_return, generator_state, generators, generic_arg_infer, @@ -718,7 +755,10 @@ symbols! { in_band_lifetimes, include, include_bytes, + include_bytes_macro, + include_macro, include_str, + include_str_macro, inclusive_range_syntax, index, index_mut, @@ -728,9 +768,11 @@ symbols! { inlateout, inline, inline_const, + inline_const_pat, inout, instruction_set, intel, + into_future, into_iter, intra_doc_pointers, intrinsics, @@ -764,6 +806,7 @@ symbols! { lifetime, likely, line, + line_macro, link, link_args, link_cfg, @@ -774,8 +817,8 @@ symbols! { linkage, lint_reasons, literal, - llvm_asm, load, + loaded_from_disk, local, local_inner_macros, log10f32, @@ -806,9 +849,11 @@ symbols! { masked, match_beginning_vert, match_default_bindings, + matches_macro, maxnumf32, maxnumf64, may_dangle, + may_unwind, maybe_uninit, maybe_uninit_uninit, maybe_uninit_zeroed, @@ -841,6 +886,7 @@ symbols! { modifiers, module, module_path, + module_path_macro, more_qualified_paths, more_struct_aliases, movbe_target_feature, @@ -870,6 +916,7 @@ symbols! { neg, negate_unsigned, negative_impls, + neon, never, never_type, never_type_fallback, @@ -924,6 +971,7 @@ symbols! { optin_builtin_traits, option, option_env, + option_env_macro, options, or, or_patterns, @@ -944,6 +992,7 @@ symbols! { panic_implementation, panic_info, panic_location, + panic_no_unwind, panic_runtime, panic_str, panic_unwind, @@ -987,6 +1036,8 @@ symbols! { prelude_import, preserves_flags, primitive, + print_macro, + println_macro, proc_dash_macro: "proc-macro", proc_macro, proc_macro_attribute, @@ -1042,8 +1093,11 @@ symbols! { reg64, reg_abcd, reg_byte, + reg_iw, reg_nonzero, - reg_thumb, + reg_pair, + reg_ptr, + reg_upper, register_attr, register_tool, relaxed_adts, @@ -1058,6 +1112,7 @@ symbols! { repr_packed, repr_simd, repr_transparent, + reserved_r9: "reserved-r9", residual, result, rhs, @@ -1119,6 +1174,7 @@ symbols! { rustc_macro_transparency, rustc_main, rustc_mir, + rustc_must_implement_one_of, rustc_nonnull_optimization_guaranteed, rustc_object_lifetime_default, rustc_on_unimplemented, @@ -1126,6 +1182,7 @@ symbols! { rustc_paren_sugar, rustc_partition_codegened, rustc_partition_reused, + rustc_pass_by_value, rustc_peek, rustc_peek_definite_init, rustc_peek_liveness, @@ -1144,13 +1201,14 @@ symbols! { rustc_std_internal_symbol, rustc_strict_coherence, rustc_symbol_name, - rustc_synthetic, rustc_test_marker, rustc_then_this_would_need, rustc_trivial_field_reads, rustc_unsafe_specialization_marker, rustc_variance, + rustc_with_negative_coherence, rustdoc, + rustdoc_internals, rustfmt, rvalue_static_promotion, s, @@ -1169,6 +1227,7 @@ symbols! { simd, simd_add, simd_and, + simd_as, simd_bitmask, simd_cast, simd_ceil, @@ -1250,6 +1309,7 @@ symbols! { sqrtf64, sreg, sreg_low16, + sse, sse4a_target_feature, stable, staged_api, @@ -1271,6 +1331,7 @@ symbols! { str, str_alloc, stringify, + stringify_macro, struct_field_attributes, struct_inherit, struct_variant, @@ -1314,6 +1375,10 @@ symbols! { then_with, thread, thread_local, + thread_local_macro, + thumb2, + thumb_mode: "thumb-mode", + todo_macro, tool_attributes, tool_lints, trace_macros, @@ -1364,6 +1429,7 @@ symbols! { underscore_imports, underscore_lifetimes, uniform_paths, + unimplemented_macro, unit, universal_impl_trait, unix, @@ -1372,6 +1438,7 @@ symbols! { unpin, unreachable, unreachable_code, + unreachable_macro, unrestricted_attribute_tokens, unsafe_block_in_unsafe_fn, unsafe_cell, @@ -1402,7 +1469,9 @@ symbols! { var, variant_count, vec, + vec_macro, version, + vfp2, vis, visible_private_types, volatile, @@ -1426,7 +1495,9 @@ symbols! { wrapping_sub, wreg, write_bytes, + write_macro, write_str, + writeln_macro, x87_reg, xer, xmm_reg, @@ -1496,9 +1567,12 @@ impl Ident { Ident::new(self.name, self.span.normalize_to_macro_rules()) } - /// Convert the name to a `SymbolStr`. This is a slowish operation because - /// it requires locking the symbol interner. - pub fn as_str(self) -> SymbolStr { + /// Access the underlying string. This is a slowish operation because it + /// requires locking the symbol interner. + /// + /// Note that the lifetime of the return value is a lie. See + /// `Symbol::as_str()` for details. + pub fn as_str(&self) -> &str { self.name.as_str() } } @@ -1634,12 +1708,17 @@ impl Symbol { with_session_globals(|session_globals| session_globals.symbol_interner.intern(string)) } - /// Convert to a `SymbolStr`. This is a slowish operation because it + /// Access the underlying string. This is a slowish operation because it /// requires locking the symbol interner. - pub fn as_str(self) -> SymbolStr { - with_session_globals(|session_globals| { - let symbol_str = session_globals.symbol_interner.get(self); - unsafe { SymbolStr { string: std::mem::transmute::<&str, &str>(symbol_str) } } + /// + /// Note that the lifetime of the return value is a lie. It's not the same + /// as `&self`, but actually tied to the lifetime of the underlying + /// interner. Interners are long-lived, and there are very few of them, and + /// this function is typically used for short-lived things, so in practice + /// it works out ok. + pub fn as_str(&self) -> &str { + with_session_globals(|session_globals| unsafe { + std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get(*self)) }) } @@ -1662,26 +1741,26 @@ impl Symbol { impl fmt::Debug for Symbol { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self.as_str(), f) + fmt::Debug::fmt(self.as_str(), f) } } impl fmt::Display for Symbol { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.as_str(), f) + fmt::Display::fmt(self.as_str(), f) } } impl<S: Encoder> Encodable<S> for Symbol { fn encode(&self, s: &mut S) -> Result<(), S::Error> { - s.emit_str(&self.as_str()) + s.emit_str(self.as_str()) } } impl<D: Decoder> Decodable<D> for Symbol { #[inline] - fn decode(d: &mut D) -> Result<Symbol, D::Error> { - Ok(Symbol::intern(&d.read_str()?)) + fn decode(d: &mut D) -> Symbol { + Symbol::intern(&d.read_str()) } } @@ -1693,11 +1772,10 @@ impl<CTX> HashStable<CTX> for Symbol { } impl<CTX> ToStableHashKey<CTX> for Symbol { - type KeyType = SymbolStr; - + type KeyType = String; #[inline] - fn to_stable_hash_key(&self, _: &CTX) -> SymbolStr { - self.as_str() + fn to_stable_hash_key(&self, _: &CTX) -> String { + self.as_str().to_string() } } @@ -1710,8 +1788,9 @@ pub(crate) struct Interner(Lock<InternerInner>); // found that to regress performance up to 2% in some cases. This might be // revisited after further improvements to `indexmap`. // -// This type is private to prevent accidentally constructing more than one `Interner` on the same -// thread, which makes it easy to mixup `Symbol`s between `Interner`s. +// This type is private to prevent accidentally constructing more than one +// `Interner` on the same thread, which makes it easy to mixup `Symbol`s +// between `Interner`s. #[derive(Default)] struct InternerInner { arena: DroplessArena, @@ -1737,14 +1816,20 @@ impl Interner { let name = Symbol::new(inner.strings.len() as u32); - // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be - // UTF-8. + // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena, + // and immediately convert the clone back to `&[u8], all because there + // is no `inner.arena.alloc_str()` method. This is clearly safe. let string: &str = unsafe { str::from_utf8_unchecked(inner.arena.alloc_slice(string.as_bytes())) }; - // It is safe to extend the arena allocation to `'static` because we only access - // these while the arena is still alive. + + // SAFETY: we can extend the arena allocation to `'static` because we + // only access these while the arena is still alive. let string: &'static str = unsafe { &*(string as *const str) }; inner.strings.push(string); + + // This second hash table lookup can be avoided by using `RawEntryMut`, + // but this code path isn't hot enough for it to be worth it. See + // #91445 for details. inner.names.insert(string, name); name } @@ -1882,70 +1967,3 @@ impl Ident { self.name.can_be_raw() && self.is_reserved() } } - -/// An alternative to [`Symbol`], useful when the chars within the symbol need to -/// be accessed. It deliberately has limited functionality and should only be -/// used for temporary values. -/// -/// Because the interner outlives any thread which uses this type, we can -/// safely treat `string` which points to interner data, as an immortal string, -/// as long as this type never crosses between threads. -// -// FIXME: ensure that the interner outlives any thread which uses `SymbolStr`, -// by creating a new thread right after constructing the interner. -#[derive(Clone, Eq, PartialOrd, Ord)] -pub struct SymbolStr { - string: &'static str, -} - -// This impl allows a `SymbolStr` to be directly equated with a `String` or -// `&str`. -impl<T: std::ops::Deref<Target = str>> std::cmp::PartialEq<T> for SymbolStr { - fn eq(&self, other: &T) -> bool { - self.string == other.deref() - } -} - -impl !Send for SymbolStr {} -impl !Sync for SymbolStr {} - -/// This impl means that if `ss` is a `SymbolStr`: -/// - `*ss` is a `str`; -/// - `&*ss` is a `&str` (and `match &*ss { ... }` is a common pattern). -/// - `&ss as &str` is a `&str`, which means that `&ss` can be passed to a -/// function expecting a `&str`. -impl std::ops::Deref for SymbolStr { - type Target = str; - #[inline] - fn deref(&self) -> &str { - self.string - } -} - -impl fmt::Debug for SymbolStr { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.string, f) - } -} - -impl fmt::Display for SymbolStr { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.string, f) - } -} - -impl<CTX> HashStable<CTX> for SymbolStr { - #[inline] - fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { - self.string.hash_stable(hcx, hasher) - } -} - -impl<CTX> ToStableHashKey<CTX> for SymbolStr { - type KeyType = SymbolStr; - - #[inline] - fn to_stable_hash_key(&self, _: &CTX) -> SymbolStr { - self.clone() - } -} |
