Diffstat (limited to 'compiler/rustc_span')
-rw-r--r--  compiler/rustc_span/Cargo.toml                           21
-rw-r--r--  compiler/rustc_span/src/analyze_source_file.rs          274
-rw-r--r--  compiler/rustc_span/src/analyze_source_file/tests.rs    142
-rw-r--r--  compiler/rustc_span/src/caching_source_map_view.rs      104
-rw-r--r--  compiler/rustc_span/src/def_id.rs                       280
-rw-r--r--  compiler/rustc_span/src/edition.rs                       73
-rw-r--r--  compiler/rustc_span/src/fatal_error.rs                   26
-rw-r--r--  compiler/rustc_span/src/hygiene.rs                      1239
-rw-r--r--  compiler/rustc_span/src/lib.rs                          1872
-rw-r--r--  compiler/rustc_span/src/source_map.rs                   1097
-rw-r--r--  compiler/rustc_span/src/source_map/tests.rs              272
-rw-r--r--  compiler/rustc_span/src/span_encoding.rs                 133
-rw-r--r--  compiler/rustc_span/src/symbol.rs                       1693
-rw-r--r--  compiler/rustc_span/src/symbol/tests.rs                   25
-rw-r--r--  compiler/rustc_span/src/tests.rs                          40
15 files changed, 7291 insertions, 0 deletions
diff --git a/compiler/rustc_span/Cargo.toml b/compiler/rustc_span/Cargo.toml new file mode 100644 index 00000000000..1abfd50f003 --- /dev/null +++ b/compiler/rustc_span/Cargo.toml @@ -0,0 +1,21 @@ +[package] +authors = ["The Rust Project Developers"] +name = "rustc_span" +version = "0.0.0" +edition = "2018" + +[lib] +doctest = false + +[dependencies] +rustc_serialize = { path = "../rustc_serialize" } +rustc_macros = { path = "../rustc_macros" } +rustc_data_structures = { path = "../rustc_data_structures" } +rustc_index = { path = "../rustc_index" } +rustc_arena = { path = "../rustc_arena" } +scoped-tls = "1.0" +unicode-width = "0.1.4" +cfg-if = "0.1.2" +tracing = "0.1" +sha-1 = "0.8" +md-5 = "0.8" diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs new file mode 100644 index 00000000000..b4beb3dc376 --- /dev/null +++ b/compiler/rustc_span/src/analyze_source_file.rs @@ -0,0 +1,274 @@ +use super::*; +use unicode_width::UnicodeWidthChar; + +#[cfg(test)] +mod tests; + +/// Finds all newlines, multi-byte characters, and non-narrow characters in a +/// SourceFile. +/// +/// This function will use an SSE2 enhanced implementation if hardware support +/// is detected at runtime. +pub fn analyze_source_file( + src: &str, + source_file_start_pos: BytePos, +) -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) { + let mut lines = vec![source_file_start_pos]; + let mut multi_byte_chars = vec![]; + let mut non_narrow_chars = vec![]; + + // Calls the right implementation, depending on hardware support available. + analyze_source_file_dispatch( + src, + source_file_start_pos, + &mut lines, + &mut multi_byte_chars, + &mut non_narrow_chars, + ); + + // The code above optimistically registers a new line *after* each \n + // it encounters. If that point is already outside the source_file, remove + // it again. + if let Some(&last_line_start) = lines.last() { + let source_file_end = source_file_start_pos + BytePos::from_usize(src.len()); + assert!(source_file_end >= last_line_start); + if last_line_start == source_file_end { + lines.pop(); + } + } + + (lines, multi_byte_chars, non_narrow_chars) +} + +cfg_if::cfg_if! { + if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64")))] { + fn analyze_source_file_dispatch(src: &str, + source_file_start_pos: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) { + if is_x86_feature_detected!("sse2") { + unsafe { + analyze_source_file_sse2(src, + source_file_start_pos, + lines, + multi_byte_chars, + non_narrow_chars); + } + } else { + analyze_source_file_generic(src, + src.len(), + source_file_start_pos, + lines, + multi_byte_chars, + non_narrow_chars); + + } + } + + /// Checks 16 byte chunks of text at a time. If the chunk contains + /// something other than printable ASCII characters and newlines, the + /// function falls back to the generic implementation. Otherwise it uses + /// SSE2 intrinsics to quickly find all newlines. 
+ #[target_feature(enable = "sse2")] + unsafe fn analyze_source_file_sse2(src: &str, + output_offset: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + + const CHUNK_SIZE: usize = 16; + + let src_bytes = src.as_bytes(); + + let chunk_count = src.len() / CHUNK_SIZE; + + // This variable keeps track of where we should start decoding a + // chunk. If a multi-byte character spans across chunk boundaries, + // we need to skip that part in the next chunk because we already + // handled it. + let mut intra_chunk_offset = 0; + + for chunk_index in 0 .. chunk_count { + let ptr = src_bytes.as_ptr() as *const __m128i; + // We don't know if the pointer is aligned to 16 bytes, so we + // use `loadu`, which supports unaligned loading. + let chunk = _mm_loadu_si128(ptr.offset(chunk_index as isize)); + + // For character in the chunk, see if its byte value is < 0, which + // indicates that it's part of a UTF-8 char. + let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)); + // Create a bit mask from the comparison results. + let multibyte_mask = _mm_movemask_epi8(multibyte_test); + + // If the bit mask is all zero, we only have ASCII chars here: + if multibyte_mask == 0 { + assert!(intra_chunk_offset == 0); + + // Check if there are any control characters in the chunk. All + // control characters that we can encounter at this point have a + // byte value less than 32 or ... + let control_char_test0 = _mm_cmplt_epi8(chunk, _mm_set1_epi8(32)); + let control_char_mask0 = _mm_movemask_epi8(control_char_test0); + + // ... it's the ASCII 'DEL' character with a value of 127. + let control_char_test1 = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127)); + let control_char_mask1 = _mm_movemask_epi8(control_char_test1); + + let control_char_mask = control_char_mask0 | control_char_mask1; + + if control_char_mask != 0 { + // Check for newlines in the chunk + let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)); + let newlines_mask = _mm_movemask_epi8(newlines_test); + + if control_char_mask == newlines_mask { + // All control characters are newlines, record them + let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32; + let output_offset = output_offset + + BytePos::from_usize(chunk_index * CHUNK_SIZE + 1); + + loop { + let index = newlines_mask.trailing_zeros(); + + if index >= CHUNK_SIZE as u32 { + // We have arrived at the end of the chunk. + break + } + + lines.push(BytePos(index) + output_offset); + + // Clear the bit, so we can find the next one. + newlines_mask &= (!1) << index; + } + + // We are done for this chunk. All control characters were + // newlines and we took care of those. + continue + } else { + // Some of the control characters are not newlines, + // fall through to the slow path below. + } + } else { + // No control characters, nothing to record for this chunk + continue + } + } + + // The slow path. + // There are control chars in here, fallback to generic decoding. + let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset; + intra_chunk_offset = analyze_source_file_generic( + &src[scan_start .. 
], + CHUNK_SIZE - intra_chunk_offset, + BytePos::from_usize(scan_start) + output_offset, + lines, + multi_byte_chars, + non_narrow_chars + ); + } + + // There might still be a tail left to analyze + let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset; + if tail_start < src.len() { + analyze_source_file_generic(&src[tail_start as usize ..], + src.len() - tail_start, + output_offset + BytePos::from_usize(tail_start), + lines, + multi_byte_chars, + non_narrow_chars); + } + } + } else { + + // The target (or compiler version) does not support SSE2 ... + fn analyze_source_file_dispatch(src: &str, + source_file_start_pos: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) { + analyze_source_file_generic(src, + src.len(), + source_file_start_pos, + lines, + multi_byte_chars, + non_narrow_chars); + } + } +} + +// `scan_len` determines the number of bytes in `src` to scan. Note that the +// function can read past `scan_len` if a multi-byte character start within the +// range but extends past it. The overflow is returned by the function. +fn analyze_source_file_generic( + src: &str, + scan_len: usize, + output_offset: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>, +) -> usize { + assert!(src.len() >= scan_len); + let mut i = 0; + let src_bytes = src.as_bytes(); + + while i < scan_len { + let byte = unsafe { + // We verified that i < scan_len <= src.len() + *src_bytes.get_unchecked(i as usize) + }; + + // How much to advance in order to get to the next UTF-8 char in the + // string. + let mut char_len = 1; + + if byte < 32 { + // This is an ASCII control character, it could be one of the cases + // that are interesting to us. + + let pos = BytePos::from_usize(i) + output_offset; + + match byte { + b'\n' => { + lines.push(pos + BytePos(1)); + } + b'\t' => { + non_narrow_chars.push(NonNarrowChar::Tab(pos)); + } + _ => { + non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos)); + } + } + } else if byte >= 127 { + // The slow path: + // This is either ASCII control character "DEL" or the beginning of + // a multibyte char. Just decode to `char`. + let c = (&src[i..]).chars().next().unwrap(); + char_len = c.len_utf8(); + + let pos = BytePos::from_usize(i) + output_offset; + + if char_len > 1 { + assert!(char_len >= 2 && char_len <= 4); + let mbc = MultiByteChar { pos, bytes: char_len as u8 }; + multi_byte_chars.push(mbc); + } + + // Assume control characters are zero width. + // FIXME: How can we decide between `width` and `width_cjk`? + let char_width = UnicodeWidthChar::width(c).unwrap_or(0); + + if char_width != 1 { + non_narrow_chars.push(NonNarrowChar::new(pos, char_width)); + } + } + + i += char_len; + } + + i - scan_len +} diff --git a/compiler/rustc_span/src/analyze_source_file/tests.rs b/compiler/rustc_span/src/analyze_source_file/tests.rs new file mode 100644 index 00000000000..cb418a4bdaf --- /dev/null +++ b/compiler/rustc_span/src/analyze_source_file/tests.rs @@ -0,0 +1,142 @@ +use super::*; + +macro_rules! 
test { + (case: $test_name:ident, + text: $text:expr, + source_file_start_pos: $source_file_start_pos:expr, + lines: $lines:expr, + multi_byte_chars: $multi_byte_chars:expr, + non_narrow_chars: $non_narrow_chars:expr,) => { + #[test] + fn $test_name() { + let (lines, multi_byte_chars, non_narrow_chars) = + analyze_source_file($text, BytePos($source_file_start_pos)); + + let expected_lines: Vec<BytePos> = $lines.into_iter().map(|pos| BytePos(pos)).collect(); + + assert_eq!(lines, expected_lines); + + let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars + .into_iter() + .map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes }) + .collect(); + + assert_eq!(multi_byte_chars, expected_mbcs); + + let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars + .into_iter() + .map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width)) + .collect(); + + assert_eq!(non_narrow_chars, expected_nncs); + } + }; +} + +test!( + case: empty_text, + text: "", + source_file_start_pos: 0, + lines: vec![], + multi_byte_chars: vec![], + non_narrow_chars: vec![], +); + +test!( + case: newlines_short, + text: "a\nc", + source_file_start_pos: 0, + lines: vec![0, 2], + multi_byte_chars: vec![], + non_narrow_chars: vec![], +); + +test!( + case: newlines_long, + text: "012345678\nabcdef012345678\na", + source_file_start_pos: 0, + lines: vec![0, 10, 26], + multi_byte_chars: vec![], + non_narrow_chars: vec![], +); + +test!( + case: newline_and_multi_byte_char_in_same_chunk, + text: "01234β789\nbcdef0123456789abcdef", + source_file_start_pos: 0, + lines: vec![0, 11], + multi_byte_chars: vec![(5, 2)], + non_narrow_chars: vec![], +); + +test!( + case: newline_and_control_char_in_same_chunk, + text: "01234\u{07}6789\nbcdef0123456789abcdef", + source_file_start_pos: 0, + lines: vec![0, 11], + multi_byte_chars: vec![], + non_narrow_chars: vec![(5, 0)], +); + +test!( + case: multi_byte_char_short, + text: "aβc", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(1, 2)], + non_narrow_chars: vec![], +); + +test!( + case: multi_byte_char_long, + text: "0123456789abcΔf012345β", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(13, 2), (22, 2)], + non_narrow_chars: vec![], +); + +test!( + case: multi_byte_char_across_chunk_boundary, + text: "0123456789abcdeΔ123456789abcdef01234", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(15, 2)], + non_narrow_chars: vec![], +); + +test!( + case: multi_byte_char_across_chunk_boundary_tail, + text: "0123456789abcdeΔ....", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(15, 2)], + non_narrow_chars: vec![], +); + +test!( + case: non_narrow_short, + text: "0\t2", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![], + non_narrow_chars: vec![(1, 4)], +); + +test!( + case: non_narrow_long, + text: "01\t3456789abcdef01234567\u{07}9", + source_file_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![], + non_narrow_chars: vec![(2, 4), (24, 0)], +); + +test!( + case: output_offset_all, + text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf", + source_file_start_pos: 1000, + lines: vec![0 + 1000, 7 + 1000, 27 + 1000], + multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)], + non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)], +); diff --git a/compiler/rustc_span/src/caching_source_map_view.rs b/compiler/rustc_span/src/caching_source_map_view.rs new file mode 100644 index 00000000000..68b0bd1a574 --- /dev/null +++ b/compiler/rustc_span/src/caching_source_map_view.rs @@ -0,0 +1,104 @@ 
+use crate::source_map::SourceMap; +use crate::{BytePos, SourceFile}; +use rustc_data_structures::sync::Lrc; + +#[derive(Clone)] +struct CacheEntry { + time_stamp: usize, + line_number: usize, + line_start: BytePos, + line_end: BytePos, + file: Lrc<SourceFile>, + file_index: usize, +} + +#[derive(Clone)] +pub struct CachingSourceMapView<'sm> { + source_map: &'sm SourceMap, + line_cache: [CacheEntry; 3], + time_stamp: usize, +} + +impl<'sm> CachingSourceMapView<'sm> { + pub fn new(source_map: &'sm SourceMap) -> CachingSourceMapView<'sm> { + let files = source_map.files(); + let first_file = files[0].clone(); + let entry = CacheEntry { + time_stamp: 0, + line_number: 0, + line_start: BytePos(0), + line_end: BytePos(0), + file: first_file, + file_index: 0, + }; + + CachingSourceMapView { + source_map, + line_cache: [entry.clone(), entry.clone(), entry], + time_stamp: 0, + } + } + + pub fn byte_pos_to_line_and_col( + &mut self, + pos: BytePos, + ) -> Option<(Lrc<SourceFile>, usize, BytePos)> { + self.time_stamp += 1; + + // Check if the position is in one of the cached lines + for cache_entry in self.line_cache.iter_mut() { + if pos >= cache_entry.line_start && pos < cache_entry.line_end { + cache_entry.time_stamp = self.time_stamp; + + return Some(( + cache_entry.file.clone(), + cache_entry.line_number, + pos - cache_entry.line_start, + )); + } + } + + // No cache hit ... + let mut oldest = 0; + for index in 1..self.line_cache.len() { + if self.line_cache[index].time_stamp < self.line_cache[oldest].time_stamp { + oldest = index; + } + } + + let cache_entry = &mut self.line_cache[oldest]; + + // If the entry doesn't point to the correct file, fix it up + if pos < cache_entry.file.start_pos || pos >= cache_entry.file.end_pos { + let file_valid; + if self.source_map.files().len() > 0 { + let file_index = self.source_map.lookup_source_file_idx(pos); + let file = self.source_map.files()[file_index].clone(); + + if pos >= file.start_pos && pos < file.end_pos { + cache_entry.file = file; + cache_entry.file_index = file_index; + file_valid = true; + } else { + file_valid = false; + } + } else { + file_valid = false; + } + + if !file_valid { + return None; + } + } + + let line_index = cache_entry.file.lookup_line(pos).unwrap(); + let line_bounds = cache_entry.file.line_bounds(line_index); + + cache_entry.line_number = line_index + 1; + cache_entry.line_start = line_bounds.0; + cache_entry.line_end = line_bounds.1; + cache_entry.time_stamp = self.time_stamp; + + Some((cache_entry.file.clone(), cache_entry.line_number, pos - cache_entry.line_start)) + } +} diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs new file mode 100644 index 00000000000..aae778217d3 --- /dev/null +++ b/compiler/rustc_span/src/def_id.rs @@ -0,0 +1,280 @@ +use crate::HashStableContext; +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::AtomicRef; +use rustc_index::vec::Idx; +use rustc_macros::HashStable_Generic; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::borrow::Borrow; +use std::fmt; + +rustc_index::newtype_index! { + pub struct CrateId { + ENCODABLE = custom + } +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum CrateNum { + /// A special `CrateNum` that we use for the `tcx.rcache` when decoding from + /// the incr. comp. cache. 
+ ReservedForIncrCompCache, + Index(CrateId), +} + +/// Item definitions in the currently-compiled crate would have the `CrateNum` +/// `LOCAL_CRATE` in their `DefId`. +pub const LOCAL_CRATE: CrateNum = CrateNum::Index(CrateId::from_u32(0)); + +impl Idx for CrateNum { + #[inline] + fn new(value: usize) -> Self { + CrateNum::Index(Idx::new(value)) + } + + #[inline] + fn index(self) -> usize { + match self { + CrateNum::Index(idx) => Idx::index(idx), + _ => panic!("Tried to get crate index of {:?}", self), + } + } +} + +impl CrateNum { + pub fn new(x: usize) -> CrateNum { + CrateNum::from_usize(x) + } + + pub fn from_usize(x: usize) -> CrateNum { + CrateNum::Index(CrateId::from_usize(x)) + } + + pub fn from_u32(x: u32) -> CrateNum { + CrateNum::Index(CrateId::from_u32(x)) + } + + pub fn as_usize(self) -> usize { + match self { + CrateNum::Index(id) => id.as_usize(), + _ => panic!("tried to get index of non-standard crate {:?}", self), + } + } + + pub fn as_u32(self) -> u32 { + match self { + CrateNum::Index(id) => id.as_u32(), + _ => panic!("tried to get index of non-standard crate {:?}", self), + } + } + + pub fn as_def_id(&self) -> DefId { + DefId { krate: *self, index: CRATE_DEF_INDEX } + } +} + +impl fmt::Display for CrateNum { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CrateNum::Index(id) => fmt::Display::fmt(&id.private, f), + CrateNum::ReservedForIncrCompCache => write!(f, "crate for decoding incr comp cache"), + } + } +} + +/// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx. +/// Therefore, make sure to include the context when encode a `CrateNum`. +impl<E: Encoder> Encodable<E> for CrateNum { + default fn encode(&self, s: &mut E) -> Result<(), E::Error> { + s.emit_u32(self.as_u32()) + } +} + +impl<D: Decoder> Decodable<D> for CrateNum { + default fn decode(d: &mut D) -> Result<CrateNum, D::Error> { + Ok(CrateNum::from_u32(d.read_u32()?)) + } +} + +impl ::std::fmt::Debug for CrateNum { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + match self { + CrateNum::Index(id) => write!(fmt, "crate{}", id.private), + CrateNum::ReservedForIncrCompCache => write!(fmt, "crate for decoding incr comp cache"), + } + } +} + +#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[derive(HashStable_Generic, Encodable, Decodable)] +pub struct DefPathHash(pub Fingerprint); + +impl Borrow<Fingerprint> for DefPathHash { + #[inline] + fn borrow(&self) -> &Fingerprint { + &self.0 + } +} + +rustc_index::newtype_index! { + /// A DefIndex is an index into the hir-map for a crate, identifying a + /// particular definition. It should really be considered an interned + /// shorthand for a particular DefPath. + pub struct DefIndex { + ENCODABLE = custom // (only encodable in metadata) + + DEBUG_FORMAT = "DefIndex({})", + /// The crate root is always assigned index 0 by the AST Map code, + /// thanks to `NodeCollector::new`. + const CRATE_DEF_INDEX = 0, + } +} + +impl<E: Encoder> Encodable<E> for DefIndex { + default fn encode(&self, _: &mut E) -> Result<(), E::Error> { + panic!("cannot encode `DefIndex` with `{}`", std::any::type_name::<E>()); + } +} + +impl<D: Decoder> Decodable<D> for DefIndex { + default fn decode(_: &mut D) -> Result<DefIndex, D::Error> { + panic!("cannot decode `DefIndex` with `{}`", std::any::type_name::<D>()); + } +} + +/// A `DefId` identifies a particular *definition*, by combining a crate +/// index and a def index. 
+/// +/// You can create a `DefId` from a `LocalDefId` using `local_def_id.to_def_id()`. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Copy)] +pub struct DefId { + pub krate: CrateNum, + pub index: DefIndex, +} + +impl DefId { + /// Makes a local `DefId` from the given `DefIndex`. + #[inline] + pub fn local(index: DefIndex) -> DefId { + DefId { krate: LOCAL_CRATE, index } + } + + #[inline] + pub fn is_local(self) -> bool { + self.krate == LOCAL_CRATE + } + + #[inline] + pub fn as_local(self) -> Option<LocalDefId> { + if self.is_local() { Some(LocalDefId { local_def_index: self.index }) } else { None } + } + + #[inline] + pub fn expect_local(self) -> LocalDefId { + self.as_local().unwrap_or_else(|| panic!("DefId::expect_local: `{:?}` isn't local", self)) + } + + pub fn is_top_level_module(self) -> bool { + self.is_local() && self.index == CRATE_DEF_INDEX + } +} + +impl<E: Encoder> Encodable<E> for DefId { + default fn encode(&self, s: &mut E) -> Result<(), E::Error> { + s.emit_struct("DefId", 2, |s| { + s.emit_struct_field("krate", 0, |s| self.krate.encode(s))?; + + s.emit_struct_field("index", 1, |s| self.index.encode(s)) + }) + } +} + +impl<D: Decoder> Decodable<D> for DefId { + default fn decode(d: &mut D) -> Result<DefId, D::Error> { + d.read_struct("DefId", 2, |d| { + Ok(DefId { + krate: d.read_struct_field("krate", 0, Decodable::decode)?, + index: d.read_struct_field("index", 1, Decodable::decode)?, + }) + }) + } +} + +pub fn default_def_id_debug(def_id: DefId, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DefId").field("krate", &def_id.krate).field("index", &def_id.index).finish() +} + +pub static DEF_ID_DEBUG: AtomicRef<fn(DefId, &mut fmt::Formatter<'_>) -> fmt::Result> = + AtomicRef::new(&(default_def_id_debug as fn(_, &mut fmt::Formatter<'_>) -> _)); + +impl fmt::Debug for DefId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (*DEF_ID_DEBUG)(*self, f) + } +} + +rustc_data_structures::define_id_collections!(DefIdMap, DefIdSet, DefId); + +/// A LocalDefId is equivalent to a DefId with `krate == LOCAL_CRATE`. Since +/// we encode this information in the type, we can ensure at compile time that +/// no DefIds from upstream crates get thrown into the mix. There are quite a +/// few cases where we know that only DefIds from the local crate are expected +/// and a DefId from a different crate would signify a bug somewhere. This +/// is when LocalDefId comes in handy. 
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct LocalDefId { + pub local_def_index: DefIndex, +} + +impl Idx for LocalDefId { + #[inline] + fn new(idx: usize) -> Self { + LocalDefId { local_def_index: Idx::new(idx) } + } + #[inline] + fn index(self) -> usize { + self.local_def_index.index() + } +} + +impl LocalDefId { + #[inline] + pub fn to_def_id(self) -> DefId { + DefId { krate: LOCAL_CRATE, index: self.local_def_index } + } + + #[inline] + pub fn is_top_level_module(self) -> bool { + self.local_def_index == CRATE_DEF_INDEX + } +} + +impl fmt::Debug for LocalDefId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.to_def_id().fmt(f) + } +} + +impl<E: Encoder> Encodable<E> for LocalDefId { + fn encode(&self, s: &mut E) -> Result<(), E::Error> { + self.to_def_id().encode(s) + } +} + +impl<D: Decoder> Decodable<D> for LocalDefId { + fn decode(d: &mut D) -> Result<LocalDefId, D::Error> { + DefId::decode(d).map(|d| d.expect_local()) + } +} + +impl<CTX: HashStableContext> HashStable<CTX> for DefId { + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + hcx.hash_def_id(*self, hasher) + } +} + +impl<CTX: HashStableContext> HashStable<CTX> for CrateNum { + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + hcx.hash_crate_num(*self, hasher) + } +} diff --git a/compiler/rustc_span/src/edition.rs b/compiler/rustc_span/src/edition.rs new file mode 100644 index 00000000000..4d0c92f51d7 --- /dev/null +++ b/compiler/rustc_span/src/edition.rs @@ -0,0 +1,73 @@ +use crate::symbol::{sym, Symbol}; +use std::fmt; +use std::str::FromStr; + +use rustc_macros::HashStable_Generic; + +/// The edition of the compiler (RFC 2052) +#[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Encodable, Decodable, Eq)] +#[derive(HashStable_Generic)] +pub enum Edition { + // editions must be kept in order, oldest to newest + /// The 2015 edition + Edition2015, + /// The 2018 edition + Edition2018, + // when adding new editions, be sure to update: + // + // - Update the `ALL_EDITIONS` const + // - Update the EDITION_NAME_LIST const + // - add a `rust_####()` function to the session + // - update the enum in Cargo's sources as well +} + +// must be in order from oldest to newest +pub const ALL_EDITIONS: &[Edition] = &[Edition::Edition2015, Edition::Edition2018]; + +pub const EDITION_NAME_LIST: &str = "2015|2018"; + +pub const DEFAULT_EDITION: Edition = Edition::Edition2015; + +impl fmt::Display for Edition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match *self { + Edition::Edition2015 => "2015", + Edition::Edition2018 => "2018", + }; + write!(f, "{}", s) + } +} + +impl Edition { + pub fn lint_name(&self) -> &'static str { + match *self { + Edition::Edition2015 => "rust_2015_compatibility", + Edition::Edition2018 => "rust_2018_compatibility", + } + } + + pub fn feature_name(&self) -> Symbol { + match *self { + Edition::Edition2015 => sym::rust_2015_preview, + Edition::Edition2018 => sym::rust_2018_preview, + } + } + + pub fn is_stable(&self) -> bool { + match *self { + Edition::Edition2015 => true, + Edition::Edition2018 => true, + } + } +} + +impl FromStr for Edition { + type Err = (); + fn from_str(s: &str) -> Result<Self, ()> { + match s { + "2015" => Ok(Edition::Edition2015), + "2018" => Ok(Edition::Edition2018), + _ => Err(()), + } + } +} diff --git a/compiler/rustc_span/src/fatal_error.rs b/compiler/rustc_span/src/fatal_error.rs new file mode 100644 index 00000000000..718c0ddbc63 --- /dev/null +++ 
b/compiler/rustc_span/src/fatal_error.rs @@ -0,0 +1,26 @@ +/// Used as a return value to signify a fatal error occurred. (It is also +/// used as the argument to panic at the moment, but that will eventually +/// not be true.) +#[derive(Copy, Clone, Debug)] +#[must_use] +pub struct FatalError; + +pub struct FatalErrorMarker; + +// Don't implement Send on FatalError. This makes it impossible to panic!(FatalError). +// We don't want to invoke the panic handler and print a backtrace for fatal errors. +impl !Send for FatalError {} + +impl FatalError { + pub fn raise(self) -> ! { + std::panic::resume_unwind(Box::new(FatalErrorMarker)) + } +} + +impl std::fmt::Display for FatalError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "parser fatal error") + } +} + +impl std::error::Error for FatalError {} diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs new file mode 100644 index 00000000000..942c6648340 --- /dev/null +++ b/compiler/rustc_span/src/hygiene.rs @@ -0,0 +1,1239 @@ +//! Machinery for hygienic macros. +//! +//! Inspired by Matthew Flatt et al., “Macros That Work Together: Compile-Time Bindings, Partial +//! Expansion, and Definition Contexts,” *Journal of Functional Programming* 22, no. 2 +//! (March 1, 2012): 181–216, <https://doi.org/10.1017/S0956796812000093>. + +// Hygiene data is stored in a global variable and accessed via TLS, which +// means that accesses are somewhat expensive. (`HygieneData::with` +// encapsulates a single access.) Therefore, on hot code paths it is worth +// ensuring that multiple HygieneData accesses are combined into a single +// `HygieneData::with`. +// +// This explains why `HygieneData`, `SyntaxContext` and `ExpnId` have interfaces +// with a certain amount of redundancy in them. For example, +// `SyntaxContext::outer_expn_data` combines `SyntaxContext::outer` and +// `ExpnId::expn_data` so that two `HygieneData` accesses can be performed within +// a single `HygieneData::with` call. +// +// It also explains why many functions appear in `HygieneData` and again in +// `SyntaxContext` or `ExpnId`. For example, `HygieneData::outer` and +// `SyntaxContext::outer` do the same thing, but the former is for use within a +// `HygieneData::with` call while the latter is for use outside such a call. +// When modifying this file it is important to understand this distinction, +// because getting it wrong can lead to nested `HygieneData::with` calls that +// trigger runtime aborts. (Fortunately these are obvious and easy to fix.) + +use crate::edition::Edition; +use crate::symbol::{kw, sym, Symbol}; +use crate::SESSION_GLOBALS; +use crate::{Span, DUMMY_SP}; + +use crate::def_id::{CrateNum, DefId, CRATE_DEF_INDEX, LOCAL_CRATE}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::sync::{Lock, Lrc}; +use rustc_macros::HashStable_Generic; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::fmt; +use tracing::*; + +/// A `SyntaxContext` represents a chain of pairs `(ExpnId, Transparency)` named "marks". +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SyntaxContext(u32); + +#[derive(Debug, Encodable, Decodable, Clone)] +pub struct SyntaxContextData { + outer_expn: ExpnId, + outer_transparency: Transparency, + parent: SyntaxContext, + /// This context, but with all transparent and semi-transparent expansions filtered away. + opaque: SyntaxContext, + /// This context, but with all transparent expansions filtered away. 
+ opaque_and_semitransparent: SyntaxContext, + /// Name of the crate to which `$crate` with this context would resolve. + dollar_crate_name: Symbol, +} + +/// A unique ID associated with a macro invocation and expansion. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct ExpnId(u32); + +/// A property of a macro expansion that determines how identifiers +/// produced by that expansion are resolved. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Encodable, Decodable)] +#[derive(HashStable_Generic)] +pub enum Transparency { + /// Identifier produced by a transparent expansion is always resolved at call-site. + /// Call-site spans in procedural macros, hygiene opt-out in `macro` should use this. + Transparent, + /// Identifier produced by a semi-transparent expansion may be resolved + /// either at call-site or at definition-site. + /// If it's a local variable, label or `$crate` then it's resolved at def-site. + /// Otherwise it's resolved at call-site. + /// `macro_rules` macros behave like this, built-in macros currently behave like this too, + /// but that's an implementation detail. + SemiTransparent, + /// Identifier produced by an opaque expansion is always resolved at definition-site. + /// Def-site spans in procedural macros, identifiers from `macro` by default use this. + Opaque, +} + +impl ExpnId { + pub fn fresh(expn_data: Option<ExpnData>) -> Self { + HygieneData::with(|data| data.fresh_expn(expn_data)) + } + + /// The ID of the theoretical expansion that generates freshly parsed, unexpanded AST. + #[inline] + pub fn root() -> Self { + ExpnId(0) + } + + #[inline] + pub fn as_u32(self) -> u32 { + self.0 + } + + #[inline] + pub fn from_u32(raw: u32) -> ExpnId { + ExpnId(raw) + } + + #[inline] + pub fn expn_data(self) -> ExpnData { + HygieneData::with(|data| data.expn_data(self).clone()) + } + + #[inline] + pub fn set_expn_data(self, mut expn_data: ExpnData) { + HygieneData::with(|data| { + let old_expn_data = &mut data.expn_data[self.0 as usize]; + assert!(old_expn_data.is_none(), "expansion data is reset for an expansion ID"); + expn_data.orig_id.replace(self.as_u32()).expect_none("orig_id should be None"); + *old_expn_data = Some(expn_data); + }) + } + + pub fn is_descendant_of(self, ancestor: ExpnId) -> bool { + HygieneData::with(|data| data.is_descendant_of(self, ancestor)) + } + + /// `expn_id.outer_expn_is_descendant_of(ctxt)` is equivalent to but faster than + /// `expn_id.is_descendant_of(ctxt.outer_expn())`. + pub fn outer_expn_is_descendant_of(self, ctxt: SyntaxContext) -> bool { + HygieneData::with(|data| data.is_descendant_of(self, data.outer_expn(ctxt))) + } + + /// Returns span for the macro which originally caused this expansion to happen. + /// + /// Stops backtracing at include! boundary. + pub fn expansion_cause(mut self) -> Option<Span> { + let mut last_macro = None; + loop { + let expn_data = self.expn_data(); + // Stop going up the backtrace once include! is encountered + if expn_data.is_root() + || expn_data.kind == ExpnKind::Macro(MacroKind::Bang, sym::include) + { + break; + } + self = expn_data.call_site.ctxt().outer_expn(); + last_macro = Some(expn_data.call_site); + } + last_macro + } +} + +#[derive(Debug)] +pub struct HygieneData { + /// Each expansion should have an associated expansion data, but sometimes there's a delay + /// between creation of an expansion ID and obtaining its data (e.g. macros are collected + /// first and then resolved later), so we use an `Option` here. 
+ expn_data: Vec<Option<ExpnData>>, + syntax_context_data: Vec<SyntaxContextData>, + syntax_context_map: FxHashMap<(SyntaxContext, ExpnId, Transparency), SyntaxContext>, +} + +impl HygieneData { + crate fn new(edition: Edition) -> Self { + let mut root_data = ExpnData::default( + ExpnKind::Root, + DUMMY_SP, + edition, + Some(DefId::local(CRATE_DEF_INDEX)), + ); + root_data.orig_id = Some(0); + + HygieneData { + expn_data: vec![Some(root_data)], + syntax_context_data: vec![SyntaxContextData { + outer_expn: ExpnId::root(), + outer_transparency: Transparency::Opaque, + parent: SyntaxContext(0), + opaque: SyntaxContext(0), + opaque_and_semitransparent: SyntaxContext(0), + dollar_crate_name: kw::DollarCrate, + }], + syntax_context_map: FxHashMap::default(), + } + } + + pub fn with<T, F: FnOnce(&mut HygieneData) -> T>(f: F) -> T { + SESSION_GLOBALS.with(|session_globals| f(&mut *session_globals.hygiene_data.borrow_mut())) + } + + fn fresh_expn(&mut self, mut expn_data: Option<ExpnData>) -> ExpnId { + let raw_id = self.expn_data.len() as u32; + if let Some(data) = expn_data.as_mut() { + data.orig_id.replace(raw_id).expect_none("orig_id should be None"); + } + self.expn_data.push(expn_data); + ExpnId(raw_id) + } + + fn expn_data(&self, expn_id: ExpnId) -> &ExpnData { + self.expn_data[expn_id.0 as usize].as_ref().expect("no expansion data for an expansion ID") + } + + fn is_descendant_of(&self, mut expn_id: ExpnId, ancestor: ExpnId) -> bool { + while expn_id != ancestor { + if expn_id == ExpnId::root() { + return false; + } + expn_id = self.expn_data(expn_id).parent; + } + true + } + + fn normalize_to_macros_2_0(&self, ctxt: SyntaxContext) -> SyntaxContext { + self.syntax_context_data[ctxt.0 as usize].opaque + } + + fn normalize_to_macro_rules(&self, ctxt: SyntaxContext) -> SyntaxContext { + self.syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent + } + + fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId { + self.syntax_context_data[ctxt.0 as usize].outer_expn + } + + fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) { + let data = &self.syntax_context_data[ctxt.0 as usize]; + (data.outer_expn, data.outer_transparency) + } + + fn parent_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContext { + self.syntax_context_data[ctxt.0 as usize].parent + } + + fn remove_mark(&self, ctxt: &mut SyntaxContext) -> (ExpnId, Transparency) { + let outer_mark = self.outer_mark(*ctxt); + *ctxt = self.parent_ctxt(*ctxt); + outer_mark + } + + fn marks(&self, mut ctxt: SyntaxContext) -> Vec<(ExpnId, Transparency)> { + let mut marks = Vec::new(); + while ctxt != SyntaxContext::root() { + debug!("marks: getting parent of {:?}", ctxt); + marks.push(self.outer_mark(ctxt)); + ctxt = self.parent_ctxt(ctxt); + } + marks.reverse(); + marks + } + + fn walk_chain(&self, mut span: Span, to: SyntaxContext) -> Span { + debug!("walk_chain({:?}, {:?})", span, to); + debug!("walk_chain: span ctxt = {:?}", span.ctxt()); + while span.from_expansion() && span.ctxt() != to { + let outer_expn = self.outer_expn(span.ctxt()); + debug!("walk_chain({:?}): outer_expn={:?}", span, outer_expn); + let expn_data = self.expn_data(outer_expn); + debug!("walk_chain({:?}): expn_data={:?}", span, expn_data); + span = expn_data.call_site; + } + span + } + + fn adjust(&self, ctxt: &mut SyntaxContext, expn_id: ExpnId) -> Option<ExpnId> { + let mut scope = None; + while !self.is_descendant_of(expn_id, self.outer_expn(*ctxt)) { + scope = Some(self.remove_mark(ctxt).0); + } + scope + } + + fn apply_mark( + &mut self, + ctxt: 
SyntaxContext, + expn_id: ExpnId, + transparency: Transparency, + ) -> SyntaxContext { + assert_ne!(expn_id, ExpnId::root()); + if transparency == Transparency::Opaque { + return self.apply_mark_internal(ctxt, expn_id, transparency); + } + + let call_site_ctxt = self.expn_data(expn_id).call_site.ctxt(); + let mut call_site_ctxt = if transparency == Transparency::SemiTransparent { + self.normalize_to_macros_2_0(call_site_ctxt) + } else { + self.normalize_to_macro_rules(call_site_ctxt) + }; + + if call_site_ctxt == SyntaxContext::root() { + return self.apply_mark_internal(ctxt, expn_id, transparency); + } + + // Otherwise, `expn_id` is a macros 1.0 definition and the call site is in a + // macros 2.0 expansion, i.e., a macros 1.0 invocation is in a macros 2.0 definition. + // + // In this case, the tokens from the macros 1.0 definition inherit the hygiene + // at their invocation. That is, we pretend that the macros 1.0 definition + // was defined at its invocation (i.e., inside the macros 2.0 definition) + // so that the macros 2.0 definition remains hygienic. + // + // See the example at `test/ui/hygiene/legacy_interaction.rs`. + for (expn_id, transparency) in self.marks(ctxt) { + call_site_ctxt = self.apply_mark_internal(call_site_ctxt, expn_id, transparency); + } + self.apply_mark_internal(call_site_ctxt, expn_id, transparency) + } + + fn apply_mark_internal( + &mut self, + ctxt: SyntaxContext, + expn_id: ExpnId, + transparency: Transparency, + ) -> SyntaxContext { + let syntax_context_data = &mut self.syntax_context_data; + let mut opaque = syntax_context_data[ctxt.0 as usize].opaque; + let mut opaque_and_semitransparent = + syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent; + + if transparency >= Transparency::Opaque { + let parent = opaque; + opaque = *self + .syntax_context_map + .entry((parent, expn_id, transparency)) + .or_insert_with(|| { + let new_opaque = SyntaxContext(syntax_context_data.len() as u32); + syntax_context_data.push(SyntaxContextData { + outer_expn: expn_id, + outer_transparency: transparency, + parent, + opaque: new_opaque, + opaque_and_semitransparent: new_opaque, + dollar_crate_name: kw::DollarCrate, + }); + new_opaque + }); + } + + if transparency >= Transparency::SemiTransparent { + let parent = opaque_and_semitransparent; + opaque_and_semitransparent = *self + .syntax_context_map + .entry((parent, expn_id, transparency)) + .or_insert_with(|| { + let new_opaque_and_semitransparent = + SyntaxContext(syntax_context_data.len() as u32); + syntax_context_data.push(SyntaxContextData { + outer_expn: expn_id, + outer_transparency: transparency, + parent, + opaque, + opaque_and_semitransparent: new_opaque_and_semitransparent, + dollar_crate_name: kw::DollarCrate, + }); + new_opaque_and_semitransparent + }); + } + + let parent = ctxt; + *self.syntax_context_map.entry((parent, expn_id, transparency)).or_insert_with(|| { + let new_opaque_and_semitransparent_and_transparent = + SyntaxContext(syntax_context_data.len() as u32); + syntax_context_data.push(SyntaxContextData { + outer_expn: expn_id, + outer_transparency: transparency, + parent, + opaque, + opaque_and_semitransparent, + dollar_crate_name: kw::DollarCrate, + }); + new_opaque_and_semitransparent_and_transparent + }) + } +} + +pub fn clear_syntax_context_map() { + HygieneData::with(|data| data.syntax_context_map = FxHashMap::default()); +} + +pub fn walk_chain(span: Span, to: SyntaxContext) -> Span { + HygieneData::with(|data| data.walk_chain(span, to)) +} + +pub fn update_dollar_crate_names(mut 
get_name: impl FnMut(SyntaxContext) -> Symbol) { + // The new contexts that need updating are at the end of the list and have `$crate` as a name. + let (len, to_update) = HygieneData::with(|data| { + ( + data.syntax_context_data.len(), + data.syntax_context_data + .iter() + .rev() + .take_while(|scdata| scdata.dollar_crate_name == kw::DollarCrate) + .count(), + ) + }); + // The callback must be called from outside of the `HygieneData` lock, + // since it will try to acquire it too. + let range_to_update = len - to_update..len; + let names: Vec<_> = + range_to_update.clone().map(|idx| get_name(SyntaxContext::from_u32(idx as u32))).collect(); + HygieneData::with(|data| { + range_to_update.zip(names.into_iter()).for_each(|(idx, name)| { + data.syntax_context_data[idx].dollar_crate_name = name; + }) + }) +} + +pub fn debug_hygiene_data(verbose: bool) -> String { + HygieneData::with(|data| { + if verbose { + format!("{:#?}", data) + } else { + let mut s = String::from(""); + s.push_str("Expansions:"); + data.expn_data.iter().enumerate().for_each(|(id, expn_info)| { + let expn_info = expn_info.as_ref().expect("no expansion data for an expansion ID"); + s.push_str(&format!( + "\n{}: parent: {:?}, call_site_ctxt: {:?}, def_site_ctxt: {:?}, kind: {:?}", + id, + expn_info.parent, + expn_info.call_site.ctxt(), + expn_info.def_site.ctxt(), + expn_info.kind, + )); + }); + s.push_str("\n\nSyntaxContexts:"); + data.syntax_context_data.iter().enumerate().for_each(|(id, ctxt)| { + s.push_str(&format!( + "\n#{}: parent: {:?}, outer_mark: ({:?}, {:?})", + id, ctxt.parent, ctxt.outer_expn, ctxt.outer_transparency, + )); + }); + s + } + }) +} + +impl SyntaxContext { + #[inline] + pub const fn root() -> Self { + SyntaxContext(0) + } + + #[inline] + crate fn as_u32(self) -> u32 { + self.0 + } + + #[inline] + crate fn from_u32(raw: u32) -> SyntaxContext { + SyntaxContext(raw) + } + + /// Extend a syntax context with a given expansion and transparency. + crate fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> SyntaxContext { + HygieneData::with(|data| data.apply_mark(self, expn_id, transparency)) + } + + /// Pulls a single mark off of the syntax context. This effectively moves the + /// context up one macro definition level. That is, if we have a nested macro + /// definition as follows: + /// + /// ```rust + /// macro_rules! f { + /// macro_rules! g { + /// ... + /// } + /// } + /// ``` + /// + /// and we have a SyntaxContext that is referring to something declared by an invocation + /// of g (call it g1), calling remove_mark will result in the SyntaxContext for the + /// invocation of f that created g1. + /// Returns the mark that was removed. + pub fn remove_mark(&mut self) -> ExpnId { + HygieneData::with(|data| data.remove_mark(self).0) + } + + pub fn marks(self) -> Vec<(ExpnId, Transparency)> { + HygieneData::with(|data| data.marks(self)) + } + + /// Adjust this context for resolution in a scope created by the given expansion. + /// For example, consider the following three resolutions of `f`: + /// + /// ```rust + /// mod foo { pub fn f() {} } // `f`'s `SyntaxContext` is empty. + /// m!(f); + /// macro m($f:ident) { + /// mod bar { + /// pub fn f() {} // `f`'s `SyntaxContext` has a single `ExpnId` from `m`. + /// pub fn $f() {} // `$f`'s `SyntaxContext` is empty. + /// } + /// foo::f(); // `f`'s `SyntaxContext` has a single `ExpnId` from `m` + /// //^ Since `mod foo` is outside this expansion, `adjust` removes the mark from `f`, + /// //| and it resolves to `::foo::f`. 
+ /// bar::f(); // `f`'s `SyntaxContext` has a single `ExpnId` from `m` + /// //^ Since `mod bar` not outside this expansion, `adjust` does not change `f`, + /// //| and it resolves to `::bar::f`. + /// bar::$f(); // `f`'s `SyntaxContext` is empty. + /// //^ Since `mod bar` is not outside this expansion, `adjust` does not change `$f`, + /// //| and it resolves to `::bar::$f`. + /// } + /// ``` + /// This returns the expansion whose definition scope we use to privacy check the resolution, + /// or `None` if we privacy check as usual (i.e., not w.r.t. a macro definition scope). + pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> { + HygieneData::with(|data| data.adjust(self, expn_id)) + } + + /// Like `SyntaxContext::adjust`, but also normalizes `self` to macros 2.0. + pub fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> { + HygieneData::with(|data| { + *self = data.normalize_to_macros_2_0(*self); + data.adjust(self, expn_id) + }) + } + + /// Adjust this context for resolution in a scope created by the given expansion + /// via a glob import with the given `SyntaxContext`. + /// For example: + /// + /// ```rust + /// m!(f); + /// macro m($i:ident) { + /// mod foo { + /// pub fn f() {} // `f`'s `SyntaxContext` has a single `ExpnId` from `m`. + /// pub fn $i() {} // `$i`'s `SyntaxContext` is empty. + /// } + /// n(f); + /// macro n($j:ident) { + /// use foo::*; + /// f(); // `f`'s `SyntaxContext` has a mark from `m` and a mark from `n` + /// //^ `glob_adjust` removes the mark from `n`, so this resolves to `foo::f`. + /// $i(); // `$i`'s `SyntaxContext` has a mark from `n` + /// //^ `glob_adjust` removes the mark from `n`, so this resolves to `foo::$i`. + /// $j(); // `$j`'s `SyntaxContext` has a mark from `m` + /// //^ This cannot be glob-adjusted, so this is a resolution error. + /// } + /// } + /// ``` + /// This returns `None` if the context cannot be glob-adjusted. + /// Otherwise, it returns the scope to use when privacy checking (see `adjust` for details). 
+ pub fn glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>> { + HygieneData::with(|data| { + let mut scope = None; + let mut glob_ctxt = data.normalize_to_macros_2_0(glob_span.ctxt()); + while !data.is_descendant_of(expn_id, data.outer_expn(glob_ctxt)) { + scope = Some(data.remove_mark(&mut glob_ctxt).0); + if data.remove_mark(self).0 != scope.unwrap() { + return None; + } + } + if data.adjust(self, expn_id).is_some() { + return None; + } + Some(scope) + }) + } + + /// Undo `glob_adjust` if possible: + /// + /// ```rust + /// if let Some(privacy_checking_scope) = self.reverse_glob_adjust(expansion, glob_ctxt) { + /// assert!(self.glob_adjust(expansion, glob_ctxt) == Some(privacy_checking_scope)); + /// } + /// ``` + pub fn reverse_glob_adjust( + &mut self, + expn_id: ExpnId, + glob_span: Span, + ) -> Option<Option<ExpnId>> { + HygieneData::with(|data| { + if data.adjust(self, expn_id).is_some() { + return None; + } + + let mut glob_ctxt = data.normalize_to_macros_2_0(glob_span.ctxt()); + let mut marks = Vec::new(); + while !data.is_descendant_of(expn_id, data.outer_expn(glob_ctxt)) { + marks.push(data.remove_mark(&mut glob_ctxt)); + } + + let scope = marks.last().map(|mark| mark.0); + while let Some((expn_id, transparency)) = marks.pop() { + *self = data.apply_mark(*self, expn_id, transparency); + } + Some(scope) + }) + } + + pub fn hygienic_eq(self, other: SyntaxContext, expn_id: ExpnId) -> bool { + HygieneData::with(|data| { + let mut self_normalized = data.normalize_to_macros_2_0(self); + data.adjust(&mut self_normalized, expn_id); + self_normalized == data.normalize_to_macros_2_0(other) + }) + } + + #[inline] + pub fn normalize_to_macros_2_0(self) -> SyntaxContext { + HygieneData::with(|data| data.normalize_to_macros_2_0(self)) + } + + #[inline] + pub fn normalize_to_macro_rules(self) -> SyntaxContext { + HygieneData::with(|data| data.normalize_to_macro_rules(self)) + } + + #[inline] + pub fn outer_expn(self) -> ExpnId { + HygieneData::with(|data| data.outer_expn(self)) + } + + /// `ctxt.outer_expn_data()` is equivalent to but faster than + /// `ctxt.outer_expn().expn_data()`. + #[inline] + pub fn outer_expn_data(self) -> ExpnData { + HygieneData::with(|data| data.expn_data(data.outer_expn(self)).clone()) + } + + #[inline] + pub fn outer_mark(self) -> (ExpnId, Transparency) { + HygieneData::with(|data| data.outer_mark(self)) + } + + #[inline] + pub fn outer_mark_with_data(self) -> (ExpnId, Transparency, ExpnData) { + HygieneData::with(|data| { + let (expn_id, transparency) = data.outer_mark(self); + (expn_id, transparency, data.expn_data(expn_id).clone()) + }) + } + + pub fn dollar_crate_name(self) -> Symbol { + HygieneData::with(|data| data.syntax_context_data[self.0 as usize].dollar_crate_name) + } +} + +impl fmt::Debug for SyntaxContext { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "#{}", self.0) + } +} + +impl Span { + /// Creates a fresh expansion with given properties. + /// Expansions are normally created by macros, but in some cases expansions are created for + /// other compiler-generated code to set per-span properties like allowed unstable features. + /// The returned span belongs to the created expansion and has the new properties, + /// but its location is inherited from the current span. 
+ pub fn fresh_expansion(self, expn_data: ExpnData) -> Span { + self.fresh_expansion_with_transparency(expn_data, Transparency::Transparent) + } + + pub fn fresh_expansion_with_transparency( + self, + expn_data: ExpnData, + transparency: Transparency, + ) -> Span { + HygieneData::with(|data| { + let expn_id = data.fresh_expn(Some(expn_data)); + self.with_ctxt(data.apply_mark(SyntaxContext::root(), expn_id, transparency)) + }) + } +} + +/// A subset of properties from both macro definition and macro call available through global data. +/// Avoid using this if you have access to the original definition or call structures. +#[derive(Clone, Debug, Encodable, Decodable, HashStable_Generic)] +pub struct ExpnData { + // --- The part unique to each expansion. + /// The kind of this expansion - macro or compiler desugaring. + pub kind: ExpnKind, + /// The expansion that produced this expansion. + pub parent: ExpnId, + /// The location of the actual macro invocation or syntax sugar , e.g. + /// `let x = foo!();` or `if let Some(y) = x {}` + /// + /// This may recursively refer to other macro invocations, e.g., if + /// `foo!()` invoked `bar!()` internally, and there was an + /// expression inside `bar!`; the call_site of the expression in + /// the expansion would point to the `bar!` invocation; that + /// call_site span would have its own ExpnData, with the call_site + /// pointing to the `foo!` invocation. + pub call_site: Span, + + // --- The part specific to the macro/desugaring definition. + // --- It may be reasonable to share this part between expansions with the same definition, + // --- but such sharing is known to bring some minor inconveniences without also bringing + // --- noticeable perf improvements (PR #62898). + /// The span of the macro definition (possibly dummy). + /// This span serves only informational purpose and is not used for resolution. + pub def_site: Span, + /// List of `#[unstable]`/feature-gated features that the macro is allowed to use + /// internally without forcing the whole crate to opt-in + /// to them. + pub allow_internal_unstable: Option<Lrc<[Symbol]>>, + /// Whether the macro is allowed to use `unsafe` internally + /// even if the user crate has `#![forbid(unsafe_code)]`. + pub allow_internal_unsafe: bool, + /// Enables the macro helper hack (`ident!(...)` -> `$crate::ident!(...)`) + /// for a given macro. + pub local_inner_macros: bool, + /// Edition of the crate in which the macro is defined. + pub edition: Edition, + /// The `DefId` of the macro being invoked, + /// if this `ExpnData` corresponds to a macro invocation + pub macro_def_id: Option<DefId>, + /// The crate that originally created this `ExpnData. During + /// metadata serialization, we only encode `ExpnData`s that were + /// created locally - when our serialized metadata is decoded, + /// foreign `ExpnId`s will have their `ExpnData` looked up + /// from the crate specified by `Crate + pub krate: CrateNum, + /// The raw that this `ExpnData` had in its original crate. + /// An `ExpnData` can be created before being assigned an `ExpnId`, + /// so this might be `None` until `set_expn_data` is called + // This is used only for serialization/deserialization purposes: + // two `ExpnData`s that differ only in their `orig_id` should + // be considered equivalent. + #[stable_hasher(ignore)] + pub orig_id: Option<u32>, +} + +// This would require special handling of `orig_id` and `parent` +impl !PartialEq for ExpnData {} + +impl ExpnData { + /// Constructs expansion data with default properties. 
+ pub fn default( + kind: ExpnKind, + call_site: Span, + edition: Edition, + macro_def_id: Option<DefId>, + ) -> ExpnData { + ExpnData { + kind, + parent: ExpnId::root(), + call_site, + def_site: DUMMY_SP, + allow_internal_unstable: None, + allow_internal_unsafe: false, + local_inner_macros: false, + edition, + macro_def_id, + krate: LOCAL_CRATE, + orig_id: None, + } + } + + pub fn allow_unstable( + kind: ExpnKind, + call_site: Span, + edition: Edition, + allow_internal_unstable: Lrc<[Symbol]>, + macro_def_id: Option<DefId>, + ) -> ExpnData { + ExpnData { + allow_internal_unstable: Some(allow_internal_unstable), + ..ExpnData::default(kind, call_site, edition, macro_def_id) + } + } + + #[inline] + pub fn is_root(&self) -> bool { + if let ExpnKind::Root = self.kind { true } else { false } + } +} + +/// Expansion kind. +#[derive(Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)] +pub enum ExpnKind { + /// No expansion, aka root expansion. Only `ExpnId::root()` has this kind. + Root, + /// Expansion produced by a macro. + Macro(MacroKind, Symbol), + /// Transform done by the compiler on the AST. + AstPass(AstPass), + /// Desugaring done by the compiler during HIR lowering. + Desugaring(DesugaringKind), +} + +impl ExpnKind { + pub fn descr(&self) -> String { + match *self { + ExpnKind::Root => kw::PathRoot.to_string(), + ExpnKind::Macro(macro_kind, name) => match macro_kind { + MacroKind::Bang => format!("{}!", name), + MacroKind::Attr => format!("#[{}]", name), + MacroKind::Derive => format!("#[derive({})]", name), + }, + ExpnKind::AstPass(kind) => kind.descr().to_string(), + ExpnKind::Desugaring(kind) => format!("desugaring of {}", kind.descr()), + } + } +} + +/// The kind of macro invocation or definition. +#[derive(Clone, Copy, PartialEq, Eq, Encodable, Decodable, Hash, Debug)] +#[derive(HashStable_Generic)] +pub enum MacroKind { + /// A bang macro `foo!()`. + Bang, + /// An attribute macro `#[foo]`. + Attr, + /// A derive macro `#[derive(Foo)]` + Derive, +} + +impl MacroKind { + pub fn descr(self) -> &'static str { + match self { + MacroKind::Bang => "macro", + MacroKind::Attr => "attribute macro", + MacroKind::Derive => "derive macro", + } + } + + pub fn descr_expected(self) -> &'static str { + match self { + MacroKind::Attr => "attribute", + _ => self.descr(), + } + } + + pub fn article(self) -> &'static str { + match self { + MacroKind::Attr => "an", + _ => "a", + } + } +} + +/// The kind of AST transform. +#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)] +pub enum AstPass { + StdImports, + TestHarness, + ProcMacroHarness, +} + +impl AstPass { + fn descr(self) -> &'static str { + match self { + AstPass::StdImports => "standard library imports", + AstPass::TestHarness => "test harness", + AstPass::ProcMacroHarness => "proc macro harness", + } + } +} + +/// The kind of compiler desugaring. +#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable, HashStable_Generic)] +pub enum DesugaringKind { + /// We desugar `if c { i } else { e }` to `match $ExprKind::Use(c) { true => i, _ => e }`. + /// However, we do not want to blame `c` for unreachability but rather say that `i` + /// is unreachable. This desugaring kind allows us to avoid blaming `c`. + /// This also applies to `while` loops. + CondTemporary, + QuestionMark, + TryBlock, + /// Desugaring of an `impl Trait` in return type position + /// to an `type Foo = impl Trait;` and replacing the + /// `impl Trait` with `Foo`. 
+ OpaqueTy, + Async, + Await, + ForLoop(ForLoopLoc), +} + +/// A location in the desugaring of a `for` loop +#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable, HashStable_Generic)] +pub enum ForLoopLoc { + Head, + IntoIter, +} + +impl DesugaringKind { + /// The description wording should combine well with "desugaring of {}". + fn descr(self) -> &'static str { + match self { + DesugaringKind::CondTemporary => "`if` or `while` condition", + DesugaringKind::Async => "`async` block or function", + DesugaringKind::Await => "`await` expression", + DesugaringKind::QuestionMark => "operator `?`", + DesugaringKind::TryBlock => "`try` block", + DesugaringKind::OpaqueTy => "`impl Trait`", + DesugaringKind::ForLoop(_) => "`for` loop", + } + } +} + +#[derive(Default)] +pub struct HygieneEncodeContext { + /// All `SyntaxContexts` for which we have written `SyntaxContextData` into crate metadata. + /// This is `None` after we finish encoding `SyntaxContexts`, to ensure + /// that we don't accidentally try to encode any more `SyntaxContexts` + serialized_ctxts: Lock<FxHashSet<SyntaxContext>>, + /// The `SyntaxContexts` that we have serialized (e.g. as a result of encoding `Spans`) + /// in the most recent 'round' of serializnig. Serializing `SyntaxContextData` + /// may cause us to serialize more `SyntaxContext`s, so serialize in a loop + /// until we reach a fixed point. + latest_ctxts: Lock<FxHashSet<SyntaxContext>>, + + serialized_expns: Lock<FxHashSet<ExpnId>>, + + latest_expns: Lock<FxHashSet<ExpnId>>, +} + +impl HygieneEncodeContext { + pub fn encode< + T, + R, + F: FnMut(&mut T, u32, &SyntaxContextData) -> Result<(), R>, + G: FnMut(&mut T, u32, &ExpnData) -> Result<(), R>, + >( + &self, + encoder: &mut T, + mut encode_ctxt: F, + mut encode_expn: G, + ) -> Result<(), R> { + // When we serialize a `SyntaxContextData`, we may end up serializing + // a `SyntaxContext` that we haven't seen before + while !self.latest_ctxts.lock().is_empty() || !self.latest_expns.lock().is_empty() { + debug!( + "encode_hygiene: Serializing a round of {:?} SyntaxContextDatas: {:?}", + self.latest_ctxts.lock().len(), + self.latest_ctxts + ); + + // Consume the current round of SyntaxContexts. + // Drop the lock() temporary early + let latest_ctxts = { std::mem::take(&mut *self.latest_ctxts.lock()) }; + + // It's fine to iterate over a HashMap, because the serialization + // of the table that we insert data into doesn't depend on insertion + // order + for_all_ctxts_in(latest_ctxts.into_iter(), |(index, ctxt, data)| { + if self.serialized_ctxts.lock().insert(ctxt) { + encode_ctxt(encoder, index, data)?; + } + Ok(()) + })?; + + let latest_expns = { std::mem::take(&mut *self.latest_expns.lock()) }; + + for_all_expns_in(latest_expns.into_iter(), |index, expn, data| { + if self.serialized_expns.lock().insert(expn) { + encode_expn(encoder, index, data)?; + } + Ok(()) + })?; + } + debug!("encode_hygiene: Done serializing SyntaxContextData"); + Ok(()) + } +} + +#[derive(Default)] +/// Additional information used to assist in decoding hygiene data +pub struct HygieneDecodeContext { + // Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current + // global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create + // a new id in the global `HygieneData`. 
This map tracks the ID we end up picking, + // so that multiple occurrences of the same serialized id are decoded to the same + // `SyntaxContext` + remapped_ctxts: Lock<Vec<Option<SyntaxContext>>>, + // The same as `remapepd_ctxts`, but for `ExpnId`s + remapped_expns: Lock<Vec<Option<ExpnId>>>, +} + +pub fn decode_expn_id< + 'a, + D: Decoder, + F: FnOnce(&mut D, u32) -> Result<ExpnData, D::Error>, + G: FnOnce(CrateNum) -> &'a HygieneDecodeContext, +>( + d: &mut D, + mode: ExpnDataDecodeMode<'a, G>, + decode_data: F, +) -> Result<ExpnId, D::Error> { + let index = u32::decode(d)?; + let context = match mode { + ExpnDataDecodeMode::IncrComp(context) => context, + ExpnDataDecodeMode::Metadata(get_context) => { + let krate = CrateNum::decode(d)?; + get_context(krate) + } + }; + + // Do this after decoding, so that we decode a `CrateNum` + // if necessary + if index == ExpnId::root().as_u32() { + debug!("decode_expn_id: deserialized root"); + return Ok(ExpnId::root()); + } + + let outer_expns = &context.remapped_expns; + + // Ensure that the lock() temporary is dropped early + { + if let Some(expn_id) = outer_expns.lock().get(index as usize).copied().flatten() { + return Ok(expn_id); + } + } + + // Don't decode the data inside `HygieneData::with`, since we need to recursively decode + // other ExpnIds + let mut expn_data = decode_data(d, index)?; + + let expn_id = HygieneData::with(|hygiene_data| { + let expn_id = ExpnId(hygiene_data.expn_data.len() as u32); + + // If we just deserialized an `ExpnData` owned by + // the local crate, its `orig_id` will be stale, + // so we need to update it to its own value. + // This only happens when we deserialize the incremental cache, + // since a crate will never decode its own metadata. + if expn_data.krate == LOCAL_CRATE { + expn_data.orig_id = Some(expn_id.0); + } + + hygiene_data.expn_data.push(Some(expn_data)); + + let mut expns = outer_expns.lock(); + let new_len = index as usize + 1; + if expns.len() < new_len { + expns.resize(new_len, None); + } + expns[index as usize] = Some(expn_id); + drop(expns); + expn_id + }); + Ok(expn_id) +} + +// Decodes `SyntaxContext`, using the provided `HygieneDecodeContext` +// to track which `SyntaxContext`s we have already decoded. +// The provided closure will be invoked to deserialize a `SyntaxContextData` +// if we haven't already seen the id of the `SyntaxContext` we are deserializing. +pub fn decode_syntax_context< + D: Decoder, + F: FnOnce(&mut D, u32) -> Result<SyntaxContextData, D::Error>, +>( + d: &mut D, + context: &HygieneDecodeContext, + decode_data: F, +) -> Result<SyntaxContext, D::Error> { + let raw_id: u32 = Decodable::decode(d)?; + if raw_id == 0 { + debug!("decode_syntax_context: deserialized root"); + // The root is special + return Ok(SyntaxContext::root()); + } + + let outer_ctxts = &context.remapped_ctxts; + + // Ensure that the lock() temporary is dropped early + { + if let Some(ctxt) = outer_ctxts.lock().get(raw_id as usize).copied().flatten() { + return Ok(ctxt); + } + } + + // Allocate and store SyntaxContext id *before* calling the decoder function, + // as the SyntaxContextData may reference itself. + let new_ctxt = HygieneData::with(|hygiene_data| { + let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32); + // Push a dummy SyntaxContextData to ensure that nobody else can get the + // same ID as us. 
This will be overwritten after call `decode_Data` + hygiene_data.syntax_context_data.push(SyntaxContextData { + outer_expn: ExpnId::root(), + outer_transparency: Transparency::Transparent, + parent: SyntaxContext::root(), + opaque: SyntaxContext::root(), + opaque_and_semitransparent: SyntaxContext::root(), + dollar_crate_name: kw::Invalid, + }); + let mut ctxts = outer_ctxts.lock(); + let new_len = raw_id as usize + 1; + if ctxts.len() < new_len { + ctxts.resize(new_len, None); + } + ctxts[raw_id as usize] = Some(new_ctxt); + drop(ctxts); + new_ctxt + }); + + // Don't try to decode data while holding the lock, since we need to + // be able to recursively decode a SyntaxContext + let mut ctxt_data = decode_data(d, raw_id)?; + // Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names` + // We don't care what the encoding crate set this to - we want to resolve it + // from the perspective of the current compilation session + ctxt_data.dollar_crate_name = kw::DollarCrate; + + // Overwrite the dummy data with our decoded SyntaxContextData + HygieneData::with(|hygiene_data| { + let dummy = std::mem::replace( + &mut hygiene_data.syntax_context_data[new_ctxt.as_u32() as usize], + ctxt_data, + ); + // Make sure nothing weird happening while `decode_data` was running + assert_eq!(dummy.dollar_crate_name, kw::Invalid); + }); + + Ok(new_ctxt) +} + +pub fn num_syntax_ctxts() -> usize { + HygieneData::with(|data| data.syntax_context_data.len()) +} + +pub fn for_all_ctxts_in<E, F: FnMut((u32, SyntaxContext, &SyntaxContextData)) -> Result<(), E>>( + ctxts: impl Iterator<Item = SyntaxContext>, + mut f: F, +) -> Result<(), E> { + let all_data: Vec<_> = HygieneData::with(|data| { + ctxts.map(|ctxt| (ctxt, data.syntax_context_data[ctxt.0 as usize].clone())).collect() + }); + for (ctxt, data) in all_data.into_iter() { + f((ctxt.0, ctxt, &data))?; + } + Ok(()) +} + +pub fn for_all_expns_in<E, F: FnMut(u32, ExpnId, &ExpnData) -> Result<(), E>>( + expns: impl Iterator<Item = ExpnId>, + mut f: F, +) -> Result<(), E> { + let all_data: Vec<_> = HygieneData::with(|data| { + expns.map(|expn| (expn, data.expn_data[expn.0 as usize].clone())).collect() + }); + for (expn, data) in all_data.into_iter() { + f(expn.0, expn, &data.unwrap_or_else(|| panic!("Missing data for {:?}", expn)))?; + } + Ok(()) +} + +pub fn for_all_data<E, F: FnMut((u32, SyntaxContext, &SyntaxContextData)) -> Result<(), E>>( + mut f: F, +) -> Result<(), E> { + let all_data = HygieneData::with(|data| data.syntax_context_data.clone()); + for (i, data) in all_data.into_iter().enumerate() { + f((i as u32, SyntaxContext(i as u32), &data))?; + } + Ok(()) +} + +impl<E: Encoder> Encodable<E> for ExpnId { + default fn encode(&self, _: &mut E) -> Result<(), E::Error> { + panic!("cannot encode `ExpnId` with `{}`", std::any::type_name::<E>()); + } +} + +impl<D: Decoder> Decodable<D> for ExpnId { + default fn decode(_: &mut D) -> Result<Self, D::Error> { + panic!("cannot decode `ExpnId` with `{}`", std::any::type_name::<D>()); + } +} + +pub fn for_all_expn_data<E, F: FnMut(u32, &ExpnData) -> Result<(), E>>(mut f: F) -> Result<(), E> { + let all_data = HygieneData::with(|data| data.expn_data.clone()); + for (i, data) in all_data.into_iter().enumerate() { + f(i as u32, &data.unwrap_or_else(|| panic!("Missing ExpnData!")))?; + } + Ok(()) +} + +pub fn raw_encode_syntax_context<E: Encoder>( + ctxt: SyntaxContext, + context: &HygieneEncodeContext, + e: &mut E, +) -> Result<(), E::Error> { + if !context.serialized_ctxts.lock().contains(&ctxt) 
{ + context.latest_ctxts.lock().insert(ctxt); + } + ctxt.0.encode(e) +} + +pub fn raw_encode_expn_id<E: Encoder>( + expn: ExpnId, + context: &HygieneEncodeContext, + mode: ExpnDataEncodeMode, + e: &mut E, +) -> Result<(), E::Error> { + // Record the fact that we need to serialize the corresponding + // `ExpnData` + let needs_data = || { + if !context.serialized_expns.lock().contains(&expn) { + context.latest_expns.lock().insert(expn); + } + }; + + match mode { + ExpnDataEncodeMode::IncrComp => { + // Always serialize the `ExpnData` in incr comp mode + needs_data(); + expn.0.encode(e) + } + ExpnDataEncodeMode::Metadata => { + let data = expn.expn_data(); + // We only need to serialize the ExpnData + // if it comes from this crate. + // We currently don't serialize any hygiene information data for + // proc-macro crates: see the `SpecializedEncoder<Span>` impl + // for crate metadata. + if data.krate == LOCAL_CRATE { + needs_data(); + } + data.orig_id.expect("Missing orig_id").encode(e)?; + data.krate.encode(e) + } + } +} + +pub enum ExpnDataEncodeMode { + IncrComp, + Metadata, +} + +pub enum ExpnDataDecodeMode<'a, F: FnOnce(CrateNum) -> &'a HygieneDecodeContext> { + IncrComp(&'a HygieneDecodeContext), + Metadata(F), +} + +impl<'a> ExpnDataDecodeMode<'a, Box<dyn FnOnce(CrateNum) -> &'a HygieneDecodeContext>> { + pub fn incr_comp(ctxt: &'a HygieneDecodeContext) -> Self { + ExpnDataDecodeMode::IncrComp(ctxt) + } +} + +impl<E: Encoder> Encodable<E> for SyntaxContext { + default fn encode(&self, _: &mut E) -> Result<(), E::Error> { + panic!("cannot encode `SyntaxContext` with `{}`", std::any::type_name::<E>()); + } +} + +impl<D: Decoder> Decodable<D> for SyntaxContext { + default fn decode(_: &mut D) -> Result<Self, D::Error> { + panic!("cannot decode `SyntaxContext` with `{}`", std::any::type_name::<D>()); + } +} diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs new file mode 100644 index 00000000000..c654dade2ab --- /dev/null +++ b/compiler/rustc_span/src/lib.rs @@ -0,0 +1,1872 @@ +//! The source positions and related helper functions. +//! +//! ## Note +//! +//! This API is completely unstable and subject to change. 
+ +#![doc(html_root_url = "https://doc.rust-lang.org/nightly/")] +#![feature(crate_visibility_modifier)] +#![feature(const_fn)] +#![feature(const_panic)] +#![feature(negative_impls)] +#![feature(nll)] +#![feature(optin_builtin_traits)] +#![feature(min_specialization)] +#![feature(option_expect_none)] +#![feature(refcell_take)] + +#[macro_use] +extern crate rustc_macros; + +use rustc_data_structures::AtomicRef; +use rustc_macros::HashStable_Generic; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +mod caching_source_map_view; +pub mod source_map; +pub use self::caching_source_map_view::CachingSourceMapView; +use source_map::SourceMap; + +pub mod edition; +use edition::Edition; +pub mod hygiene; +pub use hygiene::SyntaxContext; +use hygiene::Transparency; +pub use hygiene::{DesugaringKind, ExpnData, ExpnId, ExpnKind, ForLoopLoc, MacroKind}; +pub mod def_id; +use def_id::{CrateNum, DefId, LOCAL_CRATE}; +mod span_encoding; +pub use span_encoding::{Span, DUMMY_SP}; + +pub mod symbol; +pub use symbol::{sym, Symbol}; + +mod analyze_source_file; +pub mod fatal_error; + +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::{Lock, Lrc}; + +use std::borrow::Cow; +use std::cell::RefCell; +use std::cmp::{self, Ordering}; +use std::fmt; +use std::hash::Hash; +use std::ops::{Add, Sub}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; + +use md5::Md5; +use sha1::Digest; +use sha1::Sha1; + +#[cfg(test)] +mod tests; + +// Per-session global variables: this struct is stored in thread-local storage +// in such a way that it is accessible without any kind of handle to all +// threads within the compilation session, but is not accessible outside the +// session. +pub struct SessionGlobals { + symbol_interner: Lock<symbol::Interner>, + span_interner: Lock<span_encoding::SpanInterner>, + hygiene_data: Lock<hygiene::HygieneData>, + source_map: Lock<Option<Lrc<SourceMap>>>, +} + +impl SessionGlobals { + pub fn new(edition: Edition) -> SessionGlobals { + SessionGlobals { + symbol_interner: Lock::new(symbol::Interner::fresh()), + span_interner: Lock::new(span_encoding::SpanInterner::default()), + hygiene_data: Lock::new(hygiene::HygieneData::new(edition)), + source_map: Lock::new(None), + } + } +} + +pub fn with_session_globals<R>(edition: Edition, f: impl FnOnce() -> R) -> R { + let session_globals = SessionGlobals::new(edition); + SESSION_GLOBALS.set(&session_globals, f) +} + +pub fn with_default_session_globals<R>(f: impl FnOnce() -> R) -> R { + with_session_globals(edition::DEFAULT_EDITION, f) +} + +// If this ever becomes non thread-local, `decode_syntax_context` +// and `decode_expn_id` will need to be updated to handle concurrent +// deserialization. +scoped_tls::scoped_thread_local!(pub static SESSION_GLOBALS: SessionGlobals); + +// FIXME: Perhaps this should not implement Rustc{Decodable, Encodable} +// +// FIXME: We should use this enum or something like it to get rid of the +// use of magic `/rust/1.x/...` paths across the board. +#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Hash)] +#[derive(HashStable_Generic, Decodable, Encodable)] +pub enum RealFileName { + Named(PathBuf), + /// For de-virtualized paths (namely paths into libstd that have been mapped + /// to the appropriate spot on the local host's file system), + Devirtualized { + /// `local_path` is the (host-dependent) local path to the file. 
+ local_path: PathBuf, + /// `virtual_name` is the stable path rustc will store internally within + /// build artifacts. + virtual_name: PathBuf, + }, +} + +impl RealFileName { + /// Returns the path suitable for reading from the file system on the local host. + /// Avoid embedding this in build artifacts; see `stable_name` for that. + pub fn local_path(&self) -> &Path { + match self { + RealFileName::Named(p) + | RealFileName::Devirtualized { local_path: p, virtual_name: _ } => &p, + } + } + + /// Returns the path suitable for reading from the file system on the local host. + /// Avoid embedding this in build artifacts; see `stable_name` for that. + pub fn into_local_path(self) -> PathBuf { + match self { + RealFileName::Named(p) + | RealFileName::Devirtualized { local_path: p, virtual_name: _ } => p, + } + } + + /// Returns the path suitable for embedding into build artifacts. Note that + /// a virtualized path will not correspond to a valid file system path; see + /// `local_path` for something that is more likely to return paths into the + /// local host file system. + pub fn stable_name(&self) -> &Path { + match self { + RealFileName::Named(p) + | RealFileName::Devirtualized { local_path: _, virtual_name: p } => &p, + } + } +} + +/// Differentiates between real files and common virtual files. +#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Hash)] +#[derive(HashStable_Generic, Decodable, Encodable)] +pub enum FileName { + Real(RealFileName), + /// Call to `quote!`. + QuoteExpansion(u64), + /// Command line. + Anon(u64), + /// Hack in `src/librustc_ast/parse.rs`. + // FIXME(jseyfried) + MacroExpansion(u64), + ProcMacroSourceCode(u64), + /// Strings provided as `--cfg [cfgspec]` stored in a `crate_cfg`. + CfgSpec(u64), + /// Strings provided as crate attributes in the CLI. + CliCrateAttr(u64), + /// Custom sources for explicit parser calls from plugins and drivers. + Custom(String), + DocTest(PathBuf, isize), + /// Post-substitution inline assembly from LLVM + InlineAsm(u64), +} + +impl std::fmt::Display for FileName { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use FileName::*; + match *self { + Real(RealFileName::Named(ref path)) => write!(fmt, "{}", path.display()), + // FIXME: might be nice to display both compoments of Devirtualized. + // But for now (to backport fix for issue #70924), best to not + // perturb diagnostics so its obvious test suite still works. 
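As a quick illustration of the two views described above (an editorial sketch, not part of this patch): `local_path` yields the path read from disk, while `stable_name` yields the virtualized path that ends up in build artifacts. The concrete paths are invented for the example.

```rust
use std::path::{Path, PathBuf};
use rustc_span::RealFileName;

fn main() {
    // A hypothetical devirtualized libstd path.
    let name = RealFileName::Devirtualized {
        local_path: PathBuf::from("/home/user/rust/library/std/src/lib.rs"),
        virtual_name: PathBuf::from("/rustc/abcdef123456/library/std/src/lib.rs"),
    };

    // What we read from disk vs. what we embed in artifacts.
    assert_eq!(name.local_path(), Path::new("/home/user/rust/library/std/src/lib.rs"));
    assert_eq!(name.stable_name(), Path::new("/rustc/abcdef123456/library/std/src/lib.rs"));
}
```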
+ Real(RealFileName::Devirtualized { ref local_path, virtual_name: _ }) => { + write!(fmt, "{}", local_path.display()) + } + QuoteExpansion(_) => write!(fmt, "<quote expansion>"), + MacroExpansion(_) => write!(fmt, "<macro expansion>"), + Anon(_) => write!(fmt, "<anon>"), + ProcMacroSourceCode(_) => write!(fmt, "<proc-macro source code>"), + CfgSpec(_) => write!(fmt, "<cfgspec>"), + CliCrateAttr(_) => write!(fmt, "<crate attribute>"), + Custom(ref s) => write!(fmt, "<{}>", s), + DocTest(ref path, _) => write!(fmt, "{}", path.display()), + InlineAsm(_) => write!(fmt, "<inline asm>"), + } + } +} + +impl From<PathBuf> for FileName { + fn from(p: PathBuf) -> Self { + assert!(!p.to_string_lossy().ends_with('>')); + FileName::Real(RealFileName::Named(p)) + } +} + +impl FileName { + pub fn is_real(&self) -> bool { + use FileName::*; + match *self { + Real(_) => true, + Anon(_) + | MacroExpansion(_) + | ProcMacroSourceCode(_) + | CfgSpec(_) + | CliCrateAttr(_) + | Custom(_) + | QuoteExpansion(_) + | DocTest(_, _) + | InlineAsm(_) => false, + } + } + + pub fn quote_expansion_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::QuoteExpansion(hasher.finish()) + } + + pub fn macro_expansion_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::MacroExpansion(hasher.finish()) + } + + pub fn anon_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::Anon(hasher.finish()) + } + + pub fn proc_macro_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::ProcMacroSourceCode(hasher.finish()) + } + + pub fn cfg_spec_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::QuoteExpansion(hasher.finish()) + } + + pub fn cli_crate_attr_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::CliCrateAttr(hasher.finish()) + } + + pub fn doc_test_source_code(path: PathBuf, line: isize) -> FileName { + FileName::DocTest(path, line) + } + + pub fn inline_asm_source_code(src: &str) -> FileName { + let mut hasher = StableHasher::new(); + src.hash(&mut hasher); + FileName::InlineAsm(hasher.finish()) + } +} + +/// Spans represent a region of code, used for error reporting. Positions in spans +/// are *absolute* positions from the beginning of the source_map, not positions +/// relative to `SourceFile`s. Methods on the `SourceMap` can be used to relate spans back +/// to the original source. +/// You must be careful if the span crosses more than one file - you will not be +/// able to use many of the functions on spans in source_map and you cannot assume +/// that the length of the `span = hi - lo`; there may be space in the `BytePos` +/// range between files. +/// +/// `SpanData` is public because `Span` uses a thread-local interner and can't be +/// sent to other threads, but some pieces of performance infra run in a separate thread. +/// Using `Span` is generally preferred. +#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd)] +pub struct SpanData { + pub lo: BytePos, + pub hi: BytePos, + /// Information about where the macro came from, if this piece of + /// code was created by a macro expansion. 
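A small sketch (editorial, not part of this patch) of how `FileName`s are constructed: real paths go through the `From<PathBuf>` impl above, while anonymous sources are keyed by a stable hash of their contents, so identical text maps to the same `FileName`.

```rust
use std::path::PathBuf;
use rustc_span::{FileName, RealFileName};

fn main() {
    // A real on-disk file.
    let real: FileName = PathBuf::from("src/main.rs").into();
    assert!(real.is_real());
    match &real {
        FileName::Real(RealFileName::Named(p)) => assert_eq!(p, &PathBuf::from("src/main.rs")),
        _ => unreachable!(),
    }

    // Anonymous sources with identical contents hash to the same name.
    let a = FileName::anon_source_code("fn main() {}");
    let b = FileName::anon_source_code("fn main() {}");
    assert_eq!(a, b);
}
```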
+ pub ctxt: SyntaxContext, +} + +impl SpanData { + #[inline] + pub fn with_lo(&self, lo: BytePos) -> Span { + Span::new(lo, self.hi, self.ctxt) + } + #[inline] + pub fn with_hi(&self, hi: BytePos) -> Span { + Span::new(self.lo, hi, self.ctxt) + } + #[inline] + pub fn with_ctxt(&self, ctxt: SyntaxContext) -> Span { + Span::new(self.lo, self.hi, ctxt) + } +} + +// The interner is pointed to by a thread local value which is only set on the main thread +// with parallelization is disabled. So we don't allow `Span` to transfer between threads +// to avoid panics and other errors, even though it would be memory safe to do so. +#[cfg(not(parallel_compiler))] +impl !Send for Span {} +#[cfg(not(parallel_compiler))] +impl !Sync for Span {} + +impl PartialOrd for Span { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + PartialOrd::partial_cmp(&self.data(), &rhs.data()) + } +} +impl Ord for Span { + fn cmp(&self, rhs: &Self) -> Ordering { + Ord::cmp(&self.data(), &rhs.data()) + } +} + +/// A collection of `Span`s. +/// +/// Spans have two orthogonal attributes: +/// +/// - They can be *primary spans*. In this case they are the locus of +/// the error, and would be rendered with `^^^`. +/// - They can have a *label*. In this case, the label is written next +/// to the mark in the snippet when we render. +#[derive(Clone, Debug, Hash, PartialEq, Eq, Encodable, Decodable)] +pub struct MultiSpan { + primary_spans: Vec<Span>, + span_labels: Vec<(Span, String)>, +} + +impl Span { + #[inline] + pub fn lo(self) -> BytePos { + self.data().lo + } + #[inline] + pub fn with_lo(self, lo: BytePos) -> Span { + self.data().with_lo(lo) + } + #[inline] + pub fn hi(self) -> BytePos { + self.data().hi + } + #[inline] + pub fn with_hi(self, hi: BytePos) -> Span { + self.data().with_hi(hi) + } + #[inline] + pub fn ctxt(self) -> SyntaxContext { + self.data().ctxt + } + #[inline] + pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span { + self.data().with_ctxt(ctxt) + } + + /// Returns `true` if this is a dummy span with any hygienic context. + #[inline] + pub fn is_dummy(self) -> bool { + let span = self.data(); + span.lo.0 == 0 && span.hi.0 == 0 + } + + /// Returns `true` if this span comes from a macro or desugaring. + #[inline] + pub fn from_expansion(self) -> bool { + self.ctxt() != SyntaxContext::root() + } + + /// Returns `true` if `span` originates in a derive-macro's expansion. + pub fn in_derive_expansion(self) -> bool { + matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _)) + } + + #[inline] + pub fn with_root_ctxt(lo: BytePos, hi: BytePos) -> Span { + Span::new(lo, hi, SyntaxContext::root()) + } + + /// Returns a new span representing an empty span at the beginning of this span + #[inline] + pub fn shrink_to_lo(self) -> Span { + let span = self.data(); + span.with_hi(span.lo) + } + /// Returns a new span representing an empty span at the end of this span. + #[inline] + pub fn shrink_to_hi(self) -> Span { + let span = self.data(); + span.with_lo(span.hi) + } + + /// Returns `self` if `self` is not the dummy span, and `other` otherwise. + pub fn substitute_dummy(self, other: Span) -> Span { + if self.is_dummy() { other } else { self } + } + + /// Returns `true` if `self` fully encloses `other`. + pub fn contains(self, other: Span) -> bool { + let span = self.data(); + let other = other.data(); + span.lo <= other.lo && other.hi <= span.hi + } + + /// Returns `true` if `self` touches `other`. 
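The geometric helpers above (and the `overlaps` method whose definition follows this example) compose in the obvious way; a minimal sketch, not part of this patch, assuming the default session globals are installed:

```rust
use rustc_span::{BytePos, Span};

fn main() {
    rustc_span::with_default_session_globals(|| {
        let outer = Span::with_root_ctxt(BytePos(2), BytePos(10));
        let inner = Span::with_root_ctxt(BytePos(4), BytePos(7));
        let disjoint = Span::with_root_ctxt(BytePos(20), BytePos(25));

        assert!(outer.contains(inner));
        assert!(outer.overlaps(inner));
        assert!(!outer.overlaps(disjoint));

        // Empty spans at the two ends of `outer`.
        assert_eq!(outer.shrink_to_lo(), Span::with_root_ctxt(BytePos(2), BytePos(2)));
        assert_eq!(outer.shrink_to_hi(), Span::with_root_ctxt(BytePos(10), BytePos(10)));
    });
}
```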
+ pub fn overlaps(self, other: Span) -> bool { + let span = self.data(); + let other = other.data(); + span.lo < other.hi && other.lo < span.hi + } + + /// Returns `true` if the spans are equal with regards to the source text. + /// + /// Use this instead of `==` when either span could be generated code, + /// and you only care that they point to the same bytes of source text. + pub fn source_equal(&self, other: &Span) -> bool { + let span = self.data(); + let other = other.data(); + span.lo == other.lo && span.hi == other.hi + } + + /// Returns `Some(span)`, where the start is trimmed by the end of `other`. + pub fn trim_start(self, other: Span) -> Option<Span> { + let span = self.data(); + let other = other.data(); + if span.hi > other.hi { Some(span.with_lo(cmp::max(span.lo, other.hi))) } else { None } + } + + /// Returns the source span -- this is either the supplied span, or the span for + /// the macro callsite that expanded to it. + pub fn source_callsite(self) -> Span { + let expn_data = self.ctxt().outer_expn_data(); + if !expn_data.is_root() { expn_data.call_site.source_callsite() } else { self } + } + + /// The `Span` for the tokens in the previous macro expansion from which `self` was generated, + /// if any. + pub fn parent(self) -> Option<Span> { + let expn_data = self.ctxt().outer_expn_data(); + if !expn_data.is_root() { Some(expn_data.call_site) } else { None } + } + + /// Edition of the crate from which this span came. + pub fn edition(self) -> edition::Edition { + self.ctxt().outer_expn_data().edition + } + + #[inline] + pub fn rust_2015(&self) -> bool { + self.edition() == edition::Edition::Edition2015 + } + + #[inline] + pub fn rust_2018(&self) -> bool { + self.edition() >= edition::Edition::Edition2018 + } + + /// Returns the source callee. + /// + /// Returns `None` if the supplied span has no expansion trace, + /// else returns the `ExpnData` for the macro definition + /// corresponding to the source callsite. + pub fn source_callee(self) -> Option<ExpnData> { + fn source_callee(expn_data: ExpnData) -> ExpnData { + let next_expn_data = expn_data.call_site.ctxt().outer_expn_data(); + if !next_expn_data.is_root() { source_callee(next_expn_data) } else { expn_data } + } + let expn_data = self.ctxt().outer_expn_data(); + if !expn_data.is_root() { Some(source_callee(expn_data)) } else { None } + } + + /// Checks if a span is "internal" to a macro in which `#[unstable]` + /// items can be used (that is, a macro marked with + /// `#[allow_internal_unstable]`). + pub fn allows_unstable(&self, feature: Symbol) -> bool { + self.ctxt().outer_expn_data().allow_internal_unstable.map_or(false, |features| { + features + .iter() + .any(|&f| f == feature || f == sym::allow_internal_unstable_backcompat_hack) + }) + } + + /// Checks if this span arises from a compiler desugaring of kind `kind`. + pub fn is_desugaring(&self, kind: DesugaringKind) -> bool { + match self.ctxt().outer_expn_data().kind { + ExpnKind::Desugaring(k) => k == kind, + _ => false, + } + } + + /// Returns the compiler desugaring that created this span, or `None` + /// if this span is not from a desugaring. + pub fn desugaring_kind(&self) -> Option<DesugaringKind> { + match self.ctxt().outer_expn_data().kind { + ExpnKind::Desugaring(k) => Some(k), + _ => None, + } + } + + /// Checks if a span is "internal" to a macro in which `unsafe` + /// can be used without triggering the `unsafe_code` lint + // (that is, a macro marked with `#[allow_internal_unsafe]`). 
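The expansion-related queries above can be exercised end to end by creating an expansion by hand; a hedged sketch (not part of this patch) that marks a span as a `?` desugaring and queries it back:

```rust
use rustc_span::edition::Edition;
use rustc_span::{BytePos, DesugaringKind, ExpnData, ExpnKind, Span};

fn main() {
    rustc_span::with_default_session_globals(|| {
        let call_site = Span::with_root_ctxt(BytePos(0), BytePos(5));
        let data = ExpnData::default(
            ExpnKind::Desugaring(DesugaringKind::QuestionMark),
            call_site,
            Edition::Edition2018,
            None,
        );
        let desugared = call_site.fresh_expansion(data);

        assert!(desugared.is_desugaring(DesugaringKind::QuestionMark));
        assert_eq!(desugared.desugaring_kind(), Some(DesugaringKind::QuestionMark));
        // The original call site stays reachable through the expansion info.
        assert_eq!(desugared.source_callsite(), call_site);
        assert_eq!(desugared.parent(), Some(call_site));
    });
}
```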
+ pub fn allows_unsafe(&self) -> bool { + self.ctxt().outer_expn_data().allow_internal_unsafe + } + + pub fn macro_backtrace(mut self) -> impl Iterator<Item = ExpnData> { + let mut prev_span = DUMMY_SP; + std::iter::from_fn(move || { + loop { + let expn_data = self.ctxt().outer_expn_data(); + if expn_data.is_root() { + return None; + } + + let is_recursive = expn_data.call_site.source_equal(&prev_span); + + prev_span = self; + self = expn_data.call_site; + + // Don't print recursive invocations. + if !is_recursive { + return Some(expn_data); + } + } + }) + } + + /// Returns a `Span` that would enclose both `self` and `end`. + pub fn to(self, end: Span) -> Span { + let span_data = self.data(); + let end_data = end.data(); + // FIXME(jseyfried): `self.ctxt` should always equal `end.ctxt` here (cf. issue #23480). + // Return the macro span on its own to avoid weird diagnostic output. It is preferable to + // have an incomplete span than a completely nonsensical one. + if span_data.ctxt != end_data.ctxt { + if span_data.ctxt == SyntaxContext::root() { + return end; + } else if end_data.ctxt == SyntaxContext::root() { + return self; + } + // Both spans fall within a macro. + // FIXME(estebank): check if it is the *same* macro. + } + Span::new( + cmp::min(span_data.lo, end_data.lo), + cmp::max(span_data.hi, end_data.hi), + if span_data.ctxt == SyntaxContext::root() { end_data.ctxt } else { span_data.ctxt }, + ) + } + + /// Returns a `Span` between the end of `self` to the beginning of `end`. + pub fn between(self, end: Span) -> Span { + let span = self.data(); + let end = end.data(); + Span::new( + span.hi, + end.lo, + if end.ctxt == SyntaxContext::root() { end.ctxt } else { span.ctxt }, + ) + } + + /// Returns a `Span` between the beginning of `self` to the beginning of `end`. + pub fn until(self, end: Span) -> Span { + let span = self.data(); + let end = end.data(); + Span::new( + span.lo, + end.lo, + if end.ctxt == SyntaxContext::root() { end.ctxt } else { span.ctxt }, + ) + } + + pub fn from_inner(self, inner: InnerSpan) -> Span { + let span = self.data(); + Span::new( + span.lo + BytePos::from_usize(inner.start), + span.lo + BytePos::from_usize(inner.end), + span.ctxt, + ) + } + + /// Equivalent of `Span::def_site` from the proc macro API, + /// except that the location is taken from the `self` span. + pub fn with_def_site_ctxt(self, expn_id: ExpnId) -> Span { + self.with_ctxt_from_mark(expn_id, Transparency::Opaque) + } + + /// Equivalent of `Span::call_site` from the proc macro API, + /// except that the location is taken from the `self` span. + pub fn with_call_site_ctxt(&self, expn_id: ExpnId) -> Span { + self.with_ctxt_from_mark(expn_id, Transparency::Transparent) + } + + /// Equivalent of `Span::mixed_site` from the proc macro API, + /// except that the location is taken from the `self` span. + pub fn with_mixed_site_ctxt(&self, expn_id: ExpnId) -> Span { + self.with_ctxt_from_mark(expn_id, Transparency::SemiTransparent) + } + + /// Produces a span with the same location as `self` and context produced by a macro with the + /// given ID and transparency, assuming that macro was defined directly and not produced by + /// some other macro (which is the case for built-in and procedural macros). 
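`to`, `between` and `until` above differ only in which endpoints they keep; a small sketch (not part of this patch) with two disjoint root-context spans:

```rust
use rustc_span::{BytePos, Span};

fn main() {
    rustc_span::with_default_session_globals(|| {
        let first = Span::with_root_ctxt(BytePos(0), BytePos(5));
        let second = Span::with_root_ctxt(BytePos(10), BytePos(15));

        // `to` encloses both spans, `between` covers only the gap,
        // and `until` runs from the start of `first` to the start of `second`.
        assert_eq!(first.to(second), Span::with_root_ctxt(BytePos(0), BytePos(15)));
        assert_eq!(first.between(second), Span::with_root_ctxt(BytePos(5), BytePos(10)));
        assert_eq!(first.until(second), Span::with_root_ctxt(BytePos(0), BytePos(10)));
    });
}
```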
+    pub fn with_ctxt_from_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
+        self.with_ctxt(SyntaxContext::root().apply_mark(expn_id, transparency))
+    }
+
+    #[inline]
+    pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
+        let span = self.data();
+        span.with_ctxt(span.ctxt.apply_mark(expn_id, transparency))
+    }
+
+    #[inline]
+    pub fn remove_mark(&mut self) -> ExpnId {
+        let mut span = self.data();
+        let mark = span.ctxt.remove_mark();
+        *self = Span::new(span.lo, span.hi, span.ctxt);
+        mark
+    }
+
+    #[inline]
+    pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
+        let mut span = self.data();
+        let mark = span.ctxt.adjust(expn_id);
+        *self = Span::new(span.lo, span.hi, span.ctxt);
+        mark
+    }
+
+    #[inline]
+    pub fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
+        let mut span = self.data();
+        let mark = span.ctxt.normalize_to_macros_2_0_and_adjust(expn_id);
+        *self = Span::new(span.lo, span.hi, span.ctxt);
+        mark
+    }
+
+    #[inline]
+    pub fn glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>> {
+        let mut span = self.data();
+        let mark = span.ctxt.glob_adjust(expn_id, glob_span);
+        *self = Span::new(span.lo, span.hi, span.ctxt);
+        mark
+    }
+
+    #[inline]
+    pub fn reverse_glob_adjust(
+        &mut self,
+        expn_id: ExpnId,
+        glob_span: Span,
+    ) -> Option<Option<ExpnId>> {
+        let mut span = self.data();
+        let mark = span.ctxt.reverse_glob_adjust(expn_id, glob_span);
+        *self = Span::new(span.lo, span.hi, span.ctxt);
+        mark
+    }
+
+    #[inline]
+    pub fn normalize_to_macros_2_0(self) -> Span {
+        let span = self.data();
+        span.with_ctxt(span.ctxt.normalize_to_macros_2_0())
+    }
+
+    #[inline]
+    pub fn normalize_to_macro_rules(self) -> Span {
+        let span = self.data();
+        span.with_ctxt(span.ctxt.normalize_to_macro_rules())
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct SpanLabel {
+    /// The span we are going to include in the final snippet.
+    pub span: Span,
+
+    /// Is this a primary span? This is the "locus" of the message,
+    /// and is indicated with a `^^^^` underline, versus `----`.
+    pub is_primary: bool,
+
+    /// What label should we attach to this span (if any)?
+    pub label: Option<String>,
+}
+
+impl Default for Span {
+    fn default() -> Self {
+        DUMMY_SP
+    }
+}
+
+impl<E: Encoder> Encodable<E> for Span {
+    default fn encode(&self, s: &mut E) -> Result<(), E::Error> {
+        let span = self.data();
+        s.emit_struct("Span", 2, |s| {
+            s.emit_struct_field("lo", 0, |s| span.lo.encode(s))?;
+            s.emit_struct_field("hi", 1, |s| span.hi.encode(s))
+        })
+    }
+}
+impl<D: Decoder> Decodable<D> for Span {
+    default fn decode(s: &mut D) -> Result<Span, D::Error> {
+        s.read_struct("Span", 2, |d| {
+            let lo = d.read_struct_field("lo", 0, Decodable::decode)?;
+            let hi = d.read_struct_field("hi", 1, Decodable::decode)?;
+
+            Ok(Span::new(lo, hi, SyntaxContext::root()))
+        })
+    }
+}
+
+/// Calls the provided closure, using the provided `SourceMap` to format
+/// any spans that are debug-printed during the closure's execution.
+///
+/// Normally, the global `TyCtxt` is used to retrieve the `SourceMap`
+/// (see `rustc_interface::callbacks::span_debug1`). However, some parts
+/// of the compiler (e.g. `rustc_parse`) may debug-print `Span`s before
+/// a `TyCtxt` is available. In this case, we fall back to
+/// the `SourceMap` provided to this function.
If that is not available, +/// we fall back to printing the raw `Span` field values +pub fn with_source_map<T, F: FnOnce() -> T>(source_map: Lrc<SourceMap>, f: F) -> T { + SESSION_GLOBALS.with(|session_globals| { + *session_globals.source_map.borrow_mut() = Some(source_map); + }); + struct ClearSourceMap; + impl Drop for ClearSourceMap { + fn drop(&mut self) { + SESSION_GLOBALS.with(|session_globals| { + session_globals.source_map.borrow_mut().take(); + }); + } + } + + let _guard = ClearSourceMap; + f() +} + +pub fn debug_with_source_map( + span: Span, + f: &mut fmt::Formatter<'_>, + source_map: &SourceMap, +) -> fmt::Result { + write!(f, "{} ({:?})", source_map.span_to_string(span), span.ctxt()) +} + +pub fn default_span_debug(span: Span, f: &mut fmt::Formatter<'_>) -> fmt::Result { + SESSION_GLOBALS.with(|session_globals| { + if let Some(source_map) = &*session_globals.source_map.borrow() { + debug_with_source_map(span, f, source_map) + } else { + f.debug_struct("Span") + .field("lo", &span.lo()) + .field("hi", &span.hi()) + .field("ctxt", &span.ctxt()) + .finish() + } + }) +} + +impl fmt::Debug for Span { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (*SPAN_DEBUG)(*self, f) + } +} + +impl fmt::Debug for SpanData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (*SPAN_DEBUG)(Span::new(self.lo, self.hi, self.ctxt), f) + } +} + +impl MultiSpan { + #[inline] + pub fn new() -> MultiSpan { + MultiSpan { primary_spans: vec![], span_labels: vec![] } + } + + pub fn from_span(primary_span: Span) -> MultiSpan { + MultiSpan { primary_spans: vec![primary_span], span_labels: vec![] } + } + + pub fn from_spans(mut vec: Vec<Span>) -> MultiSpan { + vec.sort(); + MultiSpan { primary_spans: vec, span_labels: vec![] } + } + + pub fn push_span_label(&mut self, span: Span, label: String) { + self.span_labels.push((span, label)); + } + + /// Selects the first primary span (if any). + pub fn primary_span(&self) -> Option<Span> { + self.primary_spans.first().cloned() + } + + /// Returns all primary spans. + pub fn primary_spans(&self) -> &[Span] { + &self.primary_spans + } + + /// Returns `true` if any of the primary spans are displayable. + pub fn has_primary_spans(&self) -> bool { + self.primary_spans.iter().any(|sp| !sp.is_dummy()) + } + + /// Returns `true` if this contains only a dummy primary span with any hygienic context. + pub fn is_dummy(&self) -> bool { + let mut is_dummy = true; + for span in &self.primary_spans { + if !span.is_dummy() { + is_dummy = false; + } + } + is_dummy + } + + /// Replaces all occurrences of one Span with another. Used to move `Span`s in areas that don't + /// display well (like std macros). Returns whether replacements occurred. + pub fn replace(&mut self, before: Span, after: Span) -> bool { + let mut replacements_occurred = false; + for primary_span in &mut self.primary_spans { + if *primary_span == before { + *primary_span = after; + replacements_occurred = true; + } + } + for span_label in &mut self.span_labels { + if span_label.0 == before { + span_label.0 = after; + replacements_occurred = true; + } + } + replacements_occurred + } + + /// Returns the strings to highlight. We always ensure that there + /// is an entry for each of the primary spans -- for each primary + /// span `P`, if there is at least one label with span `P`, we return + /// those labels (marked as primary). But otherwise we return + /// `SpanLabel` instances with empty labels. 
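A short sketch (editorial, not part of this patch) of the `MultiSpan` bookkeeping: one primary span plus a labelled secondary span, rendered through the `span_labels` method defined right after this example.

```rust
use rustc_span::{BytePos, MultiSpan, Span};

fn main() {
    rustc_span::with_default_session_globals(|| {
        let primary = Span::with_root_ctxt(BytePos(0), BytePos(3));
        let secondary = Span::with_root_ctxt(BytePos(10), BytePos(12));

        let mut msp = MultiSpan::from_span(primary);
        msp.push_span_label(secondary, "relevant context".to_string());

        // One entry per label, plus an unlabelled entry for the primary span.
        let labels = msp.span_labels();
        assert_eq!(labels.len(), 2);
        assert!(labels.iter().any(|l| l.is_primary && l.label.is_none()));
    });
}
```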
+ pub fn span_labels(&self) -> Vec<SpanLabel> { + let is_primary = |span| self.primary_spans.contains(&span); + + let mut span_labels = self + .span_labels + .iter() + .map(|&(span, ref label)| SpanLabel { + span, + is_primary: is_primary(span), + label: Some(label.clone()), + }) + .collect::<Vec<_>>(); + + for &span in &self.primary_spans { + if !span_labels.iter().any(|sl| sl.span == span) { + span_labels.push(SpanLabel { span, is_primary: true, label: None }); + } + } + + span_labels + } + + /// Returns `true` if any of the span labels is displayable. + pub fn has_span_labels(&self) -> bool { + self.span_labels.iter().any(|(sp, _)| !sp.is_dummy()) + } +} + +impl From<Span> for MultiSpan { + fn from(span: Span) -> MultiSpan { + MultiSpan::from_span(span) + } +} + +impl From<Vec<Span>> for MultiSpan { + fn from(spans: Vec<Span>) -> MultiSpan { + MultiSpan::from_spans(spans) + } +} + +/// Identifies an offset of a multi-byte character in a `SourceFile`. +#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)] +pub struct MultiByteChar { + /// The absolute offset of the character in the `SourceMap`. + pub pos: BytePos, + /// The number of bytes, `>= 2`. + pub bytes: u8, +} + +/// Identifies an offset of a non-narrow character in a `SourceFile`. +#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)] +pub enum NonNarrowChar { + /// Represents a zero-width character. + ZeroWidth(BytePos), + /// Represents a wide (full-width) character. + Wide(BytePos), + /// Represents a tab character, represented visually with a width of 4 characters. + Tab(BytePos), +} + +impl NonNarrowChar { + fn new(pos: BytePos, width: usize) -> Self { + match width { + 0 => NonNarrowChar::ZeroWidth(pos), + 2 => NonNarrowChar::Wide(pos), + 4 => NonNarrowChar::Tab(pos), + _ => panic!("width {} given for non-narrow character", width), + } + } + + /// Returns the absolute offset of the character in the `SourceMap`. + pub fn pos(&self) -> BytePos { + match *self { + NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p, + } + } + + /// Returns the width of the character, 0 (zero-width) or 2 (wide). + pub fn width(&self) -> usize { + match *self { + NonNarrowChar::ZeroWidth(_) => 0, + NonNarrowChar::Wide(_) => 2, + NonNarrowChar::Tab(_) => 4, + } + } +} + +impl Add<BytePos> for NonNarrowChar { + type Output = Self; + + fn add(self, rhs: BytePos) -> Self { + match self { + NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs), + NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs), + NonNarrowChar::Tab(pos) => NonNarrowChar::Tab(pos + rhs), + } + } +} + +impl Sub<BytePos> for NonNarrowChar { + type Output = Self; + + fn sub(self, rhs: BytePos) -> Self { + match self { + NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs), + NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs), + NonNarrowChar::Tab(pos) => NonNarrowChar::Tab(pos - rhs), + } + } +} + +/// Identifies an offset of a character that was normalized away from `SourceFile`. +#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)] +pub struct NormalizedPos { + /// The absolute offset of the character in the `SourceMap`. + pub pos: BytePos, + /// The difference between original and normalized string at position. + pub diff: u32, +} + +#[derive(PartialEq, Eq, Clone, Debug)] +pub enum ExternalSource { + /// No external source has to be loaded, since the `SourceFile` represents a local crate. 
+ Unneeded, + Foreign { + kind: ExternalSourceKind, + /// This SourceFile's byte-offset within the source_map of its original crate + original_start_pos: BytePos, + /// The end of this SourceFile within the source_map of its original crate + original_end_pos: BytePos, + }, +} + +/// The state of the lazy external source loading mechanism of a `SourceFile`. +#[derive(PartialEq, Eq, Clone, Debug)] +pub enum ExternalSourceKind { + /// The external source has been loaded already. + Present(Lrc<String>), + /// No attempt has been made to load the external source. + AbsentOk, + /// A failed attempt has been made to load the external source. + AbsentErr, + Unneeded, +} + +impl ExternalSource { + pub fn is_absent(&self) -> bool { + match self { + ExternalSource::Foreign { kind: ExternalSourceKind::Present(_), .. } => false, + _ => true, + } + } + + pub fn get_source(&self) -> Option<&Lrc<String>> { + match self { + ExternalSource::Foreign { kind: ExternalSourceKind::Present(ref src), .. } => Some(src), + _ => None, + } + } +} + +#[derive(Debug)] +pub struct OffsetOverflowError; + +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Encodable, Decodable)] +pub enum SourceFileHashAlgorithm { + Md5, + Sha1, +} + +impl FromStr for SourceFileHashAlgorithm { + type Err = (); + + fn from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()> { + match s { + "md5" => Ok(SourceFileHashAlgorithm::Md5), + "sha1" => Ok(SourceFileHashAlgorithm::Sha1), + _ => Err(()), + } + } +} + +rustc_data_structures::impl_stable_hash_via_hash!(SourceFileHashAlgorithm); + +/// The hash of the on-disk source file used for debug info. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(HashStable_Generic, Encodable, Decodable)] +pub struct SourceFileHash { + pub kind: SourceFileHashAlgorithm, + value: [u8; 20], +} + +impl SourceFileHash { + pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash { + let mut hash = SourceFileHash { kind, value: Default::default() }; + let len = hash.hash_len(); + let value = &mut hash.value[..len]; + let data = src.as_bytes(); + match kind { + SourceFileHashAlgorithm::Md5 => { + value.copy_from_slice(&Md5::digest(data)); + } + SourceFileHashAlgorithm::Sha1 => { + value.copy_from_slice(&Sha1::digest(data)); + } + } + hash + } + + /// Check if the stored hash matches the hash of the string. + pub fn matches(&self, src: &str) -> bool { + Self::new(self.kind, src) == *self + } + + /// The bytes of the hash. + pub fn hash_bytes(&self) -> &[u8] { + let len = self.hash_len(); + &self.value[..len] + } + + fn hash_len(&self) -> usize { + match self.kind { + SourceFileHashAlgorithm::Md5 => 16, + SourceFileHashAlgorithm::Sha1 => 20, + } + } +} + +/// A single source in the `SourceMap`. +#[derive(Clone)] +pub struct SourceFile { + /// The name of the file that the source came from. Source that doesn't + /// originate from files has names between angle brackets by convention + /// (e.g., `<anon>`). + pub name: FileName, + /// `true` if the `name` field above has been modified by `--remap-path-prefix`. + pub name_was_remapped: bool, + /// The unmapped path of the file that the source came from. + /// Set to `None` if the `SourceFile` was imported from an external crate. + pub unmapped_path: Option<FileName>, + /// The complete source code. + pub src: Option<Lrc<String>>, + /// The source code's hash. + pub src_hash: SourceFileHash, + /// The external source code (used for external crates, which will have a `None` + /// value as `self.src`. 
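Hashing source text with `SourceFileHash` is self-contained; a minimal sketch (not part of this patch):

```rust
use rustc_span::{SourceFileHash, SourceFileHashAlgorithm};

fn main() {
    let src = "fn main() {}\n";
    let hash = SourceFileHash::new(SourceFileHashAlgorithm::Md5, src);

    assert!(hash.matches(src));
    assert!(!hash.matches("fn main() { panic!() }\n"));
    // An MD5 digest is 16 bytes; SHA-1 would use the full 20-byte buffer.
    assert_eq!(hash.hash_bytes().len(), 16);
}
```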
+ pub external_src: Lock<ExternalSource>, + /// The start position of this source in the `SourceMap`. + pub start_pos: BytePos, + /// The end position of this source in the `SourceMap`. + pub end_pos: BytePos, + /// Locations of lines beginnings in the source code. + pub lines: Vec<BytePos>, + /// Locations of multi-byte characters in the source code. + pub multibyte_chars: Vec<MultiByteChar>, + /// Width of characters that are not narrow in the source code. + pub non_narrow_chars: Vec<NonNarrowChar>, + /// Locations of characters removed during normalization. + pub normalized_pos: Vec<NormalizedPos>, + /// A hash of the filename, used for speeding up hashing in incremental compilation. + pub name_hash: u128, + /// Indicates which crate this `SourceFile` was imported from. + pub cnum: CrateNum, +} + +impl<S: Encoder> Encodable<S> for SourceFile { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_struct("SourceFile", 8, |s| { + s.emit_struct_field("name", 0, |s| self.name.encode(s))?; + s.emit_struct_field("name_was_remapped", 1, |s| self.name_was_remapped.encode(s))?; + s.emit_struct_field("src_hash", 2, |s| self.src_hash.encode(s))?; + s.emit_struct_field("start_pos", 3, |s| self.start_pos.encode(s))?; + s.emit_struct_field("end_pos", 4, |s| self.end_pos.encode(s))?; + s.emit_struct_field("lines", 5, |s| { + let lines = &self.lines[..]; + // Store the length. + s.emit_u32(lines.len() as u32)?; + + if !lines.is_empty() { + // In order to preserve some space, we exploit the fact that + // the lines list is sorted and individual lines are + // probably not that long. Because of that we can store lines + // as a difference list, using as little space as possible + // for the differences. + let max_line_length = if lines.len() == 1 { + 0 + } else { + lines.windows(2).map(|w| w[1] - w[0]).map(|bp| bp.to_usize()).max().unwrap() + }; + + let bytes_per_diff: u8 = match max_line_length { + 0..=0xFF => 1, + 0x100..=0xFFFF => 2, + _ => 4, + }; + + // Encode the number of bytes used per diff. + bytes_per_diff.encode(s)?; + + // Encode the first element. + lines[0].encode(s)?; + + let diff_iter = (&lines[..]).windows(2).map(|w| (w[1] - w[0])); + + match bytes_per_diff { + 1 => { + for diff in diff_iter { + (diff.0 as u8).encode(s)? + } + } + 2 => { + for diff in diff_iter { + (diff.0 as u16).encode(s)? + } + } + 4 => { + for diff in diff_iter { + diff.0.encode(s)? 
+ } + } + _ => unreachable!(), + } + } + + Ok(()) + })?; + s.emit_struct_field("multibyte_chars", 6, |s| self.multibyte_chars.encode(s))?; + s.emit_struct_field("non_narrow_chars", 7, |s| self.non_narrow_chars.encode(s))?; + s.emit_struct_field("name_hash", 8, |s| self.name_hash.encode(s))?; + s.emit_struct_field("normalized_pos", 9, |s| self.normalized_pos.encode(s))?; + s.emit_struct_field("cnum", 10, |s| self.cnum.encode(s)) + }) + } +} + +impl<D: Decoder> Decodable<D> for SourceFile { + fn decode(d: &mut D) -> Result<SourceFile, D::Error> { + d.read_struct("SourceFile", 8, |d| { + let name: FileName = d.read_struct_field("name", 0, |d| Decodable::decode(d))?; + let name_was_remapped: bool = + d.read_struct_field("name_was_remapped", 1, |d| Decodable::decode(d))?; + let src_hash: SourceFileHash = + d.read_struct_field("src_hash", 2, |d| Decodable::decode(d))?; + let start_pos: BytePos = + d.read_struct_field("start_pos", 3, |d| Decodable::decode(d))?; + let end_pos: BytePos = d.read_struct_field("end_pos", 4, |d| Decodable::decode(d))?; + let lines: Vec<BytePos> = d.read_struct_field("lines", 5, |d| { + let num_lines: u32 = Decodable::decode(d)?; + let mut lines = Vec::with_capacity(num_lines as usize); + + if num_lines > 0 { + // Read the number of bytes used per diff. + let bytes_per_diff: u8 = Decodable::decode(d)?; + + // Read the first element. + let mut line_start: BytePos = Decodable::decode(d)?; + lines.push(line_start); + + for _ in 1..num_lines { + let diff = match bytes_per_diff { + 1 => d.read_u8()? as u32, + 2 => d.read_u16()? as u32, + 4 => d.read_u32()?, + _ => unreachable!(), + }; + + line_start = line_start + BytePos(diff); + + lines.push(line_start); + } + } + + Ok(lines) + })?; + let multibyte_chars: Vec<MultiByteChar> = + d.read_struct_field("multibyte_chars", 6, |d| Decodable::decode(d))?; + let non_narrow_chars: Vec<NonNarrowChar> = + d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?; + let name_hash: u128 = d.read_struct_field("name_hash", 8, |d| Decodable::decode(d))?; + let normalized_pos: Vec<NormalizedPos> = + d.read_struct_field("normalized_pos", 9, |d| Decodable::decode(d))?; + let cnum: CrateNum = d.read_struct_field("cnum", 10, |d| Decodable::decode(d))?; + Ok(SourceFile { + name, + name_was_remapped, + unmapped_path: None, + start_pos, + end_pos, + src: None, + src_hash, + // Unused - the metadata decoder will construct + // a new SourceFile, filling in `external_src` properly + external_src: Lock::new(ExternalSource::Unneeded), + lines, + multibyte_chars, + non_narrow_chars, + normalized_pos, + name_hash, + cnum, + }) + }) + } +} + +impl fmt::Debug for SourceFile { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "SourceFile({})", self.name) + } +} + +impl SourceFile { + pub fn new( + name: FileName, + name_was_remapped: bool, + unmapped_path: FileName, + mut src: String, + start_pos: BytePos, + hash_kind: SourceFileHashAlgorithm, + ) -> Self { + // Compute the file hash before any normalization. 
+ let src_hash = SourceFileHash::new(hash_kind, &src); + let normalized_pos = normalize_src(&mut src, start_pos); + + let name_hash = { + let mut hasher: StableHasher = StableHasher::new(); + name.hash(&mut hasher); + hasher.finish::<u128>() + }; + let end_pos = start_pos.to_usize() + src.len(); + assert!(end_pos <= u32::MAX as usize); + + let (lines, multibyte_chars, non_narrow_chars) = + analyze_source_file::analyze_source_file(&src[..], start_pos); + + SourceFile { + name, + name_was_remapped, + unmapped_path: Some(unmapped_path), + src: Some(Lrc::new(src)), + src_hash, + external_src: Lock::new(ExternalSource::Unneeded), + start_pos, + end_pos: Pos::from_usize(end_pos), + lines, + multibyte_chars, + non_narrow_chars, + normalized_pos, + name_hash, + cnum: LOCAL_CRATE, + } + } + + /// Returns the `BytePos` of the beginning of the current line. + pub fn line_begin_pos(&self, pos: BytePos) -> BytePos { + let line_index = self.lookup_line(pos).unwrap(); + self.lines[line_index] + } + + /// Add externally loaded source. + /// If the hash of the input doesn't match or no input is supplied via None, + /// it is interpreted as an error and the corresponding enum variant is set. + /// The return value signifies whether some kind of source is present. + pub fn add_external_src<F>(&self, get_src: F) -> bool + where + F: FnOnce() -> Option<String>, + { + if matches!( + *self.external_src.borrow(), + ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, .. } + ) { + let src = get_src(); + let mut external_src = self.external_src.borrow_mut(); + // Check that no-one else have provided the source while we were getting it + if let ExternalSource::Foreign { + kind: src_kind @ ExternalSourceKind::AbsentOk, .. + } = &mut *external_src + { + if let Some(mut src) = src { + // The src_hash needs to be computed on the pre-normalized src. + if self.src_hash.matches(&src) { + normalize_src(&mut src, BytePos::from_usize(0)); + *src_kind = ExternalSourceKind::Present(Lrc::new(src)); + return true; + } + } else { + *src_kind = ExternalSourceKind::AbsentErr; + } + + false + } else { + self.src.is_some() || external_src.get_source().is_some() + } + } else { + self.src.is_some() || self.external_src.borrow().get_source().is_some() + } + } + + /// Gets a line from the list of pre-computed line-beginnings. + /// The line number here is 0-based. + pub fn get_line(&self, line_number: usize) -> Option<Cow<'_, str>> { + fn get_until_newline(src: &str, begin: usize) -> &str { + // We can't use `lines.get(line_number+1)` because we might + // be parsing when we call this function and thus the current + // line is the last one we have line info for. + let slice = &src[begin..]; + match slice.find('\n') { + Some(e) => &slice[..e], + None => slice, + } + } + + let begin = { + let line = self.lines.get(line_number)?; + let begin: BytePos = *line - self.start_pos; + begin.to_usize() + }; + + if let Some(ref src) = self.src { + Some(Cow::from(get_until_newline(src, begin))) + } else if let Some(src) = self.external_src.borrow().get_source() { + Some(Cow::Owned(String::from(get_until_newline(src, begin)))) + } else { + None + } + } + + pub fn is_real_file(&self) -> bool { + self.name.is_real() + } + + pub fn is_imported(&self) -> bool { + self.src.is_none() + } + + pub fn byte_length(&self) -> u32 { + self.end_pos.0 - self.start_pos.0 + } + pub fn count_lines(&self) -> usize { + self.lines.len() + } + + /// Finds the line containing the given position. 
The return value is the + /// index into the `lines` array of this `SourceFile`, not the 1-based line + /// number. If the source_file is empty or the position is located before the + /// first line, `None` is returned. + pub fn lookup_line(&self, pos: BytePos) -> Option<usize> { + if self.lines.is_empty() { + return None; + } + + let line_index = lookup_line(&self.lines[..], pos); + assert!(line_index < self.lines.len() as isize); + if line_index >= 0 { Some(line_index as usize) } else { None } + } + + pub fn line_bounds(&self, line_index: usize) -> (BytePos, BytePos) { + if self.start_pos == self.end_pos { + return (self.start_pos, self.end_pos); + } + + assert!(line_index < self.lines.len()); + if line_index == (self.lines.len() - 1) { + (self.lines[line_index], self.end_pos) + } else { + (self.lines[line_index], self.lines[line_index + 1]) + } + } + + #[inline] + pub fn contains(&self, byte_pos: BytePos) -> bool { + byte_pos >= self.start_pos && byte_pos <= self.end_pos + } + + /// Calculates the original byte position relative to the start of the file + /// based on the given byte position. + pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos { + // Diff before any records is 0. Otherwise use the previously recorded + // diff as that applies to the following characters until a new diff + // is recorded. + let diff = match self.normalized_pos.binary_search_by(|np| np.pos.cmp(&pos)) { + Ok(i) => self.normalized_pos[i].diff, + Err(i) if i == 0 => 0, + Err(i) => self.normalized_pos[i - 1].diff, + }; + + BytePos::from_u32(pos.0 - self.start_pos.0 + diff) + } +} + +/// Normalizes the source code and records the normalizations. +fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> { + let mut normalized_pos = vec![]; + remove_bom(src, &mut normalized_pos); + normalize_newlines(src, &mut normalized_pos); + + // Offset all the positions by start_pos to match the final file positions. + for np in &mut normalized_pos { + np.pos.0 += start_pos.0; + } + + normalized_pos +} + +/// Removes UTF-8 BOM, if any. +fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) { + if src.starts_with("\u{feff}") { + src.drain(..3); + normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 }); + } +} + +/// Replaces `\r\n` with `\n` in-place in `src`. +/// +/// Returns error if there's a lone `\r` in the string +fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) { + if !src.as_bytes().contains(&b'\r') { + return; + } + + // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding. + // While we *can* call `as_mut_vec` and do surgery on the live string + // directly, let's rather steal the contents of `src`. This makes the code + // safe even if a panic occurs. + + let mut buf = std::mem::replace(src, String::new()).into_bytes(); + let mut gap_len = 0; + let mut tail = buf.as_mut_slice(); + let mut cursor = 0; + let original_gap = normalized_pos.last().map_or(0, |l| l.diff); + loop { + let idx = match find_crlf(&tail[gap_len..]) { + None => tail.len(), + Some(idx) => idx + gap_len, + }; + tail.copy_within(gap_len..idx, 0); + tail = &mut tail[idx - gap_len..]; + if tail.len() == gap_len { + break; + } + cursor += idx - gap_len; + gap_len += 1; + normalized_pos.push(NormalizedPos { + pos: BytePos::from_usize(cursor + 1), + diff: original_gap + gap_len as u32, + }); + } + + // Account for removed `\r`. + // After `set_len`, `buf` is guaranteed to contain utf-8 again. 
+ let new_len = buf.len() - gap_len; + unsafe { + buf.set_len(new_len); + *src = String::from_utf8_unchecked(buf); + } + + fn find_crlf(src: &[u8]) -> Option<usize> { + let mut search_idx = 0; + while let Some(idx) = find_cr(&src[search_idx..]) { + if src[search_idx..].get(idx + 1) != Some(&b'\n') { + search_idx += idx + 1; + continue; + } + return Some(search_idx + idx); + } + None + } + + fn find_cr(src: &[u8]) -> Option<usize> { + src.iter().position(|&b| b == b'\r') + } +} + +// _____________________________________________________________________________ +// Pos, BytePos, CharPos +// + +pub trait Pos { + fn from_usize(n: usize) -> Self; + fn to_usize(&self) -> usize; + fn from_u32(n: u32) -> Self; + fn to_u32(&self) -> u32; +} + +/// A byte offset. Keep this small (currently 32-bits), as AST contains +/// a lot of them. +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] +pub struct BytePos(pub u32); + +/// A character offset. Because of multibyte UTF-8 characters, a byte offset +/// is not equivalent to a character offset. The `SourceMap` will convert `BytePos` +/// values to `CharPos` values as necessary. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub struct CharPos(pub usize); + +// FIXME: lots of boilerplate in these impls, but so far my attempts to fix +// have been unsuccessful. + +impl Pos for BytePos { + #[inline(always)] + fn from_usize(n: usize) -> BytePos { + BytePos(n as u32) + } + + #[inline(always)] + fn to_usize(&self) -> usize { + self.0 as usize + } + + #[inline(always)] + fn from_u32(n: u32) -> BytePos { + BytePos(n) + } + + #[inline(always)] + fn to_u32(&self) -> u32 { + self.0 + } +} + +impl Add for BytePos { + type Output = BytePos; + + #[inline(always)] + fn add(self, rhs: BytePos) -> BytePos { + BytePos((self.to_usize() + rhs.to_usize()) as u32) + } +} + +impl Sub for BytePos { + type Output = BytePos; + + #[inline(always)] + fn sub(self, rhs: BytePos) -> BytePos { + BytePos((self.to_usize() - rhs.to_usize()) as u32) + } +} + +impl<S: rustc_serialize::Encoder> Encodable<S> for BytePos { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_u32(self.0) + } +} + +impl<D: rustc_serialize::Decoder> Decodable<D> for BytePos { + fn decode(d: &mut D) -> Result<BytePos, D::Error> { + Ok(BytePos(d.read_u32()?)) + } +} + +impl Pos for CharPos { + #[inline(always)] + fn from_usize(n: usize) -> CharPos { + CharPos(n) + } + + #[inline(always)] + fn to_usize(&self) -> usize { + self.0 + } + + #[inline(always)] + fn from_u32(n: u32) -> CharPos { + CharPos(n as usize) + } + + #[inline(always)] + fn to_u32(&self) -> u32 { + self.0 as u32 + } +} + +impl Add for CharPos { + type Output = CharPos; + + #[inline(always)] + fn add(self, rhs: CharPos) -> CharPos { + CharPos(self.to_usize() + rhs.to_usize()) + } +} + +impl Sub for CharPos { + type Output = CharPos; + + #[inline(always)] + fn sub(self, rhs: CharPos) -> CharPos { + CharPos(self.to_usize() - rhs.to_usize()) + } +} + +// _____________________________________________________________________________ +// Loc, SourceFileAndLine, SourceFileAndBytePos +// + +/// A source code location used for error reporting. +#[derive(Debug, Clone)] +pub struct Loc { + /// Information about the original source. + pub file: Lrc<SourceFile>, + /// The (1-based) line number. + pub line: usize, + /// The (0-based) column offset. + pub col: CharPos, + /// The (0-based) column offset when displayed. + pub col_display: usize, +} + +// Used to be structural records. 
+#[derive(Debug)] +pub struct SourceFileAndLine { + pub sf: Lrc<SourceFile>, + pub line: usize, +} +#[derive(Debug)] +pub struct SourceFileAndBytePos { + pub sf: Lrc<SourceFile>, + pub pos: BytePos, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct LineInfo { + /// Index of line, starting from 0. + pub line_index: usize, + + /// Column in line where span begins, starting from 0. + pub start_col: CharPos, + + /// Column in line where span ends, starting from 0, exclusive. + pub end_col: CharPos, +} + +pub struct FileLines { + pub file: Lrc<SourceFile>, + pub lines: Vec<LineInfo>, +} + +pub static SPAN_DEBUG: AtomicRef<fn(Span, &mut fmt::Formatter<'_>) -> fmt::Result> = + AtomicRef::new(&(default_span_debug as fn(_, &mut fmt::Formatter<'_>) -> _)); + +// _____________________________________________________________________________ +// SpanLinesError, SpanSnippetError, DistinctSources, MalformedSourceMapPositions +// + +pub type FileLinesResult = Result<FileLines, SpanLinesError>; + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum SpanLinesError { + DistinctSources(DistinctSources), +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum SpanSnippetError { + IllFormedSpan(Span), + DistinctSources(DistinctSources), + MalformedForSourcemap(MalformedSourceMapPositions), + SourceNotAvailable { filename: FileName }, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct DistinctSources { + pub begin: (FileName, BytePos), + pub end: (FileName, BytePos), +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct MalformedSourceMapPositions { + pub name: FileName, + pub source_len: usize, + pub begin_pos: BytePos, + pub end_pos: BytePos, +} + +/// Range inside of a `Span` used for diagnostics when we only have access to relative positions. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct InnerSpan { + pub start: usize, + pub end: usize, +} + +impl InnerSpan { + pub fn new(start: usize, end: usize) -> InnerSpan { + InnerSpan { start, end } + } +} + +// Given a slice of line start positions and a position, returns the index of +// the line the position is on. Returns -1 if the position is located before +// the first line. +fn lookup_line(lines: &[BytePos], pos: BytePos) -> isize { + match lines.binary_search(&pos) { + Ok(line) => line as isize, + Err(line) => line as isize - 1, + } +} + +/// Requirements for a `StableHashingContext` to be used in this crate. +/// This is a hack to allow using the `HashStable_Generic` derive macro +/// instead of implementing everything in librustc_middle. +pub trait HashStableContext { + fn hash_def_id(&mut self, _: DefId, hasher: &mut StableHasher); + fn hash_crate_num(&mut self, _: CrateNum, hasher: &mut StableHasher); + fn hash_spans(&self) -> bool; + fn byte_pos_to_line_and_col( + &mut self, + byte: BytePos, + ) -> Option<(Lrc<SourceFile>, usize, BytePos)>; +} + +impl<CTX> HashStable<CTX> for Span +where + CTX: HashStableContext, +{ + /// Hashes a span in a stable way. We can't directly hash the span's `BytePos` + /// fields (that would be similar to hashing pointers, since those are just + /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column) + /// triple, which stays the same even if the containing `SourceFile` has moved + /// within the `SourceMap`. + /// Also note that we are hashing byte offsets for the column, not unicode + /// codepoint offsets. For the purpose of the hash that's sufficient. 
+ /// Also, hashing filenames is expensive so we avoid doing it twice when the + /// span starts and ends in the same file, which is almost always the case. + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + const TAG_VALID_SPAN: u8 = 0; + const TAG_INVALID_SPAN: u8 = 1; + + if !ctx.hash_spans() { + return; + } + + if *self == DUMMY_SP { + std::hash::Hash::hash(&TAG_INVALID_SPAN, hasher); + return; + } + + // If this is not an empty or invalid span, we want to hash the last + // position that belongs to it, as opposed to hashing the first + // position past it. + let span = self.data(); + let (file_lo, line_lo, col_lo) = match ctx.byte_pos_to_line_and_col(span.lo) { + Some(pos) => pos, + None => { + std::hash::Hash::hash(&TAG_INVALID_SPAN, hasher); + span.ctxt.hash_stable(ctx, hasher); + return; + } + }; + + if !file_lo.contains(span.hi) { + std::hash::Hash::hash(&TAG_INVALID_SPAN, hasher); + span.ctxt.hash_stable(ctx, hasher); + return; + } + + std::hash::Hash::hash(&TAG_VALID_SPAN, hasher); + // We truncate the stable ID hash and line and column numbers. The chances + // of causing a collision this way should be minimal. + std::hash::Hash::hash(&(file_lo.name_hash as u64), hasher); + + let col = (col_lo.0 as u64) & 0xFF; + let line = ((line_lo as u64) & 0xFF_FF_FF) << 8; + let len = ((span.hi - span.lo).0 as u64) << 32; + let line_col_len = col | line | len; + std::hash::Hash::hash(&line_col_len, hasher); + span.ctxt.hash_stable(ctx, hasher); + } +} + +impl<CTX: HashStableContext> HashStable<CTX> for SyntaxContext { + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + const TAG_EXPANSION: u8 = 0; + const TAG_NO_EXPANSION: u8 = 1; + + if *self == SyntaxContext::root() { + TAG_NO_EXPANSION.hash_stable(ctx, hasher); + } else { + TAG_EXPANSION.hash_stable(ctx, hasher); + let (expn_id, transparency) = self.outer_mark(); + expn_id.hash_stable(ctx, hasher); + transparency.hash_stable(ctx, hasher); + } + } +} + +impl<CTX: HashStableContext> HashStable<CTX> for ExpnId { + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + // Since the same expansion context is usually referenced many + // times, we cache a stable hash of it and hash that instead of + // recursing every time. + thread_local! { + static CACHE: RefCell<Vec<Option<Fingerprint>>> = Default::default(); + } + + const TAG_ROOT: u8 = 0; + const TAG_NOT_ROOT: u8 = 1; + + if *self == ExpnId::root() { + TAG_ROOT.hash_stable(ctx, hasher); + return; + } + + TAG_NOT_ROOT.hash_stable(ctx, hasher); + let index = self.as_u32() as usize; + + let res = CACHE.with(|cache| cache.borrow().get(index).copied().flatten()); + + if let Some(res) = res { + res.hash_stable(ctx, hasher); + } else { + let new_len = index + 1; + + let mut sub_hasher = StableHasher::new(); + self.expn_data().hash_stable(ctx, &mut sub_hasher); + let sub_hash: Fingerprint = sub_hasher.finish(); + + CACHE.with(|cache| { + let mut cache = cache.borrow_mut(); + if cache.len() < new_len { + cache.resize(new_len, None); + } + cache[index].replace(sub_hash).expect_none("Cache slot was filled"); + }); + sub_hash.hash_stable(ctx, hasher); + } + } +} diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs new file mode 100644 index 00000000000..7c656db22ed --- /dev/null +++ b/compiler/rustc_span/src/source_map.rs @@ -0,0 +1,1097 @@ +//! The `SourceMap` tracks all the source code used within a single crate, mapping +//! from integer byte positions to the original source code location. Each bit +//! 
of source parsed during crate parsing (typically files, in-memory strings, +//! or various bits of macro expansion) cover a continuous range of bytes in the +//! `SourceMap` and are represented by `SourceFile`s. Byte positions are stored in +//! `Span` and used pervasively in the compiler. They are absolute positions +//! within the `SourceMap`, which upon request can be converted to line and column +//! information, source code snippets, etc. + +pub use crate::hygiene::{ExpnData, ExpnKind}; +pub use crate::*; + +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::stable_hasher::StableHasher; +use rustc_data_structures::sync::{AtomicU32, Lock, LockGuard, Lrc, MappedLockGuard}; +use std::cmp; +use std::convert::TryFrom; +use std::hash::Hash; +use std::path::{Path, PathBuf}; +use std::sync::atomic::Ordering; + +use std::fs; +use std::io; +use tracing::debug; + +#[cfg(test)] +mod tests; + +/// Returns the span itself if it doesn't come from a macro expansion, +/// otherwise return the call site span up to the `enclosing_sp` by +/// following the `expn_data` chain. +pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span { + let expn_data1 = sp.ctxt().outer_expn_data(); + let expn_data2 = enclosing_sp.ctxt().outer_expn_data(); + if expn_data1.is_root() || !expn_data2.is_root() && expn_data1.call_site == expn_data2.call_site + { + sp + } else { + original_sp(expn_data1.call_site, enclosing_sp) + } +} + +pub mod monotonic { + use std::ops::{Deref, DerefMut}; + + /// A `MonotonicVec` is a `Vec` which can only be grown. + /// Once inserted, an element can never be removed or swapped, + /// guaranteeing that any indices into a `MonotonicVec` are stable + // This is declared in its own module to ensure that the private + // field is inaccessible + pub struct MonotonicVec<T>(Vec<T>); + impl<T> MonotonicVec<T> { + pub fn new(val: Vec<T>) -> MonotonicVec<T> { + MonotonicVec(val) + } + + pub fn push(&mut self, val: T) { + self.0.push(val); + } + } + + impl<T> Default for MonotonicVec<T> { + fn default() -> Self { + MonotonicVec::new(vec![]) + } + } + + impl<T> Deref for MonotonicVec<T> { + type Target = Vec<T>; + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl<T> !DerefMut for MonotonicVec<T> {} +} + +#[derive(Clone, Encodable, Decodable, Debug, Copy, HashStable_Generic)] +pub struct Spanned<T> { + pub node: T, + pub span: Span, +} + +pub fn respan<T>(sp: Span, t: T) -> Spanned<T> { + Spanned { node: t, span: sp } +} + +pub fn dummy_spanned<T>(t: T) -> Spanned<T> { + respan(DUMMY_SP, t) +} + +// _____________________________________________________________________________ +// SourceFile, MultiByteChar, FileName, FileLines +// + +/// An abstraction over the fs operations used by the Parser. +pub trait FileLoader { + /// Query the existence of a file. + fn file_exists(&self, path: &Path) -> bool; + + /// Read the contents of an UTF-8 file into memory. + fn read_file(&self, path: &Path) -> io::Result<String>; +} + +/// A FileLoader that uses std::fs to load real files. +pub struct RealFileLoader; + +impl FileLoader for RealFileLoader { + fn file_exists(&self, path: &Path) -> bool { + fs::metadata(path).is_ok() + } + + fn read_file(&self, path: &Path) -> io::Result<String> { + fs::read_to_string(path) + } +} + +// This is a `SourceFile` identifier that is used to correlate `SourceFile`s between +// subsequent compilation sessions (which is something we need to do during +// incremental compilation). 
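+// The ID is derived from the file name and remapping information, not from the file contents,
+// so two sessions that see the same path compute the same ID even if the file's text changed
+// in between.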
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)] +pub struct StableSourceFileId(u128); + +// FIXME: we need a more globally consistent approach to the problem solved by +// StableSourceFileId, perhaps built atop source_file.name_hash. +impl StableSourceFileId { + pub fn new(source_file: &SourceFile) -> StableSourceFileId { + StableSourceFileId::new_from_pieces( + &source_file.name, + source_file.name_was_remapped, + source_file.unmapped_path.as_ref(), + ) + } + + fn new_from_pieces( + name: &FileName, + name_was_remapped: bool, + unmapped_path: Option<&FileName>, + ) -> StableSourceFileId { + let mut hasher = StableHasher::new(); + + if let FileName::Real(real_name) = name { + // rust-lang/rust#70924: Use the stable (virtualized) name when + // available. (We do not want artifacts from transient file system + // paths for libstd to leak into our build artifacts.) + real_name.stable_name().hash(&mut hasher) + } else { + name.hash(&mut hasher); + } + name_was_remapped.hash(&mut hasher); + unmapped_path.hash(&mut hasher); + + StableSourceFileId(hasher.finish()) + } +} + +// _____________________________________________________________________________ +// SourceMap +// + +#[derive(Default)] +pub(super) struct SourceMapFiles { + source_files: monotonic::MonotonicVec<Lrc<SourceFile>>, + stable_id_to_source_file: FxHashMap<StableSourceFileId, Lrc<SourceFile>>, +} + +pub struct SourceMap { + /// The address space below this value is currently used by the files in the source map. + used_address_space: AtomicU32, + + files: Lock<SourceMapFiles>, + file_loader: Box<dyn FileLoader + Sync + Send>, + // This is used to apply the file path remapping as specified via + // `--remap-path-prefix` to all `SourceFile`s allocated within this `SourceMap`. + path_mapping: FilePathMapping, + + /// The algorithm used for hashing the contents of each source file. + hash_kind: SourceFileHashAlgorithm, +} + +impl SourceMap { + pub fn new(path_mapping: FilePathMapping) -> SourceMap { + Self::with_file_loader_and_hash_kind( + Box::new(RealFileLoader), + path_mapping, + SourceFileHashAlgorithm::Md5, + ) + } + + pub fn with_file_loader_and_hash_kind( + file_loader: Box<dyn FileLoader + Sync + Send>, + path_mapping: FilePathMapping, + hash_kind: SourceFileHashAlgorithm, + ) -> SourceMap { + SourceMap { + used_address_space: AtomicU32::new(0), + files: Default::default(), + file_loader, + path_mapping, + hash_kind, + } + } + + pub fn path_mapping(&self) -> &FilePathMapping { + &self.path_mapping + } + + pub fn file_exists(&self, path: &Path) -> bool { + self.file_loader.file_exists(path) + } + + pub fn load_file(&self, path: &Path) -> io::Result<Lrc<SourceFile>> { + let src = self.file_loader.read_file(path)?; + let filename = path.to_owned().into(); + Ok(self.new_source_file(filename, src)) + } + + /// Loads source file as a binary blob. + /// + /// Unlike `load_file`, guarantees that no normalization like BOM-removal + /// takes place. + pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> { + // Ideally, this should use `self.file_loader`, but it can't + // deal with binary files yet. + let bytes = fs::read(path)?; + + // We need to add file to the `SourceMap`, so that it is present + // in dep-info. There's also an edge case that file might be both + // loaded as a binary via `include_bytes!` and as proper `SourceFile` + // via `mod`, so we try to use real file contents and not just an + // empty string. 
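+        // (If the bytes are not valid UTF-8, an empty string is registered instead.)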
+ let text = std::str::from_utf8(&bytes).unwrap_or("").to_string(); + self.new_source_file(path.to_owned().into(), text); + Ok(bytes) + } + + // By returning a `MonotonicVec`, we ensure that consumers cannot invalidate + // any existing indices pointing into `files`. + pub fn files(&self) -> MappedLockGuard<'_, monotonic::MonotonicVec<Lrc<SourceFile>>> { + LockGuard::map(self.files.borrow(), |files| &mut files.source_files) + } + + pub fn source_file_by_stable_id( + &self, + stable_id: StableSourceFileId, + ) -> Option<Lrc<SourceFile>> { + self.files.borrow().stable_id_to_source_file.get(&stable_id).cloned() + } + + fn allocate_address_space(&self, size: usize) -> Result<usize, OffsetOverflowError> { + let size = u32::try_from(size).map_err(|_| OffsetOverflowError)?; + + loop { + let current = self.used_address_space.load(Ordering::Relaxed); + let next = current + .checked_add(size) + // Add one so there is some space between files. This lets us distinguish + // positions in the `SourceMap`, even in the presence of zero-length files. + .and_then(|next| next.checked_add(1)) + .ok_or(OffsetOverflowError)?; + + if self + .used_address_space + .compare_exchange(current, next, Ordering::Relaxed, Ordering::Relaxed) + .is_ok() + { + return Ok(usize::try_from(current).unwrap()); + } + } + } + + /// Creates a new `SourceFile`. + /// If a file already exists in the `SourceMap` with the same ID, that file is returned + /// unmodified. + pub fn new_source_file(&self, filename: FileName, src: String) -> Lrc<SourceFile> { + self.try_new_source_file(filename, src).unwrap_or_else(|OffsetOverflowError| { + eprintln!("fatal error: rustc does not support files larger than 4GB"); + crate::fatal_error::FatalError.raise() + }) + } + + fn try_new_source_file( + &self, + mut filename: FileName, + src: String, + ) -> Result<Lrc<SourceFile>, OffsetOverflowError> { + // The path is used to determine the directory for loading submodules and + // include files, so it must be before remapping. + // Note that filename may not be a valid path, eg it may be `<anon>` etc, + // but this is okay because the directory determined by `path.pop()` will + // be empty, so the working directory will be used. + let unmapped_path = filename.clone(); + + let was_remapped; + if let FileName::Real(real_filename) = &mut filename { + match real_filename { + RealFileName::Named(path_to_be_remapped) + | RealFileName::Devirtualized { + local_path: path_to_be_remapped, + virtual_name: _, + } => { + let mapped = self.path_mapping.map_prefix(path_to_be_remapped.clone()); + was_remapped = mapped.1; + *path_to_be_remapped = mapped.0; + } + } + } else { + was_remapped = false; + } + + let file_id = + StableSourceFileId::new_from_pieces(&filename, was_remapped, Some(&unmapped_path)); + + let lrc_sf = match self.source_file_by_stable_id(file_id) { + Some(lrc_sf) => lrc_sf, + None => { + let start_pos = self.allocate_address_space(src.len())?; + + let source_file = Lrc::new(SourceFile::new( + filename, + was_remapped, + unmapped_path, + src, + Pos::from_usize(start_pos), + self.hash_kind, + )); + + let mut files = self.files.borrow_mut(); + + files.source_files.push(source_file.clone()); + files.stable_id_to_source_file.insert(file_id, source_file.clone()); + + source_file + } + }; + Ok(lrc_sf) + } + + /// Allocates a new `SourceFile` representing a source file from an external + /// crate. 
The source code of such an "imported `SourceFile`" is not available, + /// but we still know enough to generate accurate debuginfo location + /// information for things inlined from other crates. + pub fn new_imported_source_file( + &self, + filename: FileName, + name_was_remapped: bool, + src_hash: SourceFileHash, + name_hash: u128, + source_len: usize, + cnum: CrateNum, + mut file_local_lines: Vec<BytePos>, + mut file_local_multibyte_chars: Vec<MultiByteChar>, + mut file_local_non_narrow_chars: Vec<NonNarrowChar>, + mut file_local_normalized_pos: Vec<NormalizedPos>, + original_start_pos: BytePos, + original_end_pos: BytePos, + ) -> Lrc<SourceFile> { + let start_pos = self + .allocate_address_space(source_len) + .expect("not enough address space for imported source file"); + + let end_pos = Pos::from_usize(start_pos + source_len); + let start_pos = Pos::from_usize(start_pos); + + for pos in &mut file_local_lines { + *pos = *pos + start_pos; + } + + for mbc in &mut file_local_multibyte_chars { + mbc.pos = mbc.pos + start_pos; + } + + for swc in &mut file_local_non_narrow_chars { + *swc = *swc + start_pos; + } + + for nc in &mut file_local_normalized_pos { + nc.pos = nc.pos + start_pos; + } + + let source_file = Lrc::new(SourceFile { + name: filename, + name_was_remapped, + unmapped_path: None, + src: None, + src_hash, + external_src: Lock::new(ExternalSource::Foreign { + kind: ExternalSourceKind::AbsentOk, + original_start_pos, + original_end_pos, + }), + start_pos, + end_pos, + lines: file_local_lines, + multibyte_chars: file_local_multibyte_chars, + non_narrow_chars: file_local_non_narrow_chars, + normalized_pos: file_local_normalized_pos, + name_hash, + cnum, + }); + + let mut files = self.files.borrow_mut(); + + files.source_files.push(source_file.clone()); + files + .stable_id_to_source_file + .insert(StableSourceFileId::new(&source_file), source_file.clone()); + + source_file + } + + pub fn mk_substr_filename(&self, sp: Span) -> String { + let pos = self.lookup_char_pos(sp.lo()); + format!("<{}:{}:{}>", pos.file.name, pos.line, pos.col.to_usize() + 1) + } + + // If there is a doctest offset, applies it to the line. + pub fn doctest_offset_line(&self, file: &FileName, orig: usize) -> usize { + match file { + FileName::DocTest(_, offset) => { + if *offset < 0 { + orig - (-(*offset)) as usize + } else { + orig + *offset as usize + } + } + _ => orig, + } + } + + /// Looks up source information about a `BytePos`. 
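+    ///
+    /// For example (an illustrative sketch mirroring the unit tests in this crate, where
+    /// `source_map` stands for a `SourceMap` whose only file starts at `BytePos(0)` and
+    /// contains `"first line.\nsecond line"`):
+    ///
+    /// ```rust,ignore (illustrative)
+    /// let loc = source_map.lookup_char_pos(BytePos(22));
+    /// assert_eq!(loc.line, 2);          // line numbers are 1-based
+    /// assert_eq!(loc.col, CharPos(10)); // column offsets are 0-based
+    /// ```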
+ pub fn lookup_char_pos(&self, pos: BytePos) -> Loc { + let chpos = self.bytepos_to_file_charpos(pos); + match self.lookup_line(pos) { + Ok(SourceFileAndLine { sf: f, line: a }) => { + let line = a + 1; // Line numbers start at 1 + let linebpos = f.lines[a]; + let linechpos = self.bytepos_to_file_charpos(linebpos); + let col = chpos - linechpos; + + let col_display = { + let start_width_idx = f + .non_narrow_chars + .binary_search_by_key(&linebpos, |x| x.pos()) + .unwrap_or_else(|x| x); + let end_width_idx = f + .non_narrow_chars + .binary_search_by_key(&pos, |x| x.pos()) + .unwrap_or_else(|x| x); + let special_chars = end_width_idx - start_width_idx; + let non_narrow: usize = f.non_narrow_chars[start_width_idx..end_width_idx] + .iter() + .map(|x| x.width()) + .sum(); + col.0 - special_chars + non_narrow + }; + debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos); + debug!("char pos {:?} is on the line at char pos {:?}", chpos, linechpos); + debug!("byte is on line: {}", line); + assert!(chpos >= linechpos); + Loc { file: f, line, col, col_display } + } + Err(f) => { + let col_display = { + let end_width_idx = f + .non_narrow_chars + .binary_search_by_key(&pos, |x| x.pos()) + .unwrap_or_else(|x| x); + let non_narrow: usize = + f.non_narrow_chars[0..end_width_idx].iter().map(|x| x.width()).sum(); + chpos.0 - end_width_idx + non_narrow + }; + Loc { file: f, line: 0, col: chpos, col_display } + } + } + } + + // If the corresponding `SourceFile` is empty, does not return a line number. + pub fn lookup_line(&self, pos: BytePos) -> Result<SourceFileAndLine, Lrc<SourceFile>> { + let idx = self.lookup_source_file_idx(pos); + + let f = (*self.files.borrow().source_files)[idx].clone(); + + match f.lookup_line(pos) { + Some(line) => Ok(SourceFileAndLine { sf: f, line }), + None => Err(f), + } + } + + /// Returns `Some(span)`, a union of the LHS and RHS span. The LHS must precede the RHS. If + /// there are gaps between LHS and RHS, the resulting union will cross these gaps. + /// For this to work, + /// + /// * the syntax contexts of both spans much match, + /// * the LHS span needs to end on the same line the RHS span begins, + /// * the LHS span must start at or before the RHS span. + pub fn merge_spans(&self, sp_lhs: Span, sp_rhs: Span) -> Option<Span> { + // Ensure we're at the same expansion ID. + if sp_lhs.ctxt() != sp_rhs.ctxt() { + return None; + } + + let lhs_end = match self.lookup_line(sp_lhs.hi()) { + Ok(x) => x, + Err(_) => return None, + }; + let rhs_begin = match self.lookup_line(sp_rhs.lo()) { + Ok(x) => x, + Err(_) => return None, + }; + + // If we must cross lines to merge, don't merge. + if lhs_end.line != rhs_begin.line { + return None; + } + + // Ensure these follow the expected order and that we don't overlap. 
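+        // (Adjacent spans still merge: `sp_lhs.hi() == sp_rhs.lo()` passes this check, and any
+        // gap between the two spans simply ends up inside the merged span.)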
+ if (sp_lhs.lo() <= sp_rhs.lo()) && (sp_lhs.hi() <= sp_rhs.lo()) { + Some(sp_lhs.to(sp_rhs)) + } else { + None + } + } + + pub fn span_to_string(&self, sp: Span) -> String { + if self.files.borrow().source_files.is_empty() && sp.is_dummy() { + return "no-location".to_string(); + } + + let lo = self.lookup_char_pos(sp.lo()); + let hi = self.lookup_char_pos(sp.hi()); + format!( + "{}:{}:{}: {}:{}", + lo.file.name, + lo.line, + lo.col.to_usize() + 1, + hi.line, + hi.col.to_usize() + 1, + ) + } + + pub fn span_to_filename(&self, sp: Span) -> FileName { + self.lookup_char_pos(sp.lo()).file.name.clone() + } + + pub fn span_to_unmapped_path(&self, sp: Span) -> FileName { + self.lookup_char_pos(sp.lo()) + .file + .unmapped_path + .clone() + .expect("`SourceMap::span_to_unmapped_path` called for imported `SourceFile`?") + } + + pub fn is_multiline(&self, sp: Span) -> bool { + let lo = self.lookup_char_pos(sp.lo()); + let hi = self.lookup_char_pos(sp.hi()); + lo.line != hi.line + } + + pub fn is_valid_span(&self, sp: Span) -> Result<(Loc, Loc), SpanLinesError> { + let lo = self.lookup_char_pos(sp.lo()); + debug!("span_to_lines: lo={:?}", lo); + let hi = self.lookup_char_pos(sp.hi()); + debug!("span_to_lines: hi={:?}", hi); + if lo.file.start_pos != hi.file.start_pos { + return Err(SpanLinesError::DistinctSources(DistinctSources { + begin: (lo.file.name.clone(), lo.file.start_pos), + end: (hi.file.name.clone(), hi.file.start_pos), + })); + } + Ok((lo, hi)) + } + + pub fn is_line_before_span_empty(&self, sp: Span) -> bool { + match self.span_to_prev_source(sp) { + Ok(s) => s.split('\n').last().map(|l| l.trim_start().is_empty()).unwrap_or(false), + Err(_) => false, + } + } + + pub fn span_to_lines(&self, sp: Span) -> FileLinesResult { + debug!("span_to_lines(sp={:?})", sp); + let (lo, hi) = self.is_valid_span(sp)?; + assert!(hi.line >= lo.line); + + if sp.is_dummy() { + return Ok(FileLines { file: lo.file, lines: Vec::new() }); + } + + let mut lines = Vec::with_capacity(hi.line - lo.line + 1); + + // The span starts partway through the first line, + // but after that it starts from offset 0. + let mut start_col = lo.col; + + // For every line but the last, it extends from `start_col` + // and to the end of the line. Be careful because the line + // numbers in Loc are 1-based, so we subtract 1 to get 0-based + // lines. + // + // FIXME: now that we handle DUMMY_SP up above, we should consider + // asserting that the line numbers here are all indeed 1-based. + let hi_line = hi.line.saturating_sub(1); + for line_index in lo.line.saturating_sub(1)..hi_line { + let line_len = lo.file.get_line(line_index).map(|s| s.chars().count()).unwrap_or(0); + lines.push(LineInfo { line_index, start_col, end_col: CharPos::from_usize(line_len) }); + start_col = CharPos::from_usize(0); + } + + // For the last line, it extends from `start_col` to `hi.col`: + lines.push(LineInfo { line_index: hi_line, start_col, end_col: hi.col }); + + Ok(FileLines { file: lo.file, lines }) + } + + /// Extracts the source surrounding the given `Span` using the `extract_source` function. The + /// extract function takes three arguments: a string slice containing the source, an index in + /// the slice for the beginning of the span and an index in the slice for the end of the span. 
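+    ///
+    /// For example, `span_to_snippet` below calls this with a closure that simply slices the
+    /// source, roughly `|src, start, end| Ok(src[start..end].to_string())`.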
+ fn span_to_source<F>(&self, sp: Span, extract_source: F) -> Result<String, SpanSnippetError> + where + F: Fn(&str, usize, usize) -> Result<String, SpanSnippetError>, + { + let local_begin = self.lookup_byte_offset(sp.lo()); + let local_end = self.lookup_byte_offset(sp.hi()); + + if local_begin.sf.start_pos != local_end.sf.start_pos { + Err(SpanSnippetError::DistinctSources(DistinctSources { + begin: (local_begin.sf.name.clone(), local_begin.sf.start_pos), + end: (local_end.sf.name.clone(), local_end.sf.start_pos), + })) + } else { + self.ensure_source_file_source_present(local_begin.sf.clone()); + + let start_index = local_begin.pos.to_usize(); + let end_index = local_end.pos.to_usize(); + let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); + + if start_index > end_index || end_index > source_len { + return Err(SpanSnippetError::MalformedForSourcemap(MalformedSourceMapPositions { + name: local_begin.sf.name.clone(), + source_len, + begin_pos: local_begin.pos, + end_pos: local_end.pos, + })); + } + + if let Some(ref src) = local_begin.sf.src { + extract_source(src, start_index, end_index) + } else if let Some(src) = local_begin.sf.external_src.borrow().get_source() { + extract_source(src, start_index, end_index) + } else { + Err(SpanSnippetError::SourceNotAvailable { filename: local_begin.sf.name.clone() }) + } + } + } + + /// Returns the source snippet as `String` corresponding to the given `Span`. + pub fn span_to_snippet(&self, sp: Span) -> Result<String, SpanSnippetError> { + self.span_to_source(sp, |src, start_index, end_index| { + src.get(start_index..end_index) + .map(|s| s.to_string()) + .ok_or_else(|| SpanSnippetError::IllFormedSpan(sp)) + }) + } + + pub fn span_to_margin(&self, sp: Span) -> Option<usize> { + match self.span_to_prev_source(sp) { + Err(_) => None, + Ok(source) => source + .split('\n') + .last() + .map(|last_line| last_line.len() - last_line.trim_start().len()), + } + } + + /// Returns the source snippet as `String` before the given `Span`. + pub fn span_to_prev_source(&self, sp: Span) -> Result<String, SpanSnippetError> { + self.span_to_source(sp, |src, start_index, _| { + src.get(..start_index) + .map(|s| s.to_string()) + .ok_or_else(|| SpanSnippetError::IllFormedSpan(sp)) + }) + } + + /// Extends the given `Span` to just after the previous occurrence of `c`. Return the same span + /// if no character could be found or if an error occurred while retrieving the code snippet. + pub fn span_extend_to_prev_char(&self, sp: Span, c: char) -> Span { + if let Ok(prev_source) = self.span_to_prev_source(sp) { + let prev_source = prev_source.rsplit(c).next().unwrap_or("").trim_start(); + if !prev_source.is_empty() && !prev_source.contains('\n') { + return sp.with_lo(BytePos(sp.lo().0 - prev_source.len() as u32)); + } + } + + sp + } + + /// Extends the given `Span` to just after the previous occurrence of `pat` when surrounded by + /// whitespace. Returns the same span if no character could be found or if an error occurred + /// while retrieving the code snippet. 
+ pub fn span_extend_to_prev_str(&self, sp: Span, pat: &str, accept_newlines: bool) -> Span { + // assure that the pattern is delimited, to avoid the following + // fn my_fn() + // ^^^^ returned span without the check + // ---------- correct span + for ws in &[" ", "\t", "\n"] { + let pat = pat.to_owned() + ws; + if let Ok(prev_source) = self.span_to_prev_source(sp) { + let prev_source = prev_source.rsplit(&pat).next().unwrap_or("").trim_start(); + if !prev_source.is_empty() && (!prev_source.contains('\n') || accept_newlines) { + return sp.with_lo(BytePos(sp.lo().0 - prev_source.len() as u32)); + } + } + } + + sp + } + + /// Given a `Span`, tries to get a shorter span ending before the first occurrence of `char` + /// `c`. + pub fn span_until_char(&self, sp: Span, c: char) -> Span { + match self.span_to_snippet(sp) { + Ok(snippet) => { + let snippet = snippet.split(c).next().unwrap_or("").trim_end(); + if !snippet.is_empty() && !snippet.contains('\n') { + sp.with_hi(BytePos(sp.lo().0 + snippet.len() as u32)) + } else { + sp + } + } + _ => sp, + } + } + + /// Given a `Span`, tries to get a shorter span ending just after the first occurrence of `char` + /// `c`. + pub fn span_through_char(&self, sp: Span, c: char) -> Span { + if let Ok(snippet) = self.span_to_snippet(sp) { + if let Some(offset) = snippet.find(c) { + return sp.with_hi(BytePos(sp.lo().0 + (offset + c.len_utf8()) as u32)); + } + } + sp + } + + /// Given a `Span`, gets a new `Span` covering the first token and all its trailing whitespace + /// or the original `Span`. + /// + /// If `sp` points to `"let mut x"`, then a span pointing at `"let "` will be returned. + pub fn span_until_non_whitespace(&self, sp: Span) -> Span { + let mut whitespace_found = false; + + self.span_take_while(sp, |c| { + if !whitespace_found && c.is_whitespace() { + whitespace_found = true; + } + + !whitespace_found || c.is_whitespace() + }) + } + + /// Given a `Span`, gets a new `Span` covering the first token without its trailing whitespace + /// or the original `Span` in case of error. + /// + /// If `sp` points to `"let mut x"`, then a span pointing at `"let"` will be returned. + pub fn span_until_whitespace(&self, sp: Span) -> Span { + self.span_take_while(sp, |c| !c.is_whitespace()) + } + + /// Given a `Span`, gets a shorter one until `predicate` yields `false`. + pub fn span_take_while<P>(&self, sp: Span, predicate: P) -> Span + where + P: for<'r> FnMut(&'r char) -> bool, + { + if let Ok(snippet) = self.span_to_snippet(sp) { + let offset = snippet.chars().take_while(predicate).map(|c| c.len_utf8()).sum::<usize>(); + + sp.with_hi(BytePos(sp.lo().0 + (offset as u32))) + } else { + sp + } + } + + /// Given a `Span`, return a span ending in the closest `{`. This is useful when you have a + /// `Span` enclosing a whole item but we need to point at only the head (usually the first + /// line) of that item. + /// + /// *Only suitable for diagnostics.* + pub fn guess_head_span(&self, sp: Span) -> Span { + // FIXME: extend the AST items to have a head span, or replace callers with pointing at + // the item's ident when appropriate. + self.span_until_char(sp, '{') + } + + /// Returns a new span representing just the start point of this span. 
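+    ///
+    /// For plain ASCII source this is the span of the first byte: e.g. (illustrative) a span
+    /// covering `"let x"` is narrowed to one covering just `"l"`.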
+ pub fn start_point(&self, sp: Span) -> Span { + let pos = sp.lo().0; + let width = self.find_width_of_character_at_span(sp, false); + let corrected_start_position = pos.checked_add(width).unwrap_or(pos); + let end_point = BytePos(cmp::max(corrected_start_position, sp.lo().0)); + sp.with_hi(end_point) + } + + /// Returns a new span representing just the end point of this span. + pub fn end_point(&self, sp: Span) -> Span { + let pos = sp.hi().0; + + let width = self.find_width_of_character_at_span(sp, false); + let corrected_end_position = pos.checked_sub(width).unwrap_or(pos); + + let end_point = BytePos(cmp::max(corrected_end_position, sp.lo().0)); + sp.with_lo(end_point) + } + + /// Returns a new span representing the next character after the end-point of this span. + pub fn next_point(&self, sp: Span) -> Span { + let start_of_next_point = sp.hi().0; + + let width = self.find_width_of_character_at_span(sp.shrink_to_hi(), true); + // If the width is 1, then the next span should point to the same `lo` and `hi`. However, + // in the case of a multibyte character, where the width != 1, the next span should + // span multiple bytes to include the whole character. + let end_of_next_point = + start_of_next_point.checked_add(width - 1).unwrap_or(start_of_next_point); + + let end_of_next_point = BytePos(cmp::max(sp.lo().0 + 1, end_of_next_point)); + Span::new(BytePos(start_of_next_point), end_of_next_point, sp.ctxt()) + } + + /// Finds the width of a character, either before or after the provided span. + fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 { + let sp = sp.data(); + if sp.lo == sp.hi { + debug!("find_width_of_character_at_span: early return empty span"); + return 1; + } + + let local_begin = self.lookup_byte_offset(sp.lo); + let local_end = self.lookup_byte_offset(sp.hi); + debug!( + "find_width_of_character_at_span: local_begin=`{:?}`, local_end=`{:?}`", + local_begin, local_end + ); + + if local_begin.sf.start_pos != local_end.sf.start_pos { + debug!("find_width_of_character_at_span: begin and end are in different files"); + return 1; + } + + let start_index = local_begin.pos.to_usize(); + let end_index = local_end.pos.to_usize(); + debug!( + "find_width_of_character_at_span: start_index=`{:?}`, end_index=`{:?}`", + start_index, end_index + ); + + // Disregard indexes that are at the start or end of their spans, they can't fit bigger + // characters. + if (!forwards && end_index == usize::MIN) || (forwards && start_index == usize::MAX) { + debug!("find_width_of_character_at_span: start or end of span, cannot be multibyte"); + return 1; + } + + let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); + debug!("find_width_of_character_at_span: source_len=`{:?}`", source_len); + // Ensure indexes are also not malformed. + if start_index > end_index || end_index > source_len { + debug!("find_width_of_character_at_span: source indexes are malformed"); + return 1; + } + + let src = local_begin.sf.external_src.borrow(); + + // We need to extend the snippet to the end of the src rather than to end_index so when + // searching forwards for boundaries we've got somewhere to search. 
+ let snippet = if let Some(ref src) = local_begin.sf.src { + let len = src.len(); + &src[start_index..len] + } else if let Some(src) = src.get_source() { + let len = src.len(); + &src[start_index..len] + } else { + return 1; + }; + debug!("find_width_of_character_at_span: snippet=`{:?}`", snippet); + + let mut target = if forwards { end_index + 1 } else { end_index - 1 }; + debug!("find_width_of_character_at_span: initial target=`{:?}`", target); + + while !snippet.is_char_boundary(target - start_index) && target < source_len { + target = if forwards { + target + 1 + } else { + match target.checked_sub(1) { + Some(target) => target, + None => { + break; + } + } + }; + debug!("find_width_of_character_at_span: target=`{:?}`", target); + } + debug!("find_width_of_character_at_span: final target=`{:?}`", target); + + if forwards { (target - end_index) as u32 } else { (end_index - target) as u32 } + } + + pub fn get_source_file(&self, filename: &FileName) -> Option<Lrc<SourceFile>> { + for sf in self.files.borrow().source_files.iter() { + if *filename == sf.name { + return Some(sf.clone()); + } + } + None + } + + /// For a global `BytePos`, computes the local offset within the containing `SourceFile`. + pub fn lookup_byte_offset(&self, bpos: BytePos) -> SourceFileAndBytePos { + let idx = self.lookup_source_file_idx(bpos); + let sf = (*self.files.borrow().source_files)[idx].clone(); + let offset = bpos - sf.start_pos; + SourceFileAndBytePos { sf, pos: offset } + } + + /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`. + pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { + let idx = self.lookup_source_file_idx(bpos); + let map = &(*self.files.borrow().source_files)[idx]; + + // The number of extra bytes due to multibyte chars in the `SourceFile`. + let mut total_extra_bytes = 0; + + for mbc in map.multibyte_chars.iter() { + debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos); + if mbc.pos < bpos { + // Every character is at least one byte, so we only + // count the actual extra bytes. + total_extra_bytes += mbc.bytes as u32 - 1; + // We should never see a byte position in the middle of a + // character. + assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32); + } else { + break; + } + } + + assert!(map.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32()); + CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes as usize) + } + + // Returns the index of the `SourceFile` (in `self.files`) that contains `pos`. 
+ // This index is guaranteed to be valid for the lifetime of this `SourceMap`, + // since `source_files` is a `MonotonicVec` + pub fn lookup_source_file_idx(&self, pos: BytePos) -> usize { + self.files + .borrow() + .source_files + .binary_search_by_key(&pos, |key| key.start_pos) + .unwrap_or_else(|p| p - 1) + } + + pub fn count_lines(&self) -> usize { + self.files().iter().fold(0, |a, f| a + f.count_lines()) + } + + pub fn generate_fn_name_span(&self, span: Span) -> Option<Span> { + let prev_span = self.span_extend_to_prev_str(span, "fn", true); + if let Ok(snippet) = self.span_to_snippet(prev_span) { + debug!( + "generate_fn_name_span: span={:?}, prev_span={:?}, snippet={:?}", + span, prev_span, snippet + ); + + if snippet.is_empty() { + return None; + }; + + let len = snippet + .find(|c: char| !c.is_alphanumeric() && c != '_') + .expect("no label after fn"); + Some(prev_span.with_hi(BytePos(prev_span.lo().0 + len as u32))) + } else { + None + } + } + + /// Takes the span of a type parameter in a function signature and try to generate a span for + /// the function name (with generics) and a new snippet for this span with the pointed type + /// parameter as a new local type parameter. + /// + /// For instance: + /// ```rust,ignore (pseudo-Rust) + /// // Given span + /// fn my_function(param: T) + /// // ^ Original span + /// + /// // Result + /// fn my_function(param: T) + /// // ^^^^^^^^^^^ Generated span with snippet `my_function<T>` + /// ``` + /// + /// Attention: The method used is very fragile since it essentially duplicates the work of the + /// parser. If you need to use this function or something similar, please consider updating the + /// `SourceMap` functions and this function to something more robust. + pub fn generate_local_type_param_snippet(&self, span: Span) -> Option<(Span, String)> { + // Try to extend the span to the previous "fn" keyword to retrieve the function + // signature. + let sugg_span = self.span_extend_to_prev_str(span, "fn", false); + if sugg_span != span { + if let Ok(snippet) = self.span_to_snippet(sugg_span) { + // Consume the function name. + let mut offset = snippet + .find(|c: char| !c.is_alphanumeric() && c != '_') + .expect("no label after fn"); + + // Consume the generics part of the function signature. + let mut bracket_counter = 0; + let mut last_char = None; + for c in snippet[offset..].chars() { + match c { + '<' => bracket_counter += 1, + '>' => bracket_counter -= 1, + '(' => { + if bracket_counter == 0 { + break; + } + } + _ => {} + } + offset += c.len_utf8(); + last_char = Some(c); + } + + // Adjust the suggestion span to encompass the function name with its generics. + let sugg_span = sugg_span.with_hi(BytePos(sugg_span.lo().0 + offset as u32)); + + // Prepare the new suggested snippet to append the type parameter that triggered + // the error in the generics of the function signature. 
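+                // E.g. (illustrative) for `fn foo<A>(param: T)` with the span on `T`:
+                // `&snippet[..offset]` is `"foo<A>"`, `last_char` is `Some('>')`, and the
+                // final snippet becomes `"foo<A, T>"`.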
+ let mut new_snippet = if last_char == Some('>') { + format!("{}, ", &snippet[..(offset - '>'.len_utf8())]) + } else { + format!("{}<", &snippet[..offset]) + }; + new_snippet + .push_str(&self.span_to_snippet(span).unwrap_or_else(|_| "T".to_string())); + new_snippet.push('>'); + + return Some((sugg_span, new_snippet)); + } + } + + None + } + pub fn ensure_source_file_source_present(&self, source_file: Lrc<SourceFile>) -> bool { + source_file.add_external_src(|| match source_file.name { + FileName::Real(ref name) => self.file_loader.read_file(name.local_path()).ok(), + _ => None, + }) + } + + pub fn is_imported(&self, sp: Span) -> bool { + let source_file_index = self.lookup_source_file_idx(sp.lo()); + let source_file = &self.files()[source_file_index]; + source_file.is_imported() + } +} + +#[derive(Clone)] +pub struct FilePathMapping { + mapping: Vec<(PathBuf, PathBuf)>, +} + +impl FilePathMapping { + pub fn empty() -> FilePathMapping { + FilePathMapping { mapping: vec![] } + } + + pub fn new(mapping: Vec<(PathBuf, PathBuf)>) -> FilePathMapping { + FilePathMapping { mapping } + } + + /// Applies any path prefix substitution as defined by the mapping. + /// The return value is the remapped path and a boolean indicating whether + /// the path was affected by the mapping. + pub fn map_prefix(&self, path: PathBuf) -> (PathBuf, bool) { + // NOTE: We are iterating over the mapping entries from last to first + // because entries specified later on the command line should + // take precedence. + for &(ref from, ref to) in self.mapping.iter().rev() { + if let Ok(rest) = path.strip_prefix(from) { + return (to.join(rest), true); + } + } + + (path, false) + } +} diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs new file mode 100644 index 00000000000..b8459eee4ec --- /dev/null +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -0,0 +1,272 @@ +use super::*; + +use rustc_data_structures::sync::Lrc; + +fn init_source_map() -> SourceMap { + let sm = SourceMap::new(FilePathMapping::empty()); + sm.new_source_file(PathBuf::from("blork.rs").into(), "first line.\nsecond line".to_string()); + sm.new_source_file(PathBuf::from("empty.rs").into(), String::new()); + sm.new_source_file(PathBuf::from("blork2.rs").into(), "first line.\nsecond line".to_string()); + sm +} + +/// Tests `lookup_byte_offset`. +#[test] +fn t3() { + let sm = init_source_map(); + + let srcfbp1 = sm.lookup_byte_offset(BytePos(23)); + assert_eq!(srcfbp1.sf.name, PathBuf::from("blork.rs").into()); + assert_eq!(srcfbp1.pos, BytePos(23)); + + let srcfbp1 = sm.lookup_byte_offset(BytePos(24)); + assert_eq!(srcfbp1.sf.name, PathBuf::from("empty.rs").into()); + assert_eq!(srcfbp1.pos, BytePos(0)); + + let srcfbp2 = sm.lookup_byte_offset(BytePos(25)); + assert_eq!(srcfbp2.sf.name, PathBuf::from("blork2.rs").into()); + assert_eq!(srcfbp2.pos, BytePos(0)); +} + +/// Tests `bytepos_to_file_charpos`. +#[test] +fn t4() { + let sm = init_source_map(); + + let cp1 = sm.bytepos_to_file_charpos(BytePos(22)); + assert_eq!(cp1, CharPos(22)); + + let cp2 = sm.bytepos_to_file_charpos(BytePos(25)); + assert_eq!(cp2, CharPos(0)); +} + +/// Tests zero-length `SourceFile`s. 
+#[test] +fn t5() { + let sm = init_source_map(); + + let loc1 = sm.lookup_char_pos(BytePos(22)); + assert_eq!(loc1.file.name, PathBuf::from("blork.rs").into()); + assert_eq!(loc1.line, 2); + assert_eq!(loc1.col, CharPos(10)); + + let loc2 = sm.lookup_char_pos(BytePos(25)); + assert_eq!(loc2.file.name, PathBuf::from("blork2.rs").into()); + assert_eq!(loc2.line, 1); + assert_eq!(loc2.col, CharPos(0)); +} + +fn init_source_map_mbc() -> SourceMap { + let sm = SourceMap::new(FilePathMapping::empty()); + // "€" is a three-byte UTF8 char. + sm.new_source_file( + PathBuf::from("blork.rs").into(), + "fir€st €€€€ line.\nsecond line".to_string(), + ); + sm.new_source_file( + PathBuf::from("blork2.rs").into(), + "first line€€.\n€ second line".to_string(), + ); + sm +} + +/// Tests `bytepos_to_file_charpos` in the presence of multi-byte chars. +#[test] +fn t6() { + let sm = init_source_map_mbc(); + + let cp1 = sm.bytepos_to_file_charpos(BytePos(3)); + assert_eq!(cp1, CharPos(3)); + + let cp2 = sm.bytepos_to_file_charpos(BytePos(6)); + assert_eq!(cp2, CharPos(4)); + + let cp3 = sm.bytepos_to_file_charpos(BytePos(56)); + assert_eq!(cp3, CharPos(12)); + + let cp4 = sm.bytepos_to_file_charpos(BytePos(61)); + assert_eq!(cp4, CharPos(15)); +} + +/// Test `span_to_lines` for a span ending at the end of a `SourceFile`. +#[test] +fn t7() { + let sm = init_source_map(); + let span = Span::with_root_ctxt(BytePos(12), BytePos(23)); + let file_lines = sm.span_to_lines(span).unwrap(); + + assert_eq!(file_lines.file.name, PathBuf::from("blork.rs").into()); + assert_eq!(file_lines.lines.len(), 1); + assert_eq!(file_lines.lines[0].line_index, 1); +} + +/// Given a string like " ~~~~~~~~~~~~ ", produces a span +/// converting that range. The idea is that the string has the same +/// length as the input, and we uncover the byte positions. Note +/// that this can span lines and so on. +fn span_from_selection(input: &str, selection: &str) -> Span { + assert_eq!(input.len(), selection.len()); + let left_index = selection.find('~').unwrap() as u32; + let right_index = selection.rfind('~').map(|x| x as u32).unwrap_or(left_index); + Span::with_root_ctxt(BytePos(left_index), BytePos(right_index + 1)) +} + +/// Tests `span_to_snippet` and `span_to_lines` for a span converting 3 +/// lines in the middle of a file. +#[test] +fn span_to_snippet_and_lines_spanning_multiple_lines() { + let sm = SourceMap::new(FilePathMapping::empty()); + let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n"; + let selection = " \n ~~\n~~~\n~~~~~ \n \n"; + sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_string()); + let span = span_from_selection(inputtext, selection); + + // Check that we are extracting the text we thought we were extracting. + assert_eq!(&sm.span_to_snippet(span).unwrap(), "BB\nCCC\nDDDDD"); + + // Check that span_to_lines gives us the complete result with the lines/cols we expected. + let lines = sm.span_to_lines(span).unwrap(); + let expected = vec![ + LineInfo { line_index: 1, start_col: CharPos(4), end_col: CharPos(6) }, + LineInfo { line_index: 2, start_col: CharPos(0), end_col: CharPos(3) }, + LineInfo { line_index: 3, start_col: CharPos(0), end_col: CharPos(5) }, + ]; + assert_eq!(lines.lines, expected); +} + +/// Test span_to_snippet for a span ending at the end of a `SourceFile`. 
+#[test] +fn t8() { + let sm = init_source_map(); + let span = Span::with_root_ctxt(BytePos(12), BytePos(23)); + let snippet = sm.span_to_snippet(span); + + assert_eq!(snippet, Ok("second line".to_string())); +} + +/// Test `span_to_str` for a span ending at the end of a `SourceFile`. +#[test] +fn t9() { + let sm = init_source_map(); + let span = Span::with_root_ctxt(BytePos(12), BytePos(23)); + let sstr = sm.span_to_string(span); + + assert_eq!(sstr, "blork.rs:2:1: 2:12"); +} + +/// Tests failing to merge two spans on different lines. +#[test] +fn span_merging_fail() { + let sm = SourceMap::new(FilePathMapping::empty()); + let inputtext = "bbbb BB\ncc CCC\n"; + let selection1 = " ~~\n \n"; + let selection2 = " \n ~~~\n"; + sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_owned()); + let span1 = span_from_selection(inputtext, selection1); + let span2 = span_from_selection(inputtext, selection2); + + assert!(sm.merge_spans(span1, span2).is_none()); +} + +/// Tests loading an external source file that requires normalization. +#[test] +fn t10() { + let sm = SourceMap::new(FilePathMapping::empty()); + let unnormalized = "first line.\r\nsecond line"; + let normalized = "first line.\nsecond line"; + + let src_file = sm.new_source_file(PathBuf::from("blork.rs").into(), unnormalized.to_string()); + + assert_eq!(src_file.src.as_ref().unwrap().as_ref(), normalized); + assert!( + src_file.src_hash.matches(unnormalized), + "src_hash should use the source before normalization" + ); + + let SourceFile { + name, + name_was_remapped, + src_hash, + start_pos, + end_pos, + lines, + multibyte_chars, + non_narrow_chars, + normalized_pos, + name_hash, + .. + } = (*src_file).clone(); + + let imported_src_file = sm.new_imported_source_file( + name, + name_was_remapped, + src_hash, + name_hash, + (end_pos - start_pos).to_usize(), + CrateNum::new(0), + lines, + multibyte_chars, + non_narrow_chars, + normalized_pos, + start_pos, + end_pos, + ); + + assert!( + imported_src_file.external_src.borrow().get_source().is_none(), + "imported source file should not have source yet" + ); + imported_src_file.add_external_src(|| Some(unnormalized.to_string())); + assert_eq!( + imported_src_file.external_src.borrow().get_source().unwrap().as_ref(), + normalized, + "imported source file should be normalized" + ); +} + +/// Returns the span corresponding to the `n`th occurrence of `substring` in `source_text`. 
+trait SourceMapExtension { + fn span_substr( + &self, + file: &Lrc<SourceFile>, + source_text: &str, + substring: &str, + n: usize, + ) -> Span; +} + +impl SourceMapExtension for SourceMap { + fn span_substr( + &self, + file: &Lrc<SourceFile>, + source_text: &str, + substring: &str, + n: usize, + ) -> Span { + println!( + "span_substr(file={:?}/{:?}, substring={:?}, n={})", + file.name, file.start_pos, substring, n + ); + let mut i = 0; + let mut hi = 0; + loop { + let offset = source_text[hi..].find(substring).unwrap_or_else(|| { + panic!( + "source_text `{}` does not have {} occurrences of `{}`, only {}", + source_text, n, substring, i + ); + }); + let lo = hi + offset; + hi = lo + substring.len(); + if i == n { + let span = Span::with_root_ctxt( + BytePos(lo as u32 + file.start_pos.0), + BytePos(hi as u32 + file.start_pos.0), + ); + assert_eq!(&self.span_to_snippet(span).unwrap()[..], substring); + return span; + } + i += 1; + } + } +} diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs new file mode 100644 index 00000000000..b05e01d666b --- /dev/null +++ b/compiler/rustc_span/src/span_encoding.rs @@ -0,0 +1,133 @@ +// Spans are encoded using 1-bit tag and 2 different encoding formats (one for each tag value). +// One format is used for keeping span data inline, +// another contains index into an out-of-line span interner. +// The encoding format for inline spans were obtained by optimizing over crates in rustc/libstd. +// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28 + +use crate::hygiene::SyntaxContext; +use crate::SESSION_GLOBALS; +use crate::{BytePos, SpanData}; + +use rustc_data_structures::fx::FxIndexSet; + +/// A compressed span. +/// +/// `SpanData` is 12 bytes, which is a bit too big to stick everywhere. `Span` +/// is a form that only takes up 8 bytes, with less space for the length and +/// context. The vast majority (99.9%+) of `SpanData` instances will fit within +/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are +/// stored in a separate interner table, and the `Span` will index into that +/// table. Interning is rare enough that the cost is low, but common enough +/// that the code is exercised regularly. +/// +/// An earlier version of this code used only 4 bytes for `Span`, but that was +/// slower because only 80--90% of spans could be stored inline (even less in +/// very large crates) and so the interner was used a lot more. +/// +/// Inline (compressed) format: +/// - `span.base_or_index == span_data.lo` +/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`) +/// - `span.ctxt == span_data.ctxt` (must be `<= MAX_CTXT`) +/// +/// Interned format: +/// - `span.base_or_index == index` (indexes into the interner table) +/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero) +/// - `span.ctxt == 0` +/// +/// The inline form uses 0 for the tag value (rather than 1) so that we don't +/// need to mask out the tag bit when getting the length, and so that the +/// dummy span can be all zeroes. +/// +/// Notes about the choice of field sizes: +/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base` +/// values never cause interning. The number of bits needed for `base` +/// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate. +/// `script-servo` is the largest crate in `rustc-perf`, requiring 26 bits +/// for some spans. 
+/// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits +/// in `SpanData`, which means that large `len` values will cause interning. +/// The number of bits needed for `len` does not depend on the crate size. +/// The most common number of bits for `len` are 0--7, with a peak usually at +/// 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough +/// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur +/// dozens of times in a typical crate. +/// - `ctxt` is 16 bits in `Span` and 32 bits in `SpanData`, which means that +/// large `ctxt` values will cause interning. The number of bits needed for +/// `ctxt` values depend partly on the crate size and partly on the form of +/// the code. No crates in `rustc-perf` need more than 15 bits for `ctxt`, +/// but larger crates might need more than 16 bits. +/// +#[derive(Clone, Copy, Eq, PartialEq, Hash)] +pub struct Span { + base_or_index: u32, + len_or_tag: u16, + ctxt_or_zero: u16, +} + +const LEN_TAG: u16 = 0b1000_0000_0000_0000; +const MAX_LEN: u32 = 0b0111_1111_1111_1111; +const MAX_CTXT: u32 = 0b1111_1111_1111_1111; + +/// Dummy span, both position and length are zero, syntax context is zero as well. +pub const DUMMY_SP: Span = Span { base_or_index: 0, len_or_tag: 0, ctxt_or_zero: 0 }; + +impl Span { + #[inline] + pub fn new(mut lo: BytePos, mut hi: BytePos, ctxt: SyntaxContext) -> Self { + if lo > hi { + std::mem::swap(&mut lo, &mut hi); + } + + let (base, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32()); + + if len <= MAX_LEN && ctxt2 <= MAX_CTXT { + // Inline format. + Span { base_or_index: base, len_or_tag: len as u16, ctxt_or_zero: ctxt2 as u16 } + } else { + // Interned format. + let index = with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt })); + Span { base_or_index: index, len_or_tag: LEN_TAG, ctxt_or_zero: 0 } + } + } + + #[inline] + pub fn data(self) -> SpanData { + if self.len_or_tag != LEN_TAG { + // Inline format. + debug_assert!(self.len_or_tag as u32 <= MAX_LEN); + SpanData { + lo: BytePos(self.base_or_index), + hi: BytePos(self.base_or_index + self.len_or_tag as u32), + ctxt: SyntaxContext::from_u32(self.ctxt_or_zero as u32), + } + } else { + // Interned format. + debug_assert!(self.ctxt_or_zero == 0); + let index = self.base_or_index; + with_span_interner(|interner| *interner.get(index)) + } + } +} + +#[derive(Default)] +pub struct SpanInterner { + spans: FxIndexSet<SpanData>, +} + +impl SpanInterner { + fn intern(&mut self, span_data: &SpanData) -> u32 { + let (index, _) = self.spans.insert_full(*span_data); + index as u32 + } + + #[inline] + fn get(&self, index: u32) -> &SpanData { + &self.spans[index as usize] + } +} + +// If an interner exists, return it. Otherwise, prepare a fresh one. +#[inline] +fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T { + SESSION_GLOBALS.with(|session_globals| f(&mut *session_globals.span_interner.lock())) +} diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs new file mode 100644 index 00000000000..5092b945f72 --- /dev/null +++ b/compiler/rustc_span/src/symbol.rs @@ -0,0 +1,1693 @@ +//! An "interner" is a data structure that associates values with usize tags and +//! allows bidirectional lookup; i.e., given a value, one can easily find the +//! type, and vice versa. 
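+//!
+//! For example (an illustrative sketch of the interning API defined below):
+//!
+//! ```rust,ignore (illustrative)
+//! let hello = Symbol::intern("hello");
+//! assert_eq!(hello, Symbol::intern("hello")); // the same string always yields the same symbol
+//! assert!(hello.as_str() == "hello");         // and the symbol maps back to the string
+//! ```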
+ +use rustc_arena::DroplessArena; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey}; +use rustc_macros::HashStable_Generic; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +use std::cmp::{Ord, PartialEq, PartialOrd}; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::str; + +use crate::{Span, DUMMY_SP, SESSION_GLOBALS}; + +#[cfg(test)] +mod tests; + +// The proc macro code for this is in `src/librustc_macros/src/symbols.rs`. +symbols! { + // After modifying this list adjust `is_special`, `is_used_keyword`/`is_unused_keyword`, + // this should be rarely necessary though if the keywords are kept in alphabetic order. + Keywords { + // Special reserved identifiers used internally for elided lifetimes, + // unnamed method parameters, crate root module, error recovery etc. + Invalid: "", + PathRoot: "{{root}}", + DollarCrate: "$crate", + Underscore: "_", + + // Keywords that are used in stable Rust. + As: "as", + Break: "break", + Const: "const", + Continue: "continue", + Crate: "crate", + Else: "else", + Enum: "enum", + Extern: "extern", + False: "false", + Fn: "fn", + For: "for", + If: "if", + Impl: "impl", + In: "in", + Let: "let", + Loop: "loop", + Match: "match", + Mod: "mod", + Move: "move", + Mut: "mut", + Pub: "pub", + Ref: "ref", + Return: "return", + SelfLower: "self", + SelfUpper: "Self", + Static: "static", + Struct: "struct", + Super: "super", + Trait: "trait", + True: "true", + Type: "type", + Unsafe: "unsafe", + Use: "use", + Where: "where", + While: "while", + + // Keywords that are used in unstable Rust or reserved for future use. + Abstract: "abstract", + Become: "become", + Box: "box", + Do: "do", + Final: "final", + Macro: "macro", + Override: "override", + Priv: "priv", + Typeof: "typeof", + Unsized: "unsized", + Virtual: "virtual", + Yield: "yield", + + // Edition-specific keywords that are used in stable Rust. + Async: "async", // >= 2018 Edition only + Await: "await", // >= 2018 Edition only + Dyn: "dyn", // >= 2018 Edition only + + // Edition-specific keywords that are used in unstable Rust or reserved for future use. + Try: "try", // >= 2018 Edition only + + // Special lifetime names + UnderscoreLifetime: "'_", + StaticLifetime: "'static", + + // Weak keywords, have special meaning only in specific contexts. + Auto: "auto", + Catch: "catch", + Default: "default", + MacroRules: "macro_rules", + Raw: "raw", + Union: "union", + } + + // Pre-interned symbols that can be referred to with `rustc_span::sym::*`. + // + // The symbol is the stringified identifier unless otherwise specified, in + // which case the name should mention the non-identifier punctuation. + // E.g. `sym::proc_dash_macro` represents "proc-macro", and it shouldn't be + // called `sym::proc_macro` because then it's easy to mistakenly think it + // represents "proc_macro". + // + // As well as the symbols listed, there are symbols for the the strings + // "0", "1", ..., "9", which are accessible via `sym::integer`. + // + // The proc macro will abort if symbols are not in alphabetical order (as + // defined by `impl Ord for str`) or if any symbols are duplicated. Vim + // users can sort the list by selecting it and executing the command + // `:'<,'>!LC_ALL=C sort`. + // + // There is currently no checking that all symbols are used; that would be + // nice to have. 
+ Symbols { + Alignment, + Arc, + Argument, + ArgumentV1, + Arguments, + C, + Center, + Clone, + Copy, + Count, + Debug, + Decodable, + Decoder, + Default, + Encodable, + Encoder, + Eq, + Equal, + Err, + Error, + FormatSpec, + Formatter, + From, + Future, + FxHashMap, + FxHashSet, + GlobalAlloc, + Hash, + HashMap, + HashSet, + Hasher, + Implied, + Input, + IntoIterator, + Is, + ItemContext, + Iterator, + Layout, + Left, + LintPass, + None, + Ok, + Option, + Ord, + Ordering, + Output, + Param, + PartialEq, + PartialOrd, + Pending, + Pin, + Poll, + ProcMacro, + ProcMacroHack, + ProceduralMasqueradeDummyType, + Range, + RangeFrom, + RangeFull, + RangeInclusive, + RangeTo, + RangeToInclusive, + Rc, + Ready, + Result, + Return, + Right, + RustcDecodable, + RustcEncodable, + Send, + Some, + StructuralEq, + StructuralPartialEq, + Sync, + Target, + Try, + Ty, + TyCtxt, + TyKind, + Unknown, + Vec, + Yield, + _DECLS, + _Self, + __D, + __H, + __S, + __next, + __try_var, + _d, + _e, + _task_context, + aarch64_target_feature, + abi, + abi_amdgpu_kernel, + abi_avr_interrupt, + abi_efiapi, + abi_msp430_interrupt, + abi_ptx, + abi_sysv64, + abi_thiscall, + abi_unadjusted, + abi_vectorcall, + abi_x86_interrupt, + abort, + aborts, + add, + add_assign, + add_with_overflow, + address, + advanced_slice_patterns, + adx_target_feature, + alias, + align, + align_offset, + alignstack, + all, + alloc, + alloc_error_handler, + alloc_layout, + alloc_zeroed, + allocator, + allocator_internals, + allow, + allow_fail, + allow_internal_unsafe, + allow_internal_unstable, + allow_internal_unstable_backcompat_hack, + allowed, + always, + and, + and_then, + any, + arbitrary_enum_discriminant, + arbitrary_self_types, + arith_offset, + arm_target_feature, + array, + arrays, + as_str, + asm, + assert, + assert_inhabited, + assert_receiver_is_total_eq, + assert_uninit_valid, + assert_zero_valid, + associated_consts, + associated_type_bounds, + associated_type_defaults, + associated_types, + assume, + assume_init, + async_await, + async_closure, + atomics, + att_syntax, + attr, + attr_literals, + attributes, + augmented_assignments, + automatically_derived, + avx512_target_feature, + await_macro, + bang, + begin_panic, + bench, + bin, + bind_by_move_pattern_guards, + bindings_after_at, + bitand, + bitand_assign, + bitor, + bitor_assign, + bitreverse, + bitxor, + bitxor_assign, + block, + bool, + borrowck_graphviz_format, + borrowck_graphviz_postflow, + borrowck_graphviz_preflow, + box_free, + box_patterns, + box_syntax, + braced_empty_structs, + breakpoint, + bridge, + bswap, + c_variadic, + call, + call_mut, + call_once, + caller_location, + cdylib, + ceilf32, + ceilf64, + cfg, + cfg_accessible, + cfg_attr, + cfg_attr_multi, + cfg_doctest, + cfg_sanitize, + cfg_target_feature, + cfg_target_has_atomic, + cfg_target_thread_local, + cfg_target_vendor, + cfg_version, + char, + client, + clippy, + clone, + clone_closures, + clone_from, + closure_to_fn_coercion, + cmp, + cmpxchg16b_target_feature, + coerce_unsized, + cold, + column, + compile_error, + compiler_builtins, + concat, + concat_idents, + conservative_impl_trait, + console, + const_compare_raw_pointers, + const_constructor, + const_eval_limit, + const_extern_fn, + const_fn, + const_fn_transmute, + const_fn_union, + const_generics, + const_if_match, + const_in_array_repeat_expressions, + const_indexing, + const_let, + const_loop, + const_mut_refs, + const_panic, + const_precise_live_drops, + const_ptr, + const_raw_ptr_deref, + const_raw_ptr_to_usize_cast, + const_slice_ptr, + 
const_trait_bound_opt_out, + const_trait_impl, + const_transmute, + contents, + context, + convert, + copy, + copy_closures, + copy_nonoverlapping, + copysignf32, + copysignf64, + core, + core_intrinsics, + cosf32, + cosf64, + crate_id, + crate_in_paths, + crate_local, + crate_name, + crate_type, + crate_visibility_modifier, + crt_dash_static: "crt-static", + ctlz, + ctlz_nonzero, + ctpop, + cttz, + cttz_nonzero, + custom_attribute, + custom_derive, + custom_inner_attributes, + custom_test_frameworks, + d, + dead_code, + dealloc, + debug, + debug_assertions, + debug_struct, + debug_trait, + debug_trait_builder, + debug_tuple, + decl_macro, + declare_lint_pass, + decode, + default_lib_allocator, + default_type_parameter_fallback, + default_type_params, + delay_span_bug_from_inside_query, + deny, + deprecated, + deref, + deref_mut, + derive, + diagnostic, + direct, + discriminant_kind, + discriminant_type, + discriminant_value, + dispatch_from_dyn, + div, + div_assign, + doc, + doc_alias, + doc_cfg, + doc_keyword, + doc_masked, + doc_spotlight, + doctest, + document_private_items, + dotdot_in_tuple_patterns, + dotdoteq_in_patterns, + double_braced_closure: "{{closure}}", + double_braced_constant: "{{constant}}", + double_braced_constructor: "{{constructor}}", + double_braced_crate: "{{crate}}", + double_braced_impl: "{{impl}}", + double_braced_misc: "{{misc}}", + double_braced_opaque: "{{opaque}}", + drop, + drop_in_place, + drop_types_in_const, + dropck_eyepatch, + dropck_parametricity, + dylib, + dyn_trait, + eh_catch_typeinfo, + eh_personality, + emit_enum, + emit_enum_variant, + emit_enum_variant_arg, + emit_struct, + emit_struct_field, + enable, + enclosing_scope, + encode, + env, + eq, + err, + exact_div, + except, + exchange_malloc, + exclusive_range_pattern, + exhaustive_integer_patterns, + exhaustive_patterns, + existential_type, + exp2f32, + exp2f64, + expected, + expf32, + expf64, + export_name, + expr, + extern_absolute_paths, + extern_crate_item_prelude, + extern_crate_self, + extern_in_paths, + extern_prelude, + extern_types, + external_doc, + f, + f16c_target_feature, + f32, + f32_runtime, + f64, + f64_runtime, + fabsf32, + fabsf64, + fadd_fast, + fdiv_fast, + feature, + ffi_const, + ffi_pure, + ffi_returns_twice, + field, + field_init_shorthand, + file, + fill, + finish, + flags, + float_to_int_unchecked, + floorf32, + floorf64, + fmaf32, + fmaf64, + fmt, + fmt_internals, + fmul_fast, + fn_must_use, + fn_mut, + fn_once, + fn_once_output, + forbid, + forget, + format, + format_args, + format_args_capture, + format_args_nl, + freeze, + frem_fast, + from, + from_desugaring, + from_error, + from_generator, + from_method, + from_ok, + from_size_align_unchecked, + from_trait, + from_usize, + fsub_fast, + fundamental, + future, + future_trait, + ge, + gen_future, + gen_kill, + generator, + generator_state, + generators, + generic_associated_types, + generic_param_attrs, + get_context, + global_allocator, + global_asm, + globs, + gt, + half_open_range_patterns, + hash, + hexagon_target_feature, + hidden, + homogeneous_aggregate, + html_favicon_url, + html_logo_url, + html_no_source, + html_playground_url, + html_root_url, + i, + i128, + i128_type, + i16, + i32, + i64, + i8, + ident, + if_let, + if_let_guard, + if_while_or_patterns, + ignore, + impl_header_lifetime_elision, + impl_lint_pass, + impl_macros, + impl_trait_in_bindings, + import_shadowing, + in_band_lifetimes, + include, + include_bytes, + include_str, + inclusive_range_syntax, + index, + index_mut, + 
infer_outlives_requirements, + infer_static_outlives_requirements, + inlateout, + inline, + inout, + intel, + into_iter, + into_result, + intrinsics, + irrefutable_let_patterns, + isize, + issue, + issue_5723_bootstrap, + issue_tracker_base_url, + item, + item_like_imports, + iter, + keyword, + kind, + label, + label_break_value, + lang, + lang_items, + lateout, + lazy_normalization_consts, + le, + let_chains, + lhs, + lib, + libc, + lifetime, + likely, + line, + link, + link_args, + link_cfg, + link_llvm_intrinsics, + link_name, + link_ordinal, + link_section, + linkage, + lint_reasons, + literal, + llvm_asm, + local_inner_macros, + log10f32, + log10f64, + log2f32, + log2f64, + log_syntax, + logf32, + logf64, + loop_break_value, + lt, + macro_at_most_once_rep, + macro_escape, + macro_export, + macro_lifetime_matcher, + macro_literal_matcher, + macro_reexport, + macro_use, + macro_vis_matcher, + macros_in_extern, + main, + managed_boxes, + manually_drop, + map, + marker, + marker_trait_attr, + masked, + match_beginning_vert, + match_default_bindings, + maxnumf32, + maxnumf64, + may_dangle, + maybe_uninit, + maybe_uninit_uninit, + maybe_uninit_zeroed, + mem_uninitialized, + mem_zeroed, + member_constraints, + memory, + message, + meta, + min_align_of, + min_align_of_val, + min_const_fn, + min_const_generics, + min_const_unsafe_fn, + min_specialization, + minnumf32, + minnumf64, + mips_target_feature, + mmx_target_feature, + module, + module_path, + more_struct_aliases, + movbe_target_feature, + move_ref_pattern, + move_val_init, + mul, + mul_assign, + mul_with_overflow, + must_use, + mut_ptr, + mut_slice_ptr, + naked, + naked_functions, + name, + ne, + nearbyintf32, + nearbyintf64, + needs_allocator, + needs_drop, + needs_panic_runtime, + neg, + negate_unsigned, + negative_impls, + never, + never_type, + never_type_fallback, + new, + new_unchecked, + next, + nll, + no, + no_builtins, + no_core, + no_crate_inject, + no_debug, + no_default_passes, + no_implicit_prelude, + no_inline, + no_link, + no_main, + no_mangle, + no_niche, + no_sanitize, + no_stack_check, + no_start, + no_std, + nomem, + non_ascii_idents, + non_exhaustive, + non_modrs_mods, + none_error, + nontemporal_store, + nontrapping_dash_fptoint: "nontrapping-fptoint", + noreturn, + nostack, + not, + note, + object_safe_for_dispatch, + of, + offset, + omit_gdb_pretty_printer_section, + on, + on_unimplemented, + oom, + opaque, + ops, + opt_out_copy, + optimize, + optimize_attribute, + optin_builtin_traits, + option, + option_env, + option_type, + options, + or, + or_patterns, + other, + out, + overlapping_marker_traits, + owned_box, + packed, + panic, + panic_abort, + panic_bounds_check, + panic_handler, + panic_impl, + panic_implementation, + panic_info, + panic_location, + panic_runtime, + panic_unwind, + param_attrs, + parent_trait, + partial_cmp, + partial_ord, + passes, + pat, + path, + pattern_parentheses, + phantom_data, + pin, + pinned, + platform_intrinsics, + plugin, + plugin_registrar, + plugins, + pointer, + poll, + position, + post_dash_lto: "post-lto", + powerpc_target_feature, + powf32, + powf64, + powif32, + powif64, + pre_dash_lto: "pre-lto", + precise_pointer_size_matching, + precision, + pref_align_of, + prefetch_read_data, + prefetch_read_instruction, + prefetch_write_data, + prefetch_write_instruction, + prelude, + prelude_import, + preserves_flags, + primitive, + proc_dash_macro: "proc-macro", + proc_macro, + proc_macro_attribute, + proc_macro_def_site, + proc_macro_derive, + proc_macro_expr, + proc_macro_gen, 
+ proc_macro_hygiene, + proc_macro_internals, + proc_macro_mod, + proc_macro_non_items, + proc_macro_path_invoc, + profiler_builtins, + profiler_runtime, + ptr_guaranteed_eq, + ptr_guaranteed_ne, + ptr_offset_from, + pub_restricted, + pure, + pushpop_unsafe, + quad_precision_float, + question_mark, + quote, + range_inclusive_new, + raw_dylib, + raw_identifiers, + raw_ref_op, + re_rebalance_coherence, + read_enum, + read_enum_variant, + read_enum_variant_arg, + read_struct, + read_struct_field, + readonly, + realloc, + reason, + receiver, + recursion_limit, + reexport_test_harness_main, + reference, + reflect, + register_attr, + register_tool, + relaxed_adts, + rem, + rem_assign, + repr, + repr128, + repr_align, + repr_align_enum, + repr_no_niche, + repr_packed, + repr_simd, + repr_transparent, + result, + result_type, + rhs, + rintf32, + rintf64, + riscv_target_feature, + rlib, + rotate_left, + rotate_right, + roundf32, + roundf64, + rt, + rtm_target_feature, + rust, + rust_2015_preview, + rust_2018_preview, + rust_begin_unwind, + rust_eh_catch_typeinfo, + rust_eh_personality, + rust_eh_register_frames, + rust_eh_unregister_frames, + rust_oom, + rustc, + rustc_allocator, + rustc_allocator_nounwind, + rustc_allow_const_fn_ptr, + rustc_args_required_const, + rustc_attrs, + rustc_builtin_macro, + rustc_clean, + rustc_const_stable, + rustc_const_unstable, + rustc_conversion_suggestion, + rustc_def_path, + rustc_deprecated, + rustc_diagnostic_item, + rustc_diagnostic_macros, + rustc_dirty, + rustc_dummy, + rustc_dump_env_program_clauses, + rustc_dump_program_clauses, + rustc_dump_user_substs, + rustc_error, + rustc_expected_cgu_reuse, + rustc_if_this_changed, + rustc_inherit_overflow_checks, + rustc_layout, + rustc_layout_scalar_valid_range_end, + rustc_layout_scalar_valid_range_start, + rustc_macro_transparency, + rustc_mir, + rustc_nonnull_optimization_guaranteed, + rustc_object_lifetime_default, + rustc_on_unimplemented, + rustc_outlives, + rustc_paren_sugar, + rustc_partition_codegened, + rustc_partition_reused, + rustc_peek, + rustc_peek_definite_init, + rustc_peek_indirectly_mutable, + rustc_peek_liveness, + rustc_peek_maybe_init, + rustc_peek_maybe_uninit, + rustc_polymorphize_error, + rustc_private, + rustc_proc_macro_decls, + rustc_promotable, + rustc_regions, + rustc_reservation_impl, + rustc_serialize, + rustc_specialization_trait, + rustc_stable, + rustc_std_internal_symbol, + rustc_symbol_name, + rustc_synthetic, + rustc_test_marker, + rustc_then_this_would_need, + rustc_unsafe_specialization_marker, + rustc_variance, + rustfmt, + rvalue_static_promotion, + sanitize, + sanitizer_runtime, + saturating_add, + saturating_sub, + self_in_typedefs, + self_struct_ctor, + semitransparent, + send_trait, + shl, + shl_assign, + should_panic, + shr, + shr_assign, + simd, + simd_add, + simd_and, + simd_bitmask, + simd_cast, + simd_ceil, + simd_div, + simd_eq, + simd_extract, + simd_fabs, + simd_fcos, + simd_fexp, + simd_fexp2, + simd_ffi, + simd_flog, + simd_flog10, + simd_flog2, + simd_floor, + simd_fma, + simd_fmax, + simd_fmin, + simd_fpow, + simd_fpowi, + simd_fsin, + simd_fsqrt, + simd_gather, + simd_ge, + simd_gt, + simd_insert, + simd_le, + simd_lt, + simd_mul, + simd_ne, + simd_or, + simd_reduce_add_ordered, + simd_reduce_add_unordered, + simd_reduce_all, + simd_reduce_and, + simd_reduce_any, + simd_reduce_max, + simd_reduce_max_nanless, + simd_reduce_min, + simd_reduce_min_nanless, + simd_reduce_mul_ordered, + simd_reduce_mul_unordered, + simd_reduce_or, + simd_reduce_xor, + simd_rem, 
+ simd_saturating_add, + simd_saturating_sub, + simd_scatter, + simd_select, + simd_select_bitmask, + simd_shl, + simd_shr, + simd_sub, + simd_xor, + since, + sinf32, + sinf64, + size, + size_of, + size_of_val, + sized, + slice, + slice_alloc, + slice_patterns, + slice_u8, + slice_u8_alloc, + slicing_syntax, + soft, + specialization, + speed, + spotlight, + sqrtf32, + sqrtf64, + sse4a_target_feature, + stable, + staged_api, + start, + state, + static_in_const, + static_nobundle, + static_recursion, + staticlib, + std, + std_inject, + stmt, + stmt_expr_attributes, + stop_after_dataflow, + str, + str_alloc, + string_type, + stringify, + struct_field_attributes, + struct_inherit, + struct_variant, + structural_match, + structural_peq, + structural_teq, + sty, + sub, + sub_assign, + sub_with_overflow, + suggestion, + sym, + sync, + sync_trait, + target_arch, + target_endian, + target_env, + target_family, + target_feature, + target_feature_11, + target_has_atomic, + target_has_atomic_load_store, + target_os, + target_pointer_width, + target_target_vendor, + target_thread_local, + target_vendor, + task, + tbm_target_feature, + termination, + termination_trait, + termination_trait_test, + test, + test_2018_feature, + test_accepted_feature, + test_case, + test_removed_feature, + test_runner, + then_with, + thread, + thread_local, + tool_attributes, + tool_lints, + trace_macros, + track_caller, + trait_alias, + transmute, + transparent, + transparent_enums, + transparent_unions, + trivial_bounds, + truncf32, + truncf64, + try_blocks, + try_trait, + tt, + tuple, + tuple_indexing, + two_phase, + ty, + type_alias_enum_variants, + type_alias_impl_trait, + type_ascription, + type_id, + type_length_limit, + type_macros, + type_name, + u128, + u16, + u32, + u64, + u8, + unaligned_volatile_load, + unaligned_volatile_store, + unboxed_closures, + unchecked_add, + unchecked_div, + unchecked_mul, + unchecked_rem, + unchecked_shl, + unchecked_shr, + unchecked_sub, + underscore_const_names, + underscore_imports, + underscore_lifetimes, + uniform_paths, + unit, + universal_impl_trait, + unix, + unlikely, + unmarked_api, + unpin, + unreachable, + unreachable_code, + unrestricted_attribute_tokens, + unsafe_block_in_unsafe_fn, + unsafe_cell, + unsafe_no_drop_flag, + unsize, + unsized_locals, + unsized_tuple_coercion, + unstable, + untagged_unions, + unused_qualifications, + unwind, + unwind_attributes, + unwrap_or, + use_extern_macros, + use_nested_groups, + used, + usize, + v1, + va_arg, + va_copy, + va_end, + va_list, + va_start, + val, + var, + variant_count, + vec, + vec_type, + version, + vis, + visible_private_types, + volatile, + volatile_copy_memory, + volatile_copy_nonoverlapping_memory, + volatile_load, + volatile_set_memory, + volatile_store, + warn, + wasm_import_module, + wasm_target_feature, + while_let, + width, + windows, + windows_subsystem, + wrapping_add, + wrapping_mul, + wrapping_sub, + write_bytes, + } +} + +#[derive(Copy, Clone, Eq, HashStable_Generic, Encodable, Decodable)] +pub struct Ident { + pub name: Symbol, + pub span: Span, +} + +impl Ident { + #[inline] + /// Constructs a new identifier from a symbol and a span. + pub const fn new(name: Symbol, span: Span) -> Ident { + Ident { name, span } + } + + /// Constructs a new identifier with a dummy span. 
+ #[inline] + pub const fn with_dummy_span(name: Symbol) -> Ident { + Ident::new(name, DUMMY_SP) + } + + #[inline] + pub fn invalid() -> Ident { + Ident::with_dummy_span(kw::Invalid) + } + + /// Maps a string to an identifier with a dummy span. + pub fn from_str(string: &str) -> Ident { + Ident::with_dummy_span(Symbol::intern(string)) + } + + /// Maps a string and a span to an identifier. + pub fn from_str_and_span(string: &str, span: Span) -> Ident { + Ident::new(Symbol::intern(string), span) + } + + /// Replaces `lo` and `hi` with those from `span`, but keep hygiene context. + pub fn with_span_pos(self, span: Span) -> Ident { + Ident::new(self.name, span.with_ctxt(self.span.ctxt())) + } + + pub fn without_first_quote(self) -> Ident { + Ident::new(Symbol::intern(self.as_str().trim_start_matches('\'')), self.span) + } + + /// "Normalize" ident for use in comparisons using "item hygiene". + /// Identifiers with same string value become same if they came from the same macro 2.0 macro + /// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from + /// different macro 2.0 macros. + /// Technically, this operation strips all non-opaque marks from ident's syntactic context. + pub fn normalize_to_macros_2_0(self) -> Ident { + Ident::new(self.name, self.span.normalize_to_macros_2_0()) + } + + /// "Normalize" ident for use in comparisons using "local variable hygiene". + /// Identifiers with same string value become same if they came from the same non-transparent + /// macro (e.g., `macro` or `macro_rules!` items) and stay different if they came from different + /// non-transparent macros. + /// Technically, this operation strips all transparent marks from ident's syntactic context. + pub fn normalize_to_macro_rules(self) -> Ident { + Ident::new(self.name, self.span.normalize_to_macro_rules()) + } + + /// Convert the name to a `SymbolStr`. This is a slowish operation because + /// it requires locking the symbol interner. + pub fn as_str(self) -> SymbolStr { + self.name.as_str() + } +} + +impl PartialEq for Ident { + fn eq(&self, rhs: &Self) -> bool { + self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt() + } +} + +impl Hash for Ident { + fn hash<H: Hasher>(&self, state: &mut H) { + self.name.hash(state); + self.span.ctxt().hash(state); + } +} + +impl fmt::Debug for Ident { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self, f)?; + fmt::Debug::fmt(&self.span.ctxt(), f) + } +} + +/// This implementation is supposed to be used in error messages, so it's expected to be identical +/// to printing the original identifier token written in source code (`token_to_string`), +/// except that AST identifiers don't keep the rawness flag, so we have to guess it. +impl fmt::Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&IdentPrinter::new(self.name, self.is_raw_guess(), None), f) + } +} + +/// This is the most general way to print identifiers. +/// AST pretty-printer is used as a fallback for turning AST structures into token streams for +/// proc macros. Additionally, proc macros may stringify their input and expect it survive the +/// stringification (especially true for proc macro derives written between Rust 1.15 and 1.30). +/// So we need to somehow pretty-print `$crate` in a way preserving at least some of its +/// hygiene data, most importantly name of the crate it refers to. 
+/// As a result we print `$crate` as `crate` if it refers to the local crate +/// and as `::other_crate_name` if it refers to some other crate. +/// Note, that this is only done if the ident token is printed from inside of AST pretty-pringing, +/// but not otherwise. Pretty-printing is the only way for proc macros to discover token contents, +/// so we should not perform this lossy conversion if the top level call to the pretty-printer was +/// done for a token stream or a single token. +pub struct IdentPrinter { + symbol: Symbol, + is_raw: bool, + /// Span used for retrieving the crate name to which `$crate` refers to, + /// if this field is `None` then the `$crate` conversion doesn't happen. + convert_dollar_crate: Option<Span>, +} + +impl IdentPrinter { + /// The most general `IdentPrinter` constructor. Do not use this. + pub fn new(symbol: Symbol, is_raw: bool, convert_dollar_crate: Option<Span>) -> IdentPrinter { + IdentPrinter { symbol, is_raw, convert_dollar_crate } + } + + /// This implementation is supposed to be used when printing identifiers + /// as a part of pretty-printing for larger AST pieces. + /// Do not use this either. + pub fn for_ast_ident(ident: Ident, is_raw: bool) -> IdentPrinter { + IdentPrinter::new(ident.name, is_raw, Some(ident.span)) + } +} + +impl fmt::Display for IdentPrinter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_raw { + f.write_str("r#")?; + } else { + if self.symbol == kw::DollarCrate { + if let Some(span) = self.convert_dollar_crate { + let converted = span.ctxt().dollar_crate_name(); + if !converted.is_path_segment_keyword() { + f.write_str("::")?; + } + return fmt::Display::fmt(&converted, f); + } + } + } + fmt::Display::fmt(&self.symbol, f) + } +} + +/// An newtype around `Ident` that calls [Ident::normalize_to_macro_rules] on +/// construction. +// FIXME(matthewj, petrochenkov) Use this more often, add a similar +// `ModernIdent` struct and use that as well. +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub struct MacroRulesNormalizedIdent(Ident); + +impl MacroRulesNormalizedIdent { + pub fn new(ident: Ident) -> Self { + Self(ident.normalize_to_macro_rules()) + } +} + +impl fmt::Debug for MacroRulesNormalizedIdent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.0, f) + } +} + +impl fmt::Display for MacroRulesNormalizedIdent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} + +/// An interned string. +/// +/// Internally, a `Symbol` is implemented as an index, and all operations +/// (including hashing, equality, and ordering) operate on that index. The use +/// of `rustc_index::newtype_index!` means that `Option<Symbol>` only takes up 4 bytes, +/// because `rustc_index::newtype_index!` reserves the last 256 values for tagging purposes. +/// +/// Note that `Symbol` cannot directly be a `rustc_index::newtype_index!` because it +/// implements `fmt::Debug`, `Encodable`, and `Decodable` in special ways. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Symbol(SymbolIndex); + +rustc_index::newtype_index! { + pub struct SymbolIndex { .. } +} + +impl Symbol { + const fn new(n: u32) -> Self { + Symbol(SymbolIndex::from_u32(n)) + } + + /// Maps a string to its interned representation. + pub fn intern(string: &str) -> Self { + with_interner(|interner| interner.intern(string)) + } + + /// Access the symbol's chars. This is a slowish operation because it + /// requires locking the symbol interner. 
+ pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R { + with_interner(|interner| f(interner.get(self))) + } + + /// Convert to a `SymbolStr`. This is a slowish operation because it + /// requires locking the symbol interner. + pub fn as_str(self) -> SymbolStr { + with_interner(|interner| unsafe { + SymbolStr { string: std::mem::transmute::<&str, &str>(interner.get(self)) } + }) + } + + pub fn as_u32(self) -> u32 { + self.0.as_u32() + } + + /// This method is supposed to be used in error messages, so it's expected to be + /// identical to printing the original identifier token written in source code + /// (`token_to_string`, `Ident::to_string`), except that symbols don't keep the rawness flag + /// or edition, so we have to guess the rawness using the global edition. + pub fn to_ident_string(self) -> String { + Ident::with_dummy_span(self).to_string() + } +} + +impl fmt::Debug for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.with(|str| fmt::Debug::fmt(&str, f)) + } +} + +impl fmt::Display for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.with(|str| fmt::Display::fmt(&str, f)) + } +} + +impl<S: Encoder> Encodable<S> for Symbol { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { + self.with(|string| s.emit_str(string)) + } +} + +impl<D: Decoder> Decodable<D> for Symbol { + #[inline] + fn decode(d: &mut D) -> Result<Symbol, D::Error> { + Ok(Symbol::intern(&d.read_str()?)) + } +} + +impl<CTX> HashStable<CTX> for Symbol { + #[inline] + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.as_str().hash_stable(hcx, hasher); + } +} + +impl<CTX> ToStableHashKey<CTX> for Symbol { + type KeyType = SymbolStr; + + #[inline] + fn to_stable_hash_key(&self, _: &CTX) -> SymbolStr { + self.as_str() + } +} + +// The `&'static str`s in this type actually point into the arena. +// +// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278 +// found that to regress performance up to 2% in some cases. This might be +// revisited after further improvements to `indexmap`. +#[derive(Default)] +pub struct Interner { + arena: DroplessArena, + names: FxHashMap<&'static str, Symbol>, + strings: Vec<&'static str>, +} + +impl Interner { + fn prefill(init: &[&'static str]) -> Self { + Interner { + strings: init.into(), + names: init.iter().copied().zip((0..).map(Symbol::new)).collect(), + ..Default::default() + } + } + + #[inline] + pub fn intern(&mut self, string: &str) -> Symbol { + if let Some(&name) = self.names.get(string) { + return name; + } + + let name = Symbol::new(self.strings.len() as u32); + + // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be + // UTF-8. + let string: &str = + unsafe { str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes())) }; + // It is safe to extend the arena allocation to `'static` because we only access + // these while the arena is still alive. + let string: &'static str = unsafe { &*(string as *const str) }; + self.strings.push(string); + self.names.insert(string, name); + name + } + + // Get the symbol as a string. `Symbol::as_str()` should be used in + // preference to this function. + pub fn get(&self, symbol: Symbol) -> &str { + self.strings[symbol.0.as_usize()] + } +} + +// This module has a very short name because it's used a lot. +/// This module contains all the defined keyword `Symbol`s. +/// +/// Given that `kw` is imported, use them like `kw::keyword_name`. +/// For example `kw::Loop` or `kw::Break`. 
+pub mod kw { + use super::Symbol; + keywords!(); +} + +// This module has a very short name because it's used a lot. +/// This module contains all the defined non-keyword `Symbol`s. +/// +/// Given that `sym` is imported, use them like `sym::symbol_name`. +/// For example `sym::rustfmt` or `sym::u8`. +#[allow(rustc::default_hash_types)] +pub mod sym { + use super::Symbol; + use std::convert::TryInto; + + define_symbols!(); + + // Used from a macro in `librustc_feature/accepted.rs` + pub use super::kw::MacroRules as macro_rules; + + // Get the symbol for an integer. The first few non-negative integers each + // have a static symbol and therefore are fast. + pub fn integer<N: TryInto<usize> + Copy + ToString>(n: N) -> Symbol { + if let Result::Ok(idx) = n.try_into() { + if let Option::Some(&sym_) = digits_array.get(idx) { + return sym_; + } + } + Symbol::intern(&n.to_string()) + } +} + +impl Symbol { + fn is_used_keyword_2018(self) -> bool { + self >= kw::Async && self <= kw::Dyn + } + + fn is_unused_keyword_2018(self) -> bool { + self == kw::Try + } + + /// Used for sanity checking rustdoc keyword sections. + pub fn is_doc_keyword(self) -> bool { + self <= kw::Union + } + + /// A keyword or reserved identifier that can be used as a path segment. + pub fn is_path_segment_keyword(self) -> bool { + self == kw::Super + || self == kw::SelfLower + || self == kw::SelfUpper + || self == kw::Crate + || self == kw::PathRoot + || self == kw::DollarCrate + } + + /// Returns `true` if the symbol is `true` or `false`. + pub fn is_bool_lit(self) -> bool { + self == kw::True || self == kw::False + } + + /// This symbol can be a raw identifier. + pub fn can_be_raw(self) -> bool { + self != kw::Invalid && self != kw::Underscore && !self.is_path_segment_keyword() + } +} + +impl Ident { + // Returns `true` for reserved identifiers used internally for elided lifetimes, + // unnamed method parameters, crate root module, error recovery etc. + pub fn is_special(self) -> bool { + self.name <= kw::Underscore + } + + /// Returns `true` if the token is a keyword used in the language. + pub fn is_used_keyword(self) -> bool { + // Note: `span.edition()` is relatively expensive, don't call it unless necessary. + self.name >= kw::As && self.name <= kw::While + || self.name.is_used_keyword_2018() && self.span.rust_2018() + } + + /// Returns `true` if the token is a keyword reserved for possible future use. + pub fn is_unused_keyword(self) -> bool { + // Note: `span.edition()` is relatively expensive, don't call it unless necessary. + self.name >= kw::Abstract && self.name <= kw::Yield + || self.name.is_unused_keyword_2018() && self.span.rust_2018() + } + + /// Returns `true` if the token is either a special identifier or a keyword. + pub fn is_reserved(self) -> bool { + self.is_special() || self.is_used_keyword() || self.is_unused_keyword() + } + + /// A keyword or reserved identifier that can be used as a path segment. + pub fn is_path_segment_keyword(self) -> bool { + self.name.is_path_segment_keyword() + } + + /// We see this identifier in a normal identifier position, like variable name or a type. + /// How was it written originally? Did it use the raw form? Let's try to guess. 
+ pub fn is_raw_guess(self) -> bool { + self.name.can_be_raw() && self.is_reserved() + } +} + +#[inline] +fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T { + SESSION_GLOBALS.with(|session_globals| f(&mut *session_globals.symbol_interner.lock())) +} + +/// An alternative to `Symbol`, useful when the chars within the symbol need to +/// be accessed. It deliberately has limited functionality and should only be +/// used for temporary values. +/// +/// Because the interner outlives any thread which uses this type, we can +/// safely treat `string` which points to interner data, as an immortal string, +/// as long as this type never crosses between threads. +// +// FIXME: ensure that the interner outlives any thread which uses `SymbolStr`, +// by creating a new thread right after constructing the interner. +#[derive(Clone, Eq, PartialOrd, Ord)] +pub struct SymbolStr { + string: &'static str, +} + +// This impl allows a `SymbolStr` to be directly equated with a `String` or +// `&str`. +impl<T: std::ops::Deref<Target = str>> std::cmp::PartialEq<T> for SymbolStr { + fn eq(&self, other: &T) -> bool { + self.string == other.deref() + } +} + +impl !Send for SymbolStr {} +impl !Sync for SymbolStr {} + +/// This impl means that if `ss` is a `SymbolStr`: +/// - `*ss` is a `str`; +/// - `&*ss` is a `&str` (and `match &*ss { ... }` is a common pattern). +/// - `&ss as &str` is a `&str`, which means that `&ss` can be passed to a +/// function expecting a `&str`. +impl std::ops::Deref for SymbolStr { + type Target = str; + #[inline] + fn deref(&self) -> &str { + self.string + } +} + +impl fmt::Debug for SymbolStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.string, f) + } +} + +impl fmt::Display for SymbolStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.string, f) + } +} + +impl<CTX> HashStable<CTX> for SymbolStr { + #[inline] + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.string.hash_stable(hcx, hasher) + } +} + +impl<CTX> ToStableHashKey<CTX> for SymbolStr { + type KeyType = SymbolStr; + + #[inline] + fn to_stable_hash_key(&self, _: &CTX) -> SymbolStr { + self.clone() + } +} diff --git a/compiler/rustc_span/src/symbol/tests.rs b/compiler/rustc_span/src/symbol/tests.rs new file mode 100644 index 00000000000..47da03424b7 --- /dev/null +++ b/compiler/rustc_span/src/symbol/tests.rs @@ -0,0 +1,25 @@ +use super::*; + +use crate::{edition, SessionGlobals}; + +#[test] +fn interner_tests() { + let mut i: Interner = Interner::default(); + // first one is zero: + assert_eq!(i.intern("dog"), Symbol::new(0)); + // re-use gets the same entry: + assert_eq!(i.intern("dog"), Symbol::new(0)); + // different string gets a different #: + assert_eq!(i.intern("cat"), Symbol::new(1)); + assert_eq!(i.intern("cat"), Symbol::new(1)); + // dog is still at zero + assert_eq!(i.intern("dog"), Symbol::new(0)); +} + +#[test] +fn without_first_quote_test() { + SESSION_GLOBALS.set(&SessionGlobals::new(edition::DEFAULT_EDITION), || { + let i = Ident::from_str("'break"); + assert_eq!(i.without_first_quote().name, kw::Break); + }); +} diff --git a/compiler/rustc_span/src/tests.rs b/compiler/rustc_span/src/tests.rs new file mode 100644 index 00000000000..3c8eb8bcd31 --- /dev/null +++ b/compiler/rustc_span/src/tests.rs @@ -0,0 +1,40 @@ +use super::*; + +#[test] +fn test_lookup_line() { + let lines = &[BytePos(3), BytePos(17), BytePos(28)]; + + assert_eq!(lookup_line(lines, BytePos(0)), -1); + 
assert_eq!(lookup_line(lines, BytePos(3)), 0); + assert_eq!(lookup_line(lines, BytePos(4)), 0); + + assert_eq!(lookup_line(lines, BytePos(16)), 0); + assert_eq!(lookup_line(lines, BytePos(17)), 1); + assert_eq!(lookup_line(lines, BytePos(18)), 1); + + assert_eq!(lookup_line(lines, BytePos(28)), 2); + assert_eq!(lookup_line(lines, BytePos(29)), 2); +} + +#[test] +fn test_normalize_newlines() { + fn check(before: &str, after: &str, expected_positions: &[u32]) { + let mut actual = before.to_string(); + let mut actual_positions = vec![]; + normalize_newlines(&mut actual, &mut actual_positions); + let actual_positions: Vec<_> = actual_positions.into_iter().map(|nc| nc.pos.0).collect(); + assert_eq!(actual.as_str(), after); + assert_eq!(actual_positions, expected_positions); + } + check("", "", &[]); + check("\n", "\n", &[]); + check("\r", "\r", &[]); + check("\r\r", "\r\r", &[]); + check("\r\n", "\n", &[1]); + check("hello world", "hello world", &[]); + check("hello\nworld", "hello\nworld", &[]); + check("hello\r\nworld", "hello\nworld", &[6]); + check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n", &[1, 7, 13]); + check("\r\r\n", "\r\n", &[2]); + check("hello\rworld", "hello\rworld", &[]); +} |
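[Editorial sketch, not part of the commit above.] The lookup_line implementation itself is not part of the excerpt shown here, but the assertions in test_lookup_line pin down its contract: return -1 when the position precedes the first recorded line start, otherwise the index of the last line start that is less than or equal to the position. A standalone sketch consistent with those tests, using plain u32 positions instead of BytePos and partition_point for the binary search, could look like this; treat it as one way to satisfy the tests, not as the code under test.

// Line-start table lookup: which line does `pos` fall on?
fn lookup_line(lines: &[u32], pos: u32) -> isize {
    // Count the line starts that are <= pos; one less than that count is the
    // containing line, and a count of zero means pos precedes every line (-1).
    lines.partition_point(|&start| start <= pos) as isize - 1
}

fn main() {
    let lines = &[3u32, 17, 28];
    assert_eq!(lookup_line(lines, 0), -1);
    assert_eq!(lookup_line(lines, 3), 0);
    assert_eq!(lookup_line(lines, 16), 0);
    assert_eq!(lookup_line(lines, 17), 1);
    assert_eq!(lookup_line(lines, 29), 2);
}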

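[Editorial sketch, not part of the commit above.] Looking back at span_encoding.rs earlier in this diff, the inline/interned split can be modelled in isolation. The sketch below mirrors the logic of Span::new and Span::data under simplifying assumptions: a local Vec<SpanData> stands in for the SESSION_GLOBALS interner, plain u32 values replace BytePos and SyntaxContext, and the lo/hi swap and debug assertions are omitted. It is a model of the encoding, not the compiler's implementation.

// Constants mirroring span_encoding.rs: a 15-bit length limit, a 16-bit
// context limit, and a tag value with only the high bit set.
const LEN_TAG: u16 = 0x8000;
const MAX_LEN: u32 = 0x7FFF;
const MAX_CTXT: u32 = 0xFFFF;

#[derive(Clone, Copy, PartialEq, Debug)]
struct SpanData { lo: u32, hi: u32, ctxt: u32 }

#[derive(Clone, Copy)]
struct Span { base_or_index: u32, len_or_tag: u16, ctxt_or_zero: u16 }

// Encode: inline when the length and context fit, otherwise push the full
// data into the side table and store only its index.
fn encode(data: SpanData, table: &mut Vec<SpanData>) -> Span {
    let len = data.hi - data.lo;
    if len <= MAX_LEN && data.ctxt <= MAX_CTXT {
        Span { base_or_index: data.lo, len_or_tag: len as u16, ctxt_or_zero: data.ctxt as u16 }
    } else {
        table.push(data);
        Span { base_or_index: (table.len() - 1) as u32, len_or_tag: LEN_TAG, ctxt_or_zero: 0 }
    }
}

// Decode: the value in `len_or_tag` tells us which format we are looking at.
fn decode(span: Span, table: &[SpanData]) -> SpanData {
    if span.len_or_tag != LEN_TAG {
        SpanData {
            lo: span.base_or_index,
            hi: span.base_or_index + span.len_or_tag as u32,
            ctxt: span.ctxt_or_zero as u32,
        }
    } else {
        table[span.base_or_index as usize]
    }
}

fn main() {
    let mut table = Vec::new();
    let small = SpanData { lo: 10, hi: 20, ctxt: 0 };
    let huge = SpanData { lo: 0, hi: 100_000, ctxt: 0 }; // len > MAX_LEN, so interned

    let s1 = encode(small, &mut table);
    let s2 = encode(huge, &mut table);
    assert_eq!(decode(s1, &table), small); // round-trips without touching the table
    assert_eq!(decode(s2, &table), huge);  // round-trips through the side table
    assert_eq!(table.len(), 1);            // only the oversized span was interned
}

Keeping the tag as the high bit of len_or_tag, with zero meaning inline, is what allows the all-zero DUMMY_SP value, as the comment in span_encoding.rs points out.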