diff options
| author | Jeffrey Seyfried <jeffrey.seyfried@gmail.com> | 2017-03-17 04:04:41 +0000 |
|---|---|---|
| committer | Jeffrey Seyfried <jeffrey.seyfried@gmail.com> | 2017-03-29 00:41:10 +0000 |
| commit | ec7c0aece17c9a11bc2eca15b994355a161bf878 (patch) | |
| tree | 05ff8d97fedf1ab11028eddbd4982896cc61387f /src/libsyntax_pos | |
| parent | 496996c2af6174cb83a65756249d289f315dff80 (diff) | |
| download | rust-ec7c0aece17c9a11bc2eca15b994355a161bf878.tar.gz rust-ec7c0aece17c9a11bc2eca15b994355a161bf878.zip | |
Merge `ExpnId` and `SyntaxContext`.
Diffstat (limited to 'src/libsyntax_pos')
| -rw-r--r-- | src/libsyntax_pos/hygiene.rs | 94 | ||||
| -rw-r--r-- | src/libsyntax_pos/lib.rs | 101 | ||||
| -rw-r--r-- | src/libsyntax_pos/symbol.rs | 389 |
3 files changed, 550 insertions, 34 deletions
diff --git a/src/libsyntax_pos/hygiene.rs b/src/libsyntax_pos/hygiene.rs index feebbcd6f03..8a9ff647b3e 100644 --- a/src/libsyntax_pos/hygiene.rs +++ b/src/libsyntax_pos/hygiene.rs @@ -15,12 +15,16 @@ //! and definition contexts*. J. Funct. Program. 22, 2 (March 2012), 181-216. //! DOI=10.1017/S0956796812000093 http://dx.doi.org/10.1017/S0956796812000093 +use Span; +use symbol::Symbol; + +use serialize::{Encodable, Decodable, Encoder, Decoder}; use std::cell::RefCell; use std::collections::HashMap; use std::fmt; /// A SyntaxContext represents a chain of macro expansions (represented by marks). -#[derive(Clone, Copy, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable, Default)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SyntaxContext(u32); #[derive(Copy, Clone)] @@ -36,8 +40,8 @@ pub struct Mark(u32); impl Mark { pub fn fresh() -> Self { HygieneData::with(|data| { - let next_mark = Mark(data.next_mark.0 + 1); - ::std::mem::replace(&mut data.next_mark, next_mark) + data.marks.push(None); + Mark(data.marks.len() as u32 - 1) }) } @@ -53,23 +57,31 @@ impl Mark { pub fn from_u32(raw: u32) -> Mark { Mark(raw) } + + pub fn expn_info(self) -> Option<ExpnInfo> { + HygieneData::with(|data| data.marks[self.0 as usize].clone()) + } + + pub fn set_expn_info(self, info: ExpnInfo) { + HygieneData::with(|data| data.marks[self.0 as usize] = Some(info)) + } } struct HygieneData { + marks: Vec<Option<ExpnInfo>>, syntax_contexts: Vec<SyntaxContextData>, markings: HashMap<(SyntaxContext, Mark), SyntaxContext>, - next_mark: Mark, } impl HygieneData { fn new() -> Self { HygieneData { + marks: vec![None], syntax_contexts: vec![SyntaxContextData { outer_mark: Mark::root(), prev_ctxt: SyntaxContext::empty(), }], markings: HashMap::new(), - next_mark: Mark(1), } } @@ -81,8 +93,8 @@ impl HygieneData { } } -pub fn reset_hygiene_data() { - HygieneData::with(|data| *data = HygieneData::new()) +pub fn clear_markings() { + HygieneData::with(|data| data.markings = HashMap::new()); } impl SyntaxContext { @@ -113,6 +125,10 @@ impl SyntaxContext { }) }) } + + pub fn outer(self) -> Mark { + HygieneData::with(|data| data.syntax_contexts[self.0 as usize].outer_mark) + } } impl fmt::Debug for SyntaxContext { @@ -120,3 +136,67 @@ impl fmt::Debug for SyntaxContext { write!(f, "#{}", self.0) } } + +/// Extra information for tracking spans of macro and syntax sugar expansion +#[derive(Clone, Hash, Debug)] +pub struct ExpnInfo { + /// The location of the actual macro invocation or syntax sugar , e.g. + /// `let x = foo!();` or `if let Some(y) = x {}` + /// + /// This may recursively refer to other macro invocations, e.g. if + /// `foo!()` invoked `bar!()` internally, and there was an + /// expression inside `bar!`; the call_site of the expression in + /// the expansion would point to the `bar!` invocation; that + /// call_site span would have its own ExpnInfo, with the call_site + /// pointing to the `foo!` invocation. + pub call_site: Span, + /// Information about the expansion. + pub callee: NameAndSpan +} + +#[derive(Clone, Hash, Debug)] +pub struct NameAndSpan { + /// The format with which the macro was invoked. + pub format: ExpnFormat, + /// Whether the macro is allowed to use #[unstable]/feature-gated + /// features internally without forcing the whole crate to opt-in + /// to them. + pub allow_internal_unstable: bool, + /// The span of the macro definition itself. The macro may not + /// have a sensible definition span (e.g. something defined + /// completely inside libsyntax) in which case this is None. + pub span: Option<Span> +} + +impl NameAndSpan { + pub fn name(&self) -> Symbol { + match self.format { + ExpnFormat::MacroAttribute(s) | + ExpnFormat::MacroBang(s) | + ExpnFormat::CompilerDesugaring(s) => s, + } + } +} + +/// The source of expansion. +#[derive(Clone, Hash, Debug, PartialEq, Eq)] +pub enum ExpnFormat { + /// e.g. #[derive(...)] <item> + MacroAttribute(Symbol), + /// e.g. `format!()` + MacroBang(Symbol), + /// Desugaring done by the compiler during HIR lowering. + CompilerDesugaring(Symbol) +} + +impl Encodable for SyntaxContext { + fn encode<E: Encoder>(&self, _: &mut E) -> Result<(), E::Error> { + Ok(()) // FIXME(jseyfried) intercrate hygiene + } +} + +impl Decodable for SyntaxContext { + fn decode<D: Decoder>(_: &mut D) -> Result<SyntaxContext, D::Error> { + Ok(SyntaxContext::empty()) // FIXME(jseyfried) intercrate hygiene + } +} diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index 1b62d62348b..9b45e364ecf 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -25,6 +25,7 @@ #![feature(const_fn)] #![feature(custom_attribute)] +#![feature(optin_builtin_traits)] #![allow(unused_attributes)] #![feature(rustc_private)] #![feature(staged_api)] @@ -43,6 +44,9 @@ extern crate serialize; extern crate serialize as rustc_serialize; // used by deriving pub mod hygiene; +pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan}; + +pub mod symbol; pub type FileName = String; @@ -60,7 +64,7 @@ pub struct Span { pub hi: BytePos, /// Information about where the macro came from, if this piece of /// code was created by a macro expansion. - pub expn_id: ExpnId + pub ctxt: SyntaxContext, } /// A collection of spans. Spans have two orthogonal attributes: @@ -79,7 +83,7 @@ impl Span { /// Returns a new span representing just the end-point of this span pub fn end_point(self) -> Span { let lo = cmp::max(self.hi.0 - 1, self.lo.0); - Span { lo: BytePos(lo), hi: self.hi, expn_id: self.expn_id} + Span { lo: BytePos(lo), hi: self.hi, ctxt: self.ctxt } } /// Returns `self` if `self` is not the dummy span, and `other` otherwise. @@ -107,6 +111,69 @@ impl Span { None } } + + /// Return the source span - this is either the supplied span, or the span for + /// the macro callsite that expanded to it. + pub fn source_callsite(self) -> Span { + self.ctxt.outer().expn_info().map(|info| info.call_site.source_callsite()).unwrap_or(self) + } + + /// Return the source callee. + /// + /// Returns None if the supplied span has no expansion trace, + /// else returns the NameAndSpan for the macro definition + /// corresponding to the source callsite. + pub fn source_callee(self) -> Option<NameAndSpan> { + fn source_callee(info: ExpnInfo) -> NameAndSpan { + match info.call_site.ctxt.outer().expn_info() { + Some(info) => source_callee(info), + None => info.callee, + } + } + self.ctxt.outer().expn_info().map(source_callee) + } + + /// Check if a span is "internal" to a macro in which #[unstable] + /// items can be used (that is, a macro marked with + /// `#[allow_internal_unstable]`). + pub fn allows_unstable(&self) -> bool { + match self.ctxt.outer().expn_info() { + Some(info) => info.callee.allow_internal_unstable, + None => false, + } + } + + pub fn macro_backtrace(mut self) -> Vec<MacroBacktrace> { + let mut prev_span = DUMMY_SP; + let mut result = vec![]; + loop { + let info = match self.ctxt.outer().expn_info() { + Some(info) => info, + None => break, + }; + + let (pre, post) = match info.callee.format { + ExpnFormat::MacroAttribute(..) => ("#[", "]"), + ExpnFormat::MacroBang(..) => ("", "!"), + ExpnFormat::CompilerDesugaring(..) => ("desugaring of `", "`"), + }; + let macro_decl_name = format!("{}{}{}", pre, info.callee.name(), post); + let def_site_span = info.callee.span; + + // Don't print recursive invocations + if !info.call_site.source_equal(&prev_span) { + result.push(MacroBacktrace { + call_site: info.call_site, + macro_decl_name: macro_decl_name, + def_site_span: def_site_span, + }); + } + + prev_span = self; + self = info.call_site; + } + result + } } #[derive(Clone, Debug)] @@ -147,8 +214,8 @@ impl serialize::UseSpecializedDecodable for Span { } fn default_span_debug(span: Span, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Span {{ lo: {:?}, hi: {:?}, expn_id: {:?} }}", - span.lo, span.hi, span.expn_id) + write!(f, "Span {{ lo: {:?}, hi: {:?}, ctxt: {:?} }}", + span.lo, span.hi, span.ctxt) } impl fmt::Debug for Span { @@ -157,12 +224,7 @@ impl fmt::Debug for Span { } } -pub const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_id: NO_EXPANSION }; - -// Generic span to be used for code originating from the command line -pub const COMMAND_LINE_SP: Span = Span { lo: BytePos(0), - hi: BytePos(0), - expn_id: COMMAND_LINE_EXPN }; +pub const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), ctxt: NO_EXPANSION }; impl MultiSpan { pub fn new() -> MultiSpan { @@ -256,22 +318,7 @@ impl From<Span> for MultiSpan { } } -#[derive(PartialEq, Eq, Clone, Debug, Hash, RustcEncodable, RustcDecodable, Copy, Ord, PartialOrd)] -pub struct ExpnId(pub u32); - -pub const NO_EXPANSION: ExpnId = ExpnId(!0); -// For code appearing from the command line -pub const COMMAND_LINE_EXPN: ExpnId = ExpnId(!1); - -impl ExpnId { - pub fn from_u32(id: u32) -> ExpnId { - ExpnId(id) - } - - pub fn into_u32(self) -> u32 { - self.0 - } -} +pub const NO_EXPANSION: SyntaxContext = SyntaxContext::empty(); /// Identifies an offset of a multi-byte character in a FileMap #[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)] @@ -651,7 +698,7 @@ thread_local!(pub static SPAN_DEBUG: Cell<fn(Span, &mut fmt::Formatter) -> fmt:: /* assuming that we're not in macro expansion */ pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span { - Span {lo: lo, hi: hi, expn_id: NO_EXPANSION} + Span {lo: lo, hi: hi, ctxt: NO_EXPANSION} } pub struct MacroBacktrace { diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs new file mode 100644 index 00000000000..b866652c49f --- /dev/null +++ b/src/libsyntax_pos/symbol.rs @@ -0,0 +1,389 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! An "interner" is a data structure that associates values with usize tags and +//! allows bidirectional lookup; i.e. given a value, one can easily find the +//! type, and vice versa. + +use hygiene::SyntaxContext; + +use serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::cell::RefCell; +use std::collections::HashMap; +use std::fmt; + +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Ident { + pub name: Symbol, + pub ctxt: SyntaxContext, +} + +impl Ident { + pub const fn with_empty_ctxt(name: Symbol) -> Ident { + Ident { name: name, ctxt: SyntaxContext::empty() } + } + + /// Maps a string to an identifier with an empty syntax context. + pub fn from_str(string: &str) -> Ident { + Ident::with_empty_ctxt(Symbol::intern(string)) + } + + pub fn unhygienize(self) -> Ident { + Ident { name: self.name, ctxt: SyntaxContext::empty() } + } +} + +impl fmt::Debug for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}{:?}", self.name, self.ctxt) + } +} + +impl fmt::Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.name, f) + } +} + +impl Encodable for Ident { + fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { + self.name.encode(s) + } +} + +impl Decodable for Ident { + fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> { + Ok(Ident::with_empty_ctxt(Symbol::decode(d)?)) + } +} + +/// A symbol is an interned or gensymed string. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Symbol(u32); + +// The interner in thread-local, so `Symbol` shouldn't move between threads. +impl !Send for Symbol { } + +impl Symbol { + /// Maps a string to its interned representation. + pub fn intern(string: &str) -> Self { + with_interner(|interner| interner.intern(string)) + } + + /// gensym's a new usize, using the current interner. + pub fn gensym(string: &str) -> Self { + with_interner(|interner| interner.gensym(string)) + } + + pub fn as_str(self) -> InternedString { + with_interner(|interner| unsafe { + InternedString { + string: ::std::mem::transmute::<&str, &str>(interner.get(self)) + } + }) + } + + pub fn as_u32(self) -> u32 { + self.0 + } +} + +impl fmt::Debug for Symbol { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}({})", self, self.0) + } +} + +impl fmt::Display for Symbol { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.as_str(), f) + } +} + +impl Encodable for Symbol { + fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_str(&self.as_str()) + } +} + +impl Decodable for Symbol { + fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> { + Ok(Symbol::intern(&d.read_str()?)) + } +} + +impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol { + fn eq(&self, other: &T) -> bool { + self.as_str() == other.deref() + } +} + +#[derive(Default)] +pub struct Interner { + names: HashMap<Box<str>, Symbol>, + strings: Vec<Box<str>>, +} + +impl Interner { + pub fn new() -> Self { + Interner::default() + } + + fn prefill(init: &[&str]) -> Self { + let mut this = Interner::new(); + for &string in init { + this.intern(string); + } + this + } + + pub fn intern(&mut self, string: &str) -> Symbol { + if let Some(&name) = self.names.get(string) { + return name; + } + + let name = Symbol(self.strings.len() as u32); + let string = string.to_string().into_boxed_str(); + self.strings.push(string.clone()); + self.names.insert(string, name); + name + } + + fn gensym(&mut self, string: &str) -> Symbol { + let gensym = Symbol(self.strings.len() as u32); + // leave out of `names` to avoid colliding + self.strings.push(string.to_string().into_boxed_str()); + gensym + } + + pub fn get(&self, name: Symbol) -> &str { + &self.strings[name.0 as usize] + } +} + +// In this macro, there is the requirement that the name (the number) must be monotonically +// increasing by one in the special identifiers, starting at 0; the same holds for the keywords, +// except starting from the next number instead of zero. +macro_rules! declare_keywords {( + $( ($index: expr, $konst: ident, $string: expr) )* +) => { + pub mod keywords { + use super::{Symbol, Ident}; + #[derive(Clone, Copy, PartialEq, Eq)] + pub struct Keyword { + ident: Ident, + } + impl Keyword { + #[inline] pub fn ident(self) -> Ident { self.ident } + #[inline] pub fn name(self) -> Symbol { self.ident.name } + } + $( + #[allow(non_upper_case_globals)] + pub const $konst: Keyword = Keyword { + ident: Ident::with_empty_ctxt(super::Symbol($index)) + }; + )* + } + + impl Interner { + fn fresh() -> Self { + Interner::prefill(&[$($string,)*]) + } + } +}} + +// NB: leaving holes in the ident table is bad! a different ident will get +// interned with the id from the hole, but it will be between the min and max +// of the reserved words, and thus tagged as "reserved". +// After modifying this list adjust `is_strict_keyword`/`is_reserved_keyword`, +// this should be rarely necessary though if the keywords are kept in alphabetic order. +declare_keywords! { + // Invalid identifier + (0, Invalid, "") + + // Strict keywords used in the language. + (1, As, "as") + (2, Box, "box") + (3, Break, "break") + (4, Const, "const") + (5, Continue, "continue") + (6, Crate, "crate") + (7, Else, "else") + (8, Enum, "enum") + (9, Extern, "extern") + (10, False, "false") + (11, Fn, "fn") + (12, For, "for") + (13, If, "if") + (14, Impl, "impl") + (15, In, "in") + (16, Let, "let") + (17, Loop, "loop") + (18, Match, "match") + (19, Mod, "mod") + (20, Move, "move") + (21, Mut, "mut") + (22, Pub, "pub") + (23, Ref, "ref") + (24, Return, "return") + (25, SelfValue, "self") + (26, SelfType, "Self") + (27, Static, "static") + (28, Struct, "struct") + (29, Super, "super") + (30, Trait, "trait") + (31, True, "true") + (32, Type, "type") + (33, Unsafe, "unsafe") + (34, Use, "use") + (35, Where, "where") + (36, While, "while") + + // Keywords reserved for future use. + (37, Abstract, "abstract") + (38, Alignof, "alignof") + (39, Become, "become") + (40, Do, "do") + (41, Final, "final") + (42, Macro, "macro") + (43, Offsetof, "offsetof") + (44, Override, "override") + (45, Priv, "priv") + (46, Proc, "proc") + (47, Pure, "pure") + (48, Sizeof, "sizeof") + (49, Typeof, "typeof") + (50, Unsized, "unsized") + (51, Virtual, "virtual") + (52, Yield, "yield") + + // Weak keywords, have special meaning only in specific contexts. + (53, Default, "default") + (54, StaticLifetime, "'static") + (55, Union, "union") + (56, Catch, "catch") + + // A virtual keyword that resolves to the crate root when used in a lexical scope. + (57, CrateRoot, "{{root}}") +} + +// If an interner exists in TLS, return it. Otherwise, prepare a fresh one. +fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T { + thread_local!(static INTERNER: RefCell<Interner> = { + RefCell::new(Interner::fresh()) + }); + INTERNER.with(|interner| f(&mut *interner.borrow_mut())) +} + +/// Represents a string stored in the thread-local interner. Because the +/// interner lives for the life of the thread, this can be safely treated as an +/// immortal string, as long as it never crosses between threads. +/// +/// FIXME(pcwalton): You must be careful about what you do in the destructors +/// of objects stored in TLS, because they may run after the interner is +/// destroyed. In particular, they must not access string contents. This can +/// be fixed in the future by just leaking all strings until thread death +/// somehow. +#[derive(Clone, Hash, PartialOrd, Eq, Ord)] +pub struct InternedString { + string: &'static str, +} + +impl<U: ?Sized> ::std::convert::AsRef<U> for InternedString where str: ::std::convert::AsRef<U> { + fn as_ref(&self) -> &U { + self.string.as_ref() + } +} + +impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for InternedString { + fn eq(&self, other: &T) -> bool { + self.string == other.deref() + } +} + +impl ::std::cmp::PartialEq<InternedString> for str { + fn eq(&self, other: &InternedString) -> bool { + self == other.string + } +} + +impl<'a> ::std::cmp::PartialEq<InternedString> for &'a str { + fn eq(&self, other: &InternedString) -> bool { + *self == other.string + } +} + +impl ::std::cmp::PartialEq<InternedString> for String { + fn eq(&self, other: &InternedString) -> bool { + self == other.string + } +} + +impl<'a> ::std::cmp::PartialEq<InternedString> for &'a String { + fn eq(&self, other: &InternedString) -> bool { + *self == other.string + } +} + +impl !Send for InternedString { } + +impl ::std::ops::Deref for InternedString { + type Target = str; + fn deref(&self) -> &str { self.string } +} + +impl fmt::Debug for InternedString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self.string, f) + } +} + +impl fmt::Display for InternedString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self.string, f) + } +} + +impl Decodable for InternedString { + fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> { + Ok(Symbol::intern(&d.read_str()?).as_str()) + } +} + +impl Encodable for InternedString { + fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_str(self.string) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn interner_tests() { + let mut i: Interner = Interner::new(); + // first one is zero: + assert_eq!(i.intern("dog"), Symbol(0)); + // re-use gets the same entry: + assert_eq!(i.intern ("dog"), Symbol(0)); + // different string gets a different #: + assert_eq!(i.intern("cat"), Symbol(1)); + assert_eq!(i.intern("cat"), Symbol(1)); + // dog is still at zero + assert_eq!(i.intern("dog"), Symbol(0)); + // gensym gets 3 + assert_eq!(i.gensym("zebra"), Symbol(2)); + // gensym of same string gets new number : + assert_eq!(i.gensym("zebra"), Symbol(3)); + // gensym of *existing* string gets new number: + assert_eq!(i.gensym("dog"), Symbol(4)); + } +} |
