From 85b50d03120cf7bd1beb472a78ce9427c0d8d06e Mon Sep 17 00:00:00 2001
From: Igor Matuszewski
Date: Sun, 9 Dec 2018 22:53:00 +0100
Subject: Add missing, non-panicking `maybe_new_parser_from_file` variant

---
 src/libsyntax/parse/mod.rs | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

(limited to 'src/libsyntax')

diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 866dba24dcb..0fda08db67d 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -15,7 +15,7 @@ use ast::{self, CrateConfig, NodeId};
 use early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
 use source_map::{SourceMap, FilePathMapping};
 use syntax_pos::{Span, SourceFile, FileName, MultiSpan};
-use errors::{Handler, ColorConfig, Diagnostic, DiagnosticBuilder};
+use errors::{FatalError, Level, Handler, ColorConfig, Diagnostic, DiagnosticBuilder};
 use feature_gate::UnstableFeatures;
 use parse::parser::Parser;
 use ptr::P;
@@ -192,6 +192,14 @@ pub fn new_parser_from_file<'a>(sess: &'a ParseSess, path: &Path) -> Parser<'a>
     source_file_to_parser(sess, file_to_source_file(sess, path, None))
 }
 
+/// Create a new parser, returning buffered diagnostics if the file doesn't
+/// exist or from lexing the initial token stream.
+pub fn maybe_new_parser_from_file<'a>(sess: &'a ParseSess, path: &Path)
+    -> Result<Parser<'a>, Vec<Diagnostic>> {
+    let file = try_file_to_source_file(sess, path, None).map_err(|db| vec![db])?;
+    maybe_source_file_to_parser(sess, file)
+}
+
 /// Given a session, a crate config, a path, and a span, add
 /// the file at the given path to the source_map, and return a parser.
 /// On an error, use the given span as the source of the problem.
@@ -236,18 +244,31 @@ pub fn new_parser_from_tts(sess: &ParseSess, tts: Vec<TokenTree>) -> Parser {
 
 // base abstractions
 
+/// Given a session and a path and an optional span (for error reporting),
+/// add the path to the session's source_map and return the new source_file or
+/// error when a file can't be read.
+fn try_file_to_source_file(sess: &ParseSess, path: &Path, spanopt: Option<Span>)
+                   -> Result<Lrc<SourceFile>, Diagnostic> {
+    sess.source_map().load_file(path)
+    .map_err(|e| {
+        let msg = format!("couldn't read {}: {}", path.display(), e);
+        let mut diag = Diagnostic::new(Level::Fatal, &msg);
+        if let Some(sp) = spanopt {
+            diag.set_span(sp);
+        }
+        diag
+    })
+}
+
 /// Given a session and a path and an optional span (for error reporting),
 /// add the path to the session's source_map and return the new source_file.
 fn file_to_source_file(sess: &ParseSess, path: &Path, spanopt: Option<Span>)
                    -> Lrc<SourceFile> {
-    match sess.source_map().load_file(path) {
+    match try_file_to_source_file(sess, path, spanopt) {
         Ok(source_file) => source_file,
-        Err(e) => {
-            let msg = format!("couldn't read {}: {}", path.display(), e);
-            match spanopt {
-                Some(sp) => sess.span_diagnostic.span_fatal(sp, &msg).raise(),
-                None => sess.span_diagnostic.fatal(&msg).raise()
-            }
+        Err(d) => {
+            DiagnosticBuilder::new_diagnostic(&sess.span_diagnostic, d).emit();
+            FatalError.raise();
         }
     }
 }
--
cgit 1.4.1-3-g733a5
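
A minimal sketch of how a caller might prefer the new non-panicking entry point
over `new_parser_from_file`. The helper name `parser_or_report` and the choice
to emit-and-continue are illustrative assumptions, not part of the patch; the
emission call mirrors what `file_to_source_file` itself does above:

    // Hypothetical helper: try to parse a file, but let the caller decide what
    // to do with the buffered diagnostics instead of aborting the session.
    fn parser_or_report<'a>(sess: &'a ParseSess, path: &Path) -> Option<Parser<'a>> {
        match maybe_new_parser_from_file(sess, path) {
            Ok(parser) => Some(parser),
            Err(diagnostics) => {
                for diag in diagnostics {
                    // Emit each buffered diagnostic through the session handler.
                    DiagnosticBuilder::new_diagnostic(&sess.span_diagnostic, diag).emit();
                }
                None
            }
        }
    }
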
From 0f68749260d04b61dfe2b38dbc423aefe2914a58 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Tue, 11 Dec 2018 09:06:51 +1100
Subject: Use a `newtype_index!` within `Symbol`.

This shrinks `Option<Symbol>` from 8 bytes to 4 bytes, which shrinks `Token`
from 24 bytes to 16 bytes. This reduces instruction counts by up to 1% across
a range of benchmarks.

---
 src/libsyntax/parse/token.rs |  4 +++
 src/libsyntax_pos/lib.rs     |  3 +++
 src/libsyntax_pos/symbol.rs  | 61 ++++++++++++++++++++++++++++----------------
 3 files changed, 46 insertions(+), 22 deletions(-)

(limited to 'src/libsyntax')

diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 6b9cc2f9792..83763bd32d4 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -207,6 +207,10 @@ pub enum Token {
     Eof,
 }
 
+// `Token` is used a lot. Make sure it doesn't unintentionally get bigger.
+#[cfg(target_arch = "x86_64")]
+static_assert!(MEM_SIZE_OF_STATEMENT: mem::size_of::<Token>() == 16);
+
 impl Token {
     pub fn interpolated(nt: Nonterminal) -> Token {
         Token::Interpolated(Lrc::new((nt, LazyTokenStream::new())))
diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs
index 8b7ffa499cd..9aafb9fc549 100644
--- a/src/libsyntax_pos/lib.rs
+++ b/src/libsyntax_pos/lib.rs
@@ -24,10 +24,13 @@
 #![feature(nll)]
 #![feature(non_exhaustive)]
 #![feature(optin_builtin_traits)]
+#![feature(rustc_attrs)]
 #![feature(specialization)]
+#![feature(step_trait)]
 #![cfg_attr(not(stage0), feature(stdsimd))]
 
 extern crate arena;
+#[macro_use]
 extern crate rustc_data_structures;
 
 #[macro_use]
diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs
index 05c53878e70..3d87134511e 100644
--- a/src/libsyntax_pos/symbol.rs
+++ b/src/libsyntax_pos/symbol.rs
@@ -14,6 +14,7 @@
 
 use arena::DroplessArena;
 use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::indexed_vec::Idx;
 use serialize::{Decodable, Decoder, Encodable, Encoder};
 
 use std::fmt;
@@ -143,9 +144,18 @@ impl Decodable for Ident {
     }
 }
 
-/// A symbol is an interned or gensymed string.
+/// A symbol is an interned or gensymed string. The use of newtype_index! means
+/// that Option<Symbol> only takes up 4 bytes, because newtype_index! reserves
+/// the last 256 values for tagging purposes.
+///
+/// Note that Symbol cannot be a newtype_index! directly because it implements
+/// fmt::Debug, Encodable, and Decodable in special ways.
 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Symbol(u32);
+pub struct Symbol(SymbolIndex);
+
+newtype_index! {
+    pub struct SymbolIndex { .. }
+}
 
 // The interner is pointed to by a thread local value which is only set on the main thread
 // with parallelization is disabled. So we don't allow `Symbol` to transfer between threads
@@ -156,6 +166,10 @@ impl !Send for Symbol { }
 impl !Sync for Symbol { }
 
 impl Symbol {
+    const fn new(n: u32) -> Self {
+        Symbol(SymbolIndex::from_u32_const(n))
+    }
+
     /// Maps a string to its interned representation.
     pub fn intern(string: &str) -> Self {
         with_interner(|interner| interner.intern(string))
@@ -189,7 +203,7 @@ impl Symbol {
     }
 
     pub fn as_u32(self) -> u32 {
-        self.0
+        self.0.as_u32()
     }
 }
 
@@ -197,7 +211,7 @@ impl fmt::Debug for Symbol {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
         if is_gensymed {
-            write!(f, "{}({})", self, self.0)
+            write!(f, "{}({:?})", self, self.0)
         } else {
             write!(f, "{}", self)
         }
@@ -229,6 +243,9 @@ impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for Symbol {
 }
 
 // The `&'static str`s in this type actually point into the arena.
+//
+// Note that normal symbols are indexed upward from 0, and gensyms are indexed
+// downward from SymbolIndex::MAX_AS_U32.
 #[derive(Default)]
 pub struct Interner {
     arena: DroplessArena,
@@ -243,7 +260,7 @@ impl Interner {
         for &string in init {
             if string == "" {
                 // We can't allocate empty strings in the arena, so handle this here.
-                let name = Symbol(this.strings.len() as u32);
+                let name = Symbol::new(this.strings.len() as u32);
                 this.names.insert("", name);
                 this.strings.push("");
             } else {
@@ -258,7 +275,7 @@ impl Interner {
             return name;
         }
 
-        let name = Symbol(self.strings.len() as u32);
+        let name = Symbol::new(self.strings.len() as u32);
 
         // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
         // UTF-8.
@@ -276,10 +293,10 @@ impl Interner {
     }
 
     pub fn interned(&self, symbol: Symbol) -> Symbol {
-        if (symbol.0 as usize) < self.strings.len() {
+        if (symbol.0.as_usize()) < self.strings.len() {
             symbol
         } else {
-            self.interned(self.gensyms[(!0 - symbol.0) as usize])
+            self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
         }
     }
 
@@ -290,17 +307,17 @@ impl Interner {
 
     fn gensymed(&mut self, symbol: Symbol) -> Symbol {
         self.gensyms.push(symbol);
-        Symbol(!0 - self.gensyms.len() as u32 + 1)
+        Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
     }
 
     fn is_gensymed(&mut self, symbol: Symbol) -> bool {
-        symbol.0 as usize >= self.strings.len()
+        symbol.0.as_usize() >= self.strings.len()
     }
 
     pub fn get(&self, symbol: Symbol) -> &str {
-        match self.strings.get(symbol.0 as usize) {
+        match self.strings.get(symbol.0.as_usize()) {
             Some(string) => string,
-            None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
+            None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
         }
     }
 }
@@ -324,7 +341,7 @@ macro_rules! declare_keywords {(
         $(
             #[allow(non_upper_case_globals)]
             pub const $konst: Keyword = Keyword {
-                ident: Ident::with_empty_ctxt(super::Symbol($index))
+                ident: Ident::with_empty_ctxt(super::Symbol::new($index))
             };
         )*
 
@@ -709,19 +726,19 @@ mod tests {
     fn interner_tests() {
        let mut i: Interner = Interner::default();
        // first one is zero:
-        assert_eq!(i.intern("dog"), Symbol(0));
+        assert_eq!(i.intern("dog"), Symbol::new(0));
        // re-use gets the same entry:
-        assert_eq!(i.intern("dog"), Symbol(0));
+        assert_eq!(i.intern("dog"), Symbol::new(0));
        // different string gets a different #:
-        assert_eq!(i.intern("cat"), Symbol(1));
-        assert_eq!(i.intern("cat"), Symbol(1));
+        assert_eq!(i.intern("cat"), Symbol::new(1));
+        assert_eq!(i.intern("cat"), Symbol::new(1));
        // dog is still at zero
-        assert_eq!(i.intern("dog"), Symbol(0));
-        assert_eq!(i.gensym("zebra"), Symbol(4294967295));
-        // gensym of same string gets new number :
-        assert_eq!(i.gensym("zebra"), Symbol(4294967294));
+        assert_eq!(i.intern("dog"), Symbol::new(0));
+        assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32));
+        // gensym of same string gets new number:
+        assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1));
        // gensym of *existing* string gets new number:
-        assert_eq!(i.gensym("dog"), Symbol(4294967293));
+        assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2));
     }
 
     #[test]
--
cgit 1.4.1-3-g733a5
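
The size claim in this commit rests on niche packing: because `newtype_index!`
reserves the last 256 values of the index range, the compiler can encode
`Option::None` inside `Symbol` itself instead of adding a discriminant word.
A standalone sketch of the same effect, using `NonZeroU32` as an illustrative
stand-in for the reserved-range trick (it is not the real `SymbolIndex`):

    use std::mem::size_of;
    use std::num::NonZeroU32;

    // Stand-in for the old layout: a plain u32 newtype has no forbidden bit
    // patterns, so Option<OldSymbol> needs an extra word for the discriminant.
    #[derive(Clone, Copy)]
    struct OldSymbol(u32);

    // Stand-in for the new layout: a restricted value range (here "non-zero",
    // in the real SymbolIndex "top 256 values reserved") gives the compiler a
    // niche in which to store Option's None.
    #[derive(Clone, Copy)]
    struct NewSymbol(NonZeroU32);

    fn main() {
        assert_eq!(size_of::<Option<OldSymbol>>(), 8);
        assert_eq!(size_of::<Option<NewSymbol>>(), 4);
    }

The same niche is what lets `Token`, which embeds a `Symbol`, drop from 24 to
16 bytes on x86_64, which the new static_assert! in token.rs pins down.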