From 26451ef7b5e00887dc8f27717ff34262df23d655 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 22 May 2019 12:42:23 +1000 Subject: Avoid unnecessary internings. Most involving `Symbol::intern` on string literals. --- src/libsyntax/parse/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/libsyntax/parse') diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 6c29437362c..e0430ac5563 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -5787,7 +5787,7 @@ impl<'a> Parser<'a> { VisibilityKind::Inherited => {} _ => { let is_macro_rules: bool = match self.token { - token::Ident(sid, _) => sid.name == Symbol::intern("macro_rules"), + token::Ident(sid, _) => sid.name == sym::macro_rules, _ => false, }; let mut err = if is_macro_rules { -- cgit 1.4.1-3-g733a5 From 9c7d28d4fdd95bcd6062fb82a2dd2f280bda3e72 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 22 May 2019 19:25:39 +1000 Subject: Pre-intern "0", "1", ..., "9", and use where appropriate. 
--- src/librustc/hir/lowering.rs | 2 +- src/librustc/hir/map/def_collector.rs | 5 ++--- src/librustc/middle/mem_categorization.rs | 4 ++-- src/librustc_macros/src/symbols.rs | 20 ++++++++++++++++++++ src/librustc_mir/interpret/validity.rs | 4 ++-- src/libsyntax/parse/lexer/mod.rs | 4 ++-- src/libsyntax/parse/literal.rs | 2 +- src/libsyntax_pos/symbol.rs | 20 ++++++++++++++++++-- 8 files changed, 48 insertions(+), 13 deletions(-) (limited to 'src/libsyntax/parse') diff --git a/src/librustc/hir/lowering.rs b/src/librustc/hir/lowering.rs index daf47618164..1d51e7cd742 100644 --- a/src/librustc/hir/lowering.rs +++ b/src/librustc/hir/lowering.rs @@ -2956,7 +2956,7 @@ impl<'a> LoweringContext<'a> { ident: match f.ident { Some(ident) => ident, // FIXME(jseyfried): positional field hygiene - None => Ident::new(Symbol::intern(&index.to_string()), f.span), + None => Ident::new(sym::integer(index), f.span), }, vis: self.lower_visibility(&f.vis, None), ty: self.lower_ty(&f.ty, ImplTraitContext::disallowed()), diff --git a/src/librustc/hir/map/def_collector.rs b/src/librustc/hir/map/def_collector.rs index bb9e76f0262..bde27c71f9a 100644 --- a/src/librustc/hir/map/def_collector.rs +++ b/src/librustc/hir/map/def_collector.rs @@ -5,8 +5,7 @@ use crate::session::CrateDisambiguator; use syntax::ast::*; use syntax::ext::hygiene::Mark; use syntax::visit; -use syntax::symbol::kw; -use syntax::symbol::Symbol; +use syntax::symbol::{kw, sym}; use syntax::parse::token::{self, Token}; use syntax_pos::Span; @@ -221,7 +220,7 @@ impl<'a> visit::Visitor<'a> for DefCollector<'a> { _: &'a Generics, _: NodeId, _: Span) { for (index, field) in data.fields().iter().enumerate() { let name = field.ident.map(|ident| ident.name) - .unwrap_or_else(|| Symbol::intern(&index.to_string())); + .unwrap_or_else(|| sym::integer(index)); let def = self.create_def(field.id, DefPathData::ValueNs(name.as_interned_str()), field.span); diff --git a/src/librustc/middle/mem_categorization.rs 
b/src/librustc/middle/mem_categorization.rs index c7f8cf684e6..6af43b04a7d 100644 --- a/src/librustc/middle/mem_categorization.rs +++ b/src/librustc/middle/mem_categorization.rs @@ -1316,7 +1316,7 @@ impl<'a, 'gcx, 'tcx> MemCategorizationContext<'a, 'gcx, 'tcx> { for (i, subpat) in subpats.iter().enumerate_and_adjust(expected_len, ddpos) { let subpat_ty = self.pat_ty_adjusted(&subpat)?; // see (*2) - let interior = InteriorField(FieldIndex(i, Name::intern(&i.to_string()))); + let interior = InteriorField(FieldIndex(i, sym::integer(i))); let subcmt = Rc::new( self.cat_imm_interior(pat, cmt.clone(), subpat_ty, interior)); self.cat_pattern_(subcmt, &subpat, op)?; @@ -1363,7 +1363,7 @@ impl<'a, 'gcx, 'tcx> MemCategorizationContext<'a, 'gcx, 'tcx> { }; for (i, subpat) in subpats.iter().enumerate_and_adjust(expected_len, ddpos) { let subpat_ty = self.pat_ty_adjusted(&subpat)?; // see (*2) - let interior = InteriorField(FieldIndex(i, Name::intern(&i.to_string()))); + let interior = InteriorField(FieldIndex(i, sym::integer(i))); let subcmt = Rc::new( self.cat_imm_interior(pat, cmt.clone(), subpat_ty, interior)); self.cat_pattern_(subcmt, &subpat, op)?; diff --git a/src/librustc_macros/src/symbols.rs b/src/librustc_macros/src/symbols.rs index 3883682fa9d..1f6e54807d8 100644 --- a/src/librustc_macros/src/symbols.rs +++ b/src/librustc_macros/src/symbols.rs @@ -96,6 +96,7 @@ pub fn symbols(input: TokenStream) -> TokenStream { let mut keyword_stream = quote! {}; let mut symbols_stream = quote! {}; + let mut digits_stream = quote! {}; let mut prefill_stream = quote! {}; let mut counter = 0u32; let mut keys = HashSet::<String>::new(); @@ -106,6 +107,7 @@ pub fn symbols(input: TokenStream) -> TokenStream { } }; + // Generate the listed keywords. for keyword in &input.keywords.0 { let name = &keyword.name; let value = &keyword.value; @@ -119,6 +121,7 @@ pub fn symbols(input: TokenStream) -> TokenStream { counter += 1; } + // Generate the listed symbols. 
for symbol in &input.symbols.0 { let name = &symbol.name; let value = match &symbol.value { @@ -135,6 +138,19 @@ pub fn symbols(input: TokenStream) -> TokenStream { counter += 1; } + // Generate symbols for the strings "0", "1", ..., "9". + for n in 0..10 { + let n = n.to_string(); + check_dup(&n); + prefill_stream.extend(quote! { + #n, + }); + digits_stream.extend(quote! { + Symbol::new(#counter), + }); + counter += 1; + } + let tt = TokenStream::from(quote! { macro_rules! keywords { () => { @@ -145,6 +161,10 @@ pub fn symbols(input: TokenStream) -> TokenStream { macro_rules! symbols { () => { #symbols_stream + + pub const digits_array: &[Symbol; 10] = &[ + #digits_stream + ]; } } diff --git a/src/librustc_mir/interpret/validity.rs b/src/librustc_mir/interpret/validity.rs index 0d3ee830574..ccc38191a93 100644 --- a/src/librustc_mir/interpret/validity.rs +++ b/src/librustc_mir/interpret/validity.rs @@ -2,7 +2,7 @@ use std::fmt::Write; use std::hash::Hash; use std::ops::RangeInclusive; -use syntax_pos::symbol::Symbol; +use syntax_pos::symbol::{sym, Symbol}; use rustc::hir; use rustc::ty::layout::{self, Size, Align, TyLayout, LayoutOf, VariantIdx}; use rustc::ty; @@ -188,7 +188,7 @@ impl<'rt, 'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> ValidityVisitor<'rt, 'a, ' PathElem::ClosureVar(name.unwrap_or_else(|| { // Fall back to showing the field index. 
- Symbol::intern(&field.to_string()) + sym::integer(field) })) } diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index deb76d6d70a..a06a84f162a 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1,7 +1,7 @@ use crate::ast::{self, Ident}; use crate::parse::ParseSess; use crate::parse::token::{self, Token}; -use crate::symbol::Symbol; +use crate::symbol::{sym, Symbol}; use crate::parse::unescape; use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char}; @@ -754,7 +754,7 @@ impl<'a> StringReader<'a> { } _ => { // just a 0 - return (token::Integer, self.name_from(start_bpos)); + return (token::Integer, sym::integer(0)); } } } else if c.is_digit(10) { diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 0305b1f59b9..80bb89ef81a 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -197,7 +197,7 @@ impl LitKind { ast::LitIntType::Signed(ty) => Some(Symbol::intern(ty.ty_to_string())), ast::LitIntType::Unsuffixed => None, }; - (token::Integer, Symbol::intern(&n.to_string()), suffix) + (token::Integer, sym::integer(n), suffix) } LitKind::Float(symbol, ty) => { (token::Float, symbol, Some(Symbol::intern(ty.ty_to_string()))) diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs index ce75094de59..6167a90b22a 100644 --- a/src/libsyntax_pos/symbol.rs +++ b/src/libsyntax_pos/symbol.rs @@ -9,10 +9,10 @@ use rustc_data_structures::newtype_index; use rustc_macros::symbols; use serialize::{Decodable, Decoder, Encodable, Encoder}; -use std::fmt; -use std::str; use std::cmp::{PartialEq, Ordering, PartialOrd, Ord}; +use std::fmt; use std::hash::{Hash, Hasher}; +use std::str; use crate::hygiene::SyntaxContext; use crate::{Span, DUMMY_SP, GLOBALS}; @@ -102,6 +102,9 @@ symbols! { // Symbols that can be referred to with syntax_pos::sym::*. The symbol is // the stringified identifier unless otherwise specified (e.g. 
// `proc_dash_macro` represents "proc-macro"). + // + // As well as the symbols listed, there are symbols for the strings + // "0", "1", ..., "9", which are accessible via `sym::integer`. Symbols { aarch64_target_feature, abi, @@ -966,8 +969,21 @@ pub mod kw { // This module has a very short name because it's used a lot. pub mod sym { + use std::convert::TryInto; use super::Symbol; + symbols!(); + + // Get the symbol for an integer. The first few non-negative integers each + // have a static symbol and therefore are fast. + pub fn integer<N: TryInto<usize> + Copy + ToString>(n: N) -> Symbol { + if let Result::Ok(idx) = n.try_into() { + if let Option::Some(&sym) = digits_array.get(idx) { + return sym; + } + } + Symbol::intern(&n.to_string()) + } } impl Symbol { -- cgit 1.4.1-3-g733a5 From 21f28448e0cab81ad5697a9c01ef8dda9f730c27 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 23 May 2019 12:22:43 +1000 Subject: Add `to_symbol` methods. --- Cargo.lock | 1 + src/librustc_target/Cargo.toml | 1 + src/librustc_target/abi/mod.rs | 8 ++++++++ src/libsyntax/ast.rs | 24 +++++++++++++++++++++++- src/libsyntax/parse/literal.rs | 6 +++--- 5 files changed, 36 insertions(+), 4 deletions(-) (limited to 'src/libsyntax/parse') diff --git a/Cargo.lock b/Cargo.lock index 39364bad6f1..d5e2969e964 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3009,6 +3009,7 @@ dependencies = [ "rustc_cratesio_shim 0.0.0", "rustc_data_structures 0.0.0", "serialize 0.0.0", + "syntax_pos 0.0.0", ] [[package]] diff --git a/src/librustc_target/Cargo.toml b/src/librustc_target/Cargo.toml index ecea15a9922..3ab25146331 100644 --- a/src/librustc_target/Cargo.toml +++ b/src/librustc_target/Cargo.toml @@ -15,3 +15,4 @@ log = "0.4" rustc_cratesio_shim = { path = "../librustc_cratesio_shim" } rustc_data_structures = { path = "../librustc_data_structures" } serialize = { path = "../libserialize" } +syntax_pos = { path = "../libsyntax_pos" } diff --git a/src/librustc_target/abi/mod.rs b/src/librustc_target/abi/mod.rs 
index 4b61057e5cf..8fc5e6aae34 100644 --- a/src/librustc_target/abi/mod.rs +++ b/src/librustc_target/abi/mod.rs @@ -7,6 +7,7 @@ use std::fmt; use std::ops::{Add, Deref, Sub, Mul, AddAssign, Range, RangeInclusive}; use rustc_data_structures::indexed_vec::{Idx, IndexVec}; +use syntax_pos::symbol::{sym, Symbol}; pub mod call; @@ -552,6 +553,13 @@ impl FloatTy { } } + pub fn to_symbol(self) -> Symbol { + match self { + FloatTy::F32 => sym::f32, + FloatTy::F64 => sym::f64, + } + } + pub fn bit_width(self) -> usize { match self { FloatTy::F32 => 32, diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 3276f152575..75e83bd9f9c 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -10,7 +10,7 @@ use crate::parse::token; use crate::print::pprust; use crate::ptr::P; use crate::source_map::{dummy_spanned, respan, Spanned}; -use crate::symbol::{kw, Symbol}; +use crate::symbol::{kw, sym, Symbol}; use crate::tokenstream::TokenStream; use crate::ThinVec; @@ -1531,6 +1531,17 @@ impl IntTy { } } + pub fn to_symbol(&self) -> Symbol { + match *self { + IntTy::Isize => sym::isize, + IntTy::I8 => sym::i8, + IntTy::I16 => sym::i16, + IntTy::I32 => sym::i32, + IntTy::I64 => sym::i64, + IntTy::I128 => sym::i128, + } + } + pub fn val_to_string(&self, val: i128) -> String { // Cast to a `u128` so we can correctly print `INT128_MIN`. 
All integral types // are parsed as `u128`, so we wouldn't want to print an extra negative @@ -1572,6 +1583,17 @@ impl UintTy { } } + pub fn to_symbol(&self) -> Symbol { + match *self { + UintTy::Usize => sym::usize, + UintTy::U8 => sym::u8, + UintTy::U16 => sym::u16, + UintTy::U32 => sym::u32, + UintTy::U64 => sym::u64, + UintTy::U128 => sym::u128, + } + } + pub fn val_to_string(&self, val: u128) -> String { format!("{}{}", val, self.ty_to_string()) } diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 80bb89ef81a..58573093127 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -193,14 +193,14 @@ impl LitKind { } LitKind::Int(n, ty) => { let suffix = match ty { - ast::LitIntType::Unsigned(ty) => Some(Symbol::intern(ty.ty_to_string())), - ast::LitIntType::Signed(ty) => Some(Symbol::intern(ty.ty_to_string())), + ast::LitIntType::Unsigned(ty) => Some(ty.to_symbol()), + ast::LitIntType::Signed(ty) => Some(ty.to_symbol()), ast::LitIntType::Unsuffixed => None, }; (token::Integer, sym::integer(n), suffix) } LitKind::Float(symbol, ty) => { - (token::Float, symbol, Some(Symbol::intern(ty.ty_to_string()))) + (token::Float, symbol, Some(ty.to_symbol())) } LitKind::FloatUnsuffixed(symbol) => { (token::Float, symbol, None) -- cgit 1.4.1-3-g733a5 From 303bf1509bdf5ffd150539acf44f1c500c7079bd Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 23 May 2019 12:34:38 +1000 Subject: Avoid some re-interning in `to_lit_token`. --- src/libsyntax/parse/literal.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/libsyntax/parse') diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 58573093127..18019a89130 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -171,12 +171,15 @@ impl LitKind { /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). 
pub fn to_lit_token(&self) -> token::Lit { let (kind, symbol, suffix) = match *self { - LitKind::Str(string, ast::StrStyle::Cooked) => { - let escaped = string.as_str().escape_default().to_string(); - (token::Str, Symbol::intern(&escaped), None) + LitKind::Str(symbol, ast::StrStyle::Cooked) => { + // Don't re-intern unless the escaped string is different. + let s = &symbol.as_str(); + let escaped = s.escape_default().to_string(); + let symbol = if escaped == *s { symbol } else { Symbol::intern(&escaped) }; + (token::Str, symbol, None) } - LitKind::Str(string, ast::StrStyle::Raw(n)) => { - (token::StrRaw(n), string, None) + LitKind::Str(symbol, ast::StrStyle::Raw(n)) => { + (token::StrRaw(n), symbol, None) } LitKind::ByteStr(ref bytes) => { let string = bytes.iter().cloned().flat_map(ascii::escape_default) -- cgit 1.4.1-3-g733a5 From 8ae01a90088fd62987030ff382733ed67791d4f8 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 23 May 2019 15:31:43 +1000 Subject: Use `Symbol` equality in `is_ident_named`. 
--- src/libsyntax/parse/diagnostics.rs | 6 +++--- src/libsyntax/parse/parser.rs | 6 +++--- src/libsyntax/parse/token.rs | 4 ++-- src/libsyntax_pos/symbol.rs | 2 ++ 4 files changed, 10 insertions(+), 8 deletions(-) (limited to 'src/libsyntax/parse') diff --git a/src/libsyntax/parse/diagnostics.rs b/src/libsyntax/parse/diagnostics.rs index 9431b559da5..b3d49524d76 100644 --- a/src/libsyntax/parse/diagnostics.rs +++ b/src/libsyntax/parse/diagnostics.rs @@ -8,7 +8,7 @@ use crate::parse::parser::{BlockMode, PathStyle, SemiColonMode, TokenType, Token use crate::print::pprust; use crate::ptr::P; use crate::source_map::Spanned; -use crate::symbol::kw; +use crate::symbol::{kw, sym}; use crate::ThinVec; use crate::util::parser::AssocOp; use errors::{Applicability, DiagnosticBuilder, DiagnosticId}; @@ -263,7 +263,7 @@ impl<'a> Parser<'a> { }; self.last_unexpected_token_span = Some(self.span); let mut err = self.fatal(&msg_exp); - if self.token.is_ident_named("and") { + if self.token.is_ident_named(sym::and) { err.span_suggestion_short( self.span, "use `&&` instead of `and` for the boolean operator", @@ -271,7 +271,7 @@ impl<'a> Parser<'a> { Applicability::MaybeIncorrect, ); } - if self.token.is_ident_named("or") { + if self.token.is_ident_named(sym::or) { err.span_suggestion_short( self.span, "use `||` instead of `or` for the boolean operator", diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index e0430ac5563..07efeaa4cf2 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -2759,7 +2759,7 @@ impl<'a> Parser<'a> { let (span, e) = self.interpolated_or_expr_span(e)?; (lo.to(span), ExprKind::Box(e)) } - token::Ident(..) if self.token.is_ident_named("not") => { + token::Ident(..) 
if self.token.is_ident_named(sym::not) => { // `not` is just an ordinary identifier in Rust-the-language, // but as `rustc`-the-compiler, we can issue clever diagnostics // for confused users who really want to say `!` @@ -4592,7 +4592,7 @@ impl<'a> Parser<'a> { let do_not_suggest_help = self.token.is_keyword(kw::In) || self.token == token::Colon; - if self.token.is_ident_named("and") { + if self.token.is_ident_named(sym::and) { e.span_suggestion_short( self.span, "use `&&` instead of `and` for the boolean operator", @@ -4600,7 +4600,7 @@ impl<'a> Parser<'a> { Applicability::MaybeIncorrect, ); } - if self.token.is_ident_named("or") { + if self.token.is_ident_named(sym::or) { e.span_suggestion_short( self.span, "use `||` instead of `or` for the boolean operator", diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index e5361b2db4e..47185df8d61 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -391,9 +391,9 @@ impl Token { /// Returns `true` if the token is a identifier whose name is the given /// string slice. - crate fn is_ident_named(&self, name: &str) -> bool { + crate fn is_ident_named(&self, name: Symbol) -> bool { match self.ident() { - Some((ident, _)) => ident.as_str() == name, + Some((ident, _)) => ident.name == name, None => false } } diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs index 6167a90b22a..da6c41adfd9 100644 --- a/src/libsyntax_pos/symbol.rs +++ b/src/libsyntax_pos/symbol.rs @@ -133,6 +133,7 @@ symbols! { allow_internal_unstable, allow_internal_unstable_backcompat_hack, always, + and, any, arbitrary_self_types, Arguments, @@ -420,6 +421,7 @@ symbols! { option, Option, opt_out_copy, + or, Ord, Ordering, Output, -- cgit 1.4.1-3-g733a5