From 478f8287c0e2c35cda511fd3ac01b7ac78ee7cfe Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 2 Jun 2025 08:59:29 +1000 Subject: Introduce `ByteSymbol`. It's like `Symbol` but for byte strings. The interner is now used for both `Symbol` and `ByteSymbol`. E.g. if you intern `"dog"` and `b"dog"` you'll get a `Symbol` and a `ByteSymbol` with the same index and the characters will only be stored once. The motivation for this is to eliminate the `Arc`s in `ast::LitKind`, to make `ast::LitKind` impl `Copy`, and to avoid the need to arena-allocate `ast::LitKind` in HIR. The latter change reduces peak memory by a non-trivial amount on literal-heavy benchmarks such as `deep-vector` and `tuple-stress`. `Encoder`, `Decoder`, `SpanEncoder`, and `SpanDecoder` all get some changes so that they can handle normal strings and byte strings. This change does slow down compilation of programs that use `include_bytes!` on large files, because the contents of those files are now interned (hashed). This makes `include_bytes!` more similar to `include_str!`, though `include_bytes!` contents still aren't escaped, and hashing is still much cheaper than escaping. --- compiler/rustc_span/src/lib.rs | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) (limited to 'compiler/rustc_span/src/lib.rs') diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index c8a29a2f68f..3d3a681c798 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -66,7 +66,9 @@ mod span_encoding; pub use span_encoding::{DUMMY_SP, Span}; pub mod symbol; -pub use symbol::{Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym}; +pub use symbol::{ + ByteSymbol, Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym, +}; mod analyze_source_file; pub mod fatal_error; @@ -1184,11 +1186,12 @@ rustc_index::newtype_index! { /// It is similar to rustc_type_ir's TyEncoder. pub trait SpanEncoder: Encoder { fn encode_span(&mut self, span: Span); - fn encode_symbol(&mut self, symbol: Symbol); + fn encode_symbol(&mut self, sym: Symbol); + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol); fn encode_expn_id(&mut self, expn_id: ExpnId); fn encode_syntax_context(&mut self, syntax_context: SyntaxContext); - /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx. - /// Therefore, make sure to include the context when encode a `CrateNum`. + /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a + /// tcx. Therefore, make sure to include the context when encode a `CrateNum`. fn encode_crate_num(&mut self, crate_num: CrateNum); fn encode_def_index(&mut self, def_index: DefIndex); fn encode_def_id(&mut self, def_id: DefId); @@ -1201,8 +1204,12 @@ impl SpanEncoder for FileEncoder { span.hi.encode(self); } - fn encode_symbol(&mut self, symbol: Symbol) { - self.emit_str(symbol.as_str()); + fn encode_symbol(&mut self, sym: Symbol) { + self.emit_str(sym.as_str()); + } + + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) { + self.emit_byte_str(byte_sym.as_byte_str()); } fn encode_expn_id(&mut self, _expn_id: ExpnId) { @@ -1239,6 +1246,12 @@ impl Encodable for Symbol { } } +impl Encodable for ByteSymbol { + fn encode(&self, s: &mut E) { + s.encode_byte_symbol(*self); + } +} + impl Encodable for ExpnId { fn encode(&self, s: &mut E) { s.encode_expn_id(*self) @@ -1280,6 +1293,7 @@ impl Encodable for AttrId { pub trait SpanDecoder: Decoder { fn decode_span(&mut self) -> Span; fn decode_symbol(&mut self) -> Symbol; + fn decode_byte_symbol(&mut self) -> ByteSymbol; fn decode_expn_id(&mut self) -> ExpnId; fn decode_syntax_context(&mut self) -> SyntaxContext; fn decode_crate_num(&mut self) -> CrateNum; @@ -1300,6 +1314,10 @@ impl SpanDecoder for MemDecoder<'_> { Symbol::intern(self.read_str()) } + fn decode_byte_symbol(&mut self) -> ByteSymbol { + ByteSymbol::intern(self.read_byte_str()) + } + fn decode_expn_id(&mut self) -> ExpnId { panic!("cannot decode `ExpnId` with `MemDecoder`"); } @@ -1337,6 +1355,12 @@ impl Decodable for Symbol { } } +impl Decodable for ByteSymbol { + fn decode(s: &mut D) -> ByteSymbol { + s.decode_byte_symbol() + } +} + impl Decodable for ExpnId { fn decode(s: &mut D) -> ExpnId { s.decode_expn_id() -- cgit 1.4.1-3-g733a5