diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2025-06-02 08:59:29 +1000 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2025-06-30 20:42:27 +1000 |
| commit | 478f8287c0e2c35cda511fd3ac01b7ac78ee7cfe (patch) | |
| tree | 4d8f19b4e4e440ed8a22ee809ce2a565707d4c27 /compiler/rustc_span/src/lib.rs | |
| parent | ed2d759783dc9de134bbb3f01085b1e6dbf539f3 (diff) | |
| download | rust-478f8287c0e2c35cda511fd3ac01b7ac78ee7cfe.tar.gz rust-478f8287c0e2c35cda511fd3ac01b7ac78ee7cfe.zip | |
Introduce `ByteSymbol`.
It's like `Symbol` but for byte strings. The interner is now used for both `Symbol` and `ByteSymbol`. E.g. if you intern `"dog"` and `b"dog"` you'll get a `Symbol` and a `ByteSymbol` with the same index and the characters will only be stored once. The motivation for this is to eliminate the `Arc`s in `ast::LitKind`, to make `ast::LitKind` impl `Copy`, and to avoid the need to arena-allocate `ast::LitKind` in HIR. The latter change reduces peak memory by a non-trivial amount on literal-heavy benchmarks such as `deep-vector` and `tuple-stress`. `Encoder`, `Decoder`, `SpanEncoder`, and `SpanDecoder` all get some changes so that they can handle normal strings and byte strings. This change does slow down compilation of programs that use `include_bytes!` on large files, because the contents of those files are now interned (hashed). This makes `include_bytes!` more similar to `include_str!`, though `include_bytes!` contents still aren't escaped, and hashing is still much cheaper than escaping.
Diffstat (limited to 'compiler/rustc_span/src/lib.rs')
| -rw-r--r-- | compiler/rustc_span/src/lib.rs | 36 |
1 files changed, 30 insertions, 6 deletions
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index c8a29a2f68f..3d3a681c798 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -66,7 +66,9 @@ mod span_encoding; pub use span_encoding::{DUMMY_SP, Span}; pub mod symbol; -pub use symbol::{Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym}; +pub use symbol::{ + ByteSymbol, Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym, +}; mod analyze_source_file; pub mod fatal_error; @@ -1184,11 +1186,12 @@ rustc_index::newtype_index! { /// It is similar to rustc_type_ir's TyEncoder. pub trait SpanEncoder: Encoder { fn encode_span(&mut self, span: Span); - fn encode_symbol(&mut self, symbol: Symbol); + fn encode_symbol(&mut self, sym: Symbol); + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol); fn encode_expn_id(&mut self, expn_id: ExpnId); fn encode_syntax_context(&mut self, syntax_context: SyntaxContext); - /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx. - /// Therefore, make sure to include the context when encode a `CrateNum`. + /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a + /// tcx. Therefore, make sure to include the context when encode a `CrateNum`. fn encode_crate_num(&mut self, crate_num: CrateNum); fn encode_def_index(&mut self, def_index: DefIndex); fn encode_def_id(&mut self, def_id: DefId); @@ -1201,8 +1204,12 @@ impl SpanEncoder for FileEncoder { span.hi.encode(self); } - fn encode_symbol(&mut self, symbol: Symbol) { - self.emit_str(symbol.as_str()); + fn encode_symbol(&mut self, sym: Symbol) { + self.emit_str(sym.as_str()); + } + + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) { + self.emit_byte_str(byte_sym.as_byte_str()); } fn encode_expn_id(&mut self, _expn_id: ExpnId) { @@ -1239,6 +1246,12 @@ impl<E: SpanEncoder> Encodable<E> for Symbol { } } +impl<E: SpanEncoder> Encodable<E> for ByteSymbol { + fn encode(&self, s: &mut E) { + s.encode_byte_symbol(*self); + } +} + impl<E: SpanEncoder> Encodable<E> for ExpnId { fn encode(&self, s: &mut E) { s.encode_expn_id(*self) @@ -1280,6 +1293,7 @@ impl<E: SpanEncoder> Encodable<E> for AttrId { pub trait SpanDecoder: Decoder { fn decode_span(&mut self) -> Span; fn decode_symbol(&mut self) -> Symbol; + fn decode_byte_symbol(&mut self) -> ByteSymbol; fn decode_expn_id(&mut self) -> ExpnId; fn decode_syntax_context(&mut self) -> SyntaxContext; fn decode_crate_num(&mut self) -> CrateNum; @@ -1300,6 +1314,10 @@ impl SpanDecoder for MemDecoder<'_> { Symbol::intern(self.read_str()) } + fn decode_byte_symbol(&mut self) -> ByteSymbol { + ByteSymbol::intern(self.read_byte_str()) + } + fn decode_expn_id(&mut self) -> ExpnId { panic!("cannot decode `ExpnId` with `MemDecoder`"); } @@ -1337,6 +1355,12 @@ impl<D: SpanDecoder> Decodable<D> for Symbol { } } +impl<D: SpanDecoder> Decodable<D> for ByteSymbol { + fn decode(s: &mut D) -> ByteSymbol { + s.decode_byte_symbol() + } +} + impl<D: SpanDecoder> Decodable<D> for ExpnId { fn decode(s: &mut D) -> ExpnId { s.decode_expn_id() |
