about summary refs log tree commit diff
path: root/compiler/rustc_serialize/src/serialize.rs
diff options
context:
space:
mode:
authorNicholas Nethercote <n.nethercote@gmail.com>2023-04-28 09:06:57 +1000
committerNicholas Nethercote <n.nethercote@gmail.com>2023-04-28 18:34:54 +1000
commit7a16d25365b5f0aa815948237c46fb1843386d7a (patch)
treea3d90080fd95337b23238cd73eca3ee6066bf95e /compiler/rustc_serialize/src/serialize.rs
parentfa133f5354ac29096d1577d5ba9c1400c2ad3b0f (diff)
downloadrust-7a16d25365b5f0aa815948237c46fb1843386d7a.tar.gz
rust-7a16d25365b5f0aa815948237c46fb1843386d7a.zip
Add some provided methods to `Encoder`/`Decoder`.
The methods for `i8`, `bool`, `char`, `str` are the same for all impls,
because they are layered on top of other methods.
Diffstat (limited to 'compiler/rustc_serialize/src/serialize.rs')
-rw-r--r--compiler/rustc_serialize/src/serialize.rs64
1 files changed, 56 insertions, 8 deletions
diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs
index 79c2f76c01c..e1bc598736f 100644
--- a/compiler/rustc_serialize/src/serialize.rs
+++ b/compiler/rustc_serialize/src/serialize.rs
@@ -12,6 +12,13 @@ use std::path;
 use std::rc::Rc;
 use std::sync::Arc;
 
+/// A byte that [cannot occur in UTF8 sequences][utf8]. Used to mark the end of a string.
+/// This way we can skip validation and still be relatively sure that deserialization
+/// did not desynchronize.
+///
+/// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout
+const STR_SENTINEL: u8 = 0xC1;
+
 /// A note about error handling.
 ///
 /// Encoders may be fallible, but in practice failure is rare and there are so
@@ -40,10 +47,29 @@ pub trait Encoder {
     fn emit_i64(&mut self, v: i64);
     fn emit_i32(&mut self, v: i32);
     fn emit_i16(&mut self, v: i16);
-    fn emit_i8(&mut self, v: i8);
-    fn emit_bool(&mut self, v: bool);
-    fn emit_char(&mut self, v: char);
-    fn emit_str(&mut self, v: &str);
+
+    #[inline]
+    fn emit_i8(&mut self, v: i8) {
+        self.emit_u8(v as u8);
+    }
+
+    #[inline]
+    fn emit_bool(&mut self, v: bool) {
+        self.emit_u8(if v { 1 } else { 0 });
+    }
+
+    #[inline]
+    fn emit_char(&mut self, v: char) {
+        self.emit_u32(v as u32);
+    }
+
+    #[inline]
+    fn emit_str(&mut self, v: &str) {
+        self.emit_usize(v.len());
+        self.emit_raw_bytes(v.as_bytes());
+        self.emit_u8(STR_SENTINEL);
+    }
+
     fn emit_raw_bytes(&mut self, s: &[u8]);
 
     fn emit_enum_variant<F>(&mut self, v_id: usize, f: F)
@@ -79,10 +105,32 @@ pub trait Decoder {
     fn read_i64(&mut self) -> i64;
     fn read_i32(&mut self) -> i32;
     fn read_i16(&mut self) -> i16;
-    fn read_i8(&mut self) -> i8;
-    fn read_bool(&mut self) -> bool;
-    fn read_char(&mut self) -> char;
-    fn read_str(&mut self) -> &str;
+
+    #[inline]
+    fn read_i8(&mut self) -> i8 {
+        self.read_u8() as i8
+    }
+
+    #[inline]
+    fn read_bool(&mut self) -> bool {
+        let value = self.read_u8();
+        value != 0
+    }
+
+    #[inline]
+    fn read_char(&mut self) -> char {
+        let bits = self.read_u32();
+        std::char::from_u32(bits).unwrap()
+    }
+
+    #[inline]
+    fn read_str(&mut self) -> &str {
+        let len = self.read_usize();
+        let bytes = self.read_raw_bytes(len + 1);
+        assert!(bytes[len] == STR_SENTINEL);
+        unsafe { std::str::from_utf8_unchecked(&bytes[..len]) }
+    }
+
     fn read_raw_bytes(&mut self, len: usize) -> &[u8];
 
     // Although there is an `emit_enum_variant` method in `Encoder`, the code