about summary refs log tree commit diff
path: root/compiler/rustc_serialize/src
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_serialize/src')
-rw-r--r-- compiler/rustc_serialize/src/leb128.rs 16
-rw-r--r-- compiler/rustc_serialize/src/lib.rs 1
-rw-r--r-- compiler/rustc_serialize/src/opaque.rs 155
-rw-r--r-- compiler/rustc_serialize/src/serialize.rs 2
4 files changed, 130 insertions, 44 deletions
diff --git a/compiler/rustc_serialize/src/leb128.rs b/compiler/rustc_serialize/src/leb128.rs
index 7dad9aa01fa..e568b9e6786 100644
--- a/compiler/rustc_serialize/src/leb128.rs
+++ b/compiler/rustc_serialize/src/leb128.rs
@@ -1,3 +1,6 @@
+use crate::opaque::MemDecoder;
+use crate::serialize::Decoder;
+
 /// Returns the length of the longest LEB128 encoding for `T`, assuming `T` is an integer type
 pub const fn max_leb128_len<T>() -> usize {
     // The longest LEB128 encoding for an integer uses 7 bits per byte.
@@ -50,21 +53,19 @@ impl_write_unsigned_leb128!(write_usize_leb128, usize);
 macro_rules! impl_read_unsigned_leb128 {
     ($fn_name:ident, $int_ty:ty) => {
         #[inline]
-        pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty {
+        pub fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty {
             // The first iteration of this loop is unpeeled. This is a
             // performance win because this code is hot and integer values less
             // than 128 are very common, typically occurring 50-80% or more of
             // the time, even for u64 and u128.
-            let byte = slice[*position];
-            *position += 1;
+            let byte = decoder.read_u8();
             if (byte & 0x80) == 0 {
                 return byte as $int_ty;
             }
             let mut result = (byte & 0x7F) as $int_ty;
             let mut shift = 7;
             loop {
-                let byte = slice[*position];
-                *position += 1;
+                let byte = decoder.read_u8();
                 if (byte & 0x80) == 0 {
                     result |= (byte as $int_ty) << shift;
                     return result;
@@ -127,14 +128,13 @@ impl_write_signed_leb128!(write_isize_leb128, isize);
 macro_rules! impl_read_signed_leb128 {
     ($fn_name:ident, $int_ty:ty) => {
         #[inline]
-        pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty {
+        pub fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty {
             let mut result = 0;
             let mut shift = 0;
             let mut byte;
 
             loop {
-                byte = slice[*position];
-                *position += 1;
+                byte = decoder.read_u8();
                 result |= <$int_ty>::from(byte & 0x7F) << shift;
                 shift += 7;
 
diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs
index 1f8d2336c4e..ce8503918b4 100644
--- a/compiler/rustc_serialize/src/lib.rs
+++ b/compiler/rustc_serialize/src/lib.rs
@@ -16,6 +16,7 @@ Core encoding and decoding interfaces.
 #![feature(maybe_uninit_slice)]
 #![feature(new_uninit)]
 #![feature(allocator_api)]
+#![feature(ptr_sub_ptr)]
 #![cfg_attr(test, feature(test))]
 #![allow(rustc::internal)]
 #![deny(rustc::untranslatable_diagnostic)]
diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs
index 53e5c896736..b7976ea3b1c 100644
--- a/compiler/rustc_serialize/src/opaque.rs
+++ b/compiler/rustc_serialize/src/opaque.rs
@@ -2,7 +2,9 @@ use crate::leb128::{self, largest_max_leb128_len};
 use crate::serialize::{Decodable, Decoder, Encodable, Encoder};
 use std::fs::File;
 use std::io::{self, Write};
+use std::marker::PhantomData;
 use std::mem::MaybeUninit;
+use std::ops::Range;
 use std::path::Path;
 use std::ptr;
 
@@ -510,39 +512,126 @@ impl Encoder for FileEncoder {
 // Decoder
 // -----------------------------------------------------------------------------
 
+// Conceptually, `MemDecoder` wraps a `&[u8]` with a cursor into it that is always valid.
+// This is implemented with three pointers, two of which represent the original slice and a
+// third that is our cursor.
+// It is an invariant of this type that start <= current <= end.
+// Additionally, the implementation of this type never modifies start and end.
 pub struct MemDecoder<'a> {
-    pub data: &'a [u8],
-    position: usize,
+    start: *const u8,
+    current: *const u8,
+    end: *const u8,
+    _marker: PhantomData<&'a u8>,
 }
 
 impl<'a> MemDecoder<'a> {
     #[inline]
     pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> {
-        MemDecoder { data, position }
+        let Range { start, end } = data.as_ptr_range();
+        MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }
     }
 
     #[inline]
-    pub fn position(&self) -> usize {
-        self.position
+    pub fn data(&self) -> &'a [u8] {
+        // SAFETY: This recovers the original slice, only using members we never modify.
+        unsafe { std::slice::from_raw_parts(self.start, self.len()) }
     }
 
     #[inline]
-    pub fn set_position(&mut self, pos: usize) {
-        self.position = pos
+    pub fn len(&self) -> usize {
+        // SAFETY: This recovers the length of the original slice, only using members we never modify.
+        unsafe { self.end.sub_ptr(self.start) }
+    }
+
+    #[inline]
+    pub fn remaining(&self) -> usize {
+        // SAFETY: This type guarantees current <= end.
+        unsafe { self.end.sub_ptr(self.current) }
+    }
+
+    #[cold]
+    #[inline(never)]
+    fn decoder_exhausted() -> ! {
+        panic!("MemDecoder exhausted")
     }
 
     #[inline]
-    pub fn advance(&mut self, bytes: usize) {
-        self.position += bytes;
+    fn read_byte(&mut self) -> u8 {
+        if self.current == self.end {
+            Self::decoder_exhausted();
+        }
+        // SAFETY: This type guarantees current <= end, and we just checked current != end.
+        unsafe {
+            let byte = *self.current;
+            self.current = self.current.add(1);
+            byte
+        }
+    }
+
+    #[inline]
+    fn read_array<const N: usize>(&mut self) -> [u8; N] {
+        self.read_raw_bytes(N).try_into().unwrap()
+    }
+
+    // The trait method doesn't have a lifetime parameter, and we need a version of this
+    // that definitely returns a slice based on the underlying storage as opposed to
+    // the Decoder itself in order to implement read_str efficiently.
+    #[inline]
+    fn read_raw_bytes_inherent(&mut self, bytes: usize) -> &'a [u8] {
+        if bytes > self.remaining() {
+            Self::decoder_exhausted();
+        }
+        // SAFETY: We just checked if this range is in-bounds above.
+        unsafe {
+            let slice = std::slice::from_raw_parts(self.current, bytes);
+            self.current = self.current.add(bytes);
+            slice
+        }
+    }
+
+    /// While we could manually expose manipulation of the decoder position,
+    /// all current users of that method would need to reset the position later,
+    /// incurring the bounds check of set_position twice.
+    #[inline]
+    pub fn with_position<F, T>(&mut self, pos: usize, func: F) -> T
+    where
+        F: Fn(&mut MemDecoder<'a>) -> T,
+    {
+        struct SetOnDrop<'a, 'guarded> {
+            decoder: &'guarded mut MemDecoder<'a>,
+            current: *const u8,
+        }
+        impl Drop for SetOnDrop<'_, '_> {
+            fn drop(&mut self) {
+                self.decoder.current = self.current;
+            }
+        }
+
+        if pos >= self.len() {
+            Self::decoder_exhausted();
+        }
+        let previous = self.current;
+        // SAFETY: We just checked if this add is in-bounds above.
+        unsafe {
+            self.current = self.start.add(pos);
+        }
+        let guard = SetOnDrop { current: previous, decoder: self };
+        func(guard.decoder)
     }
 }
 
 macro_rules! read_leb128 {
-    ($dec:expr, $fun:ident) => {{ leb128::$fun($dec.data, &mut $dec.position) }};
+    ($dec:expr, $fun:ident) => {{ leb128::$fun($dec) }};
 }
 
 impl<'a> Decoder for MemDecoder<'a> {
     #[inline]
+    fn position(&self) -> usize {
+        // SAFETY: This type guarantees start <= current
+        unsafe { self.current.sub_ptr(self.start) }
+    }
+
+    #[inline]
     fn read_u128(&mut self) -> u128 {
         read_leb128!(self, read_u128_leb128)
     }
@@ -559,17 +648,12 @@ impl<'a> Decoder for MemDecoder<'a> {
 
     #[inline]
     fn read_u16(&mut self) -> u16 {
-        let bytes = [self.data[self.position], self.data[self.position + 1]];
-        let value = u16::from_le_bytes(bytes);
-        self.position += 2;
-        value
+        u16::from_le_bytes(self.read_array())
     }
 
     #[inline]
     fn read_u8(&mut self) -> u8 {
-        let value = self.data[self.position];
-        self.position += 1;
-        value
+        self.read_byte()
     }
 
     #[inline]
@@ -594,17 +678,12 @@ impl<'a> Decoder for MemDecoder<'a> {
 
     #[inline]
     fn read_i16(&mut self) -> i16 {
-        let bytes = [self.data[self.position], self.data[self.position + 1]];
-        let value = i16::from_le_bytes(bytes);
-        self.position += 2;
-        value
+        i16::from_le_bytes(self.read_array())
     }
 
     #[inline]
     fn read_i8(&mut self) -> i8 {
-        let value = self.data[self.position];
-        self.position += 1;
-        value as i8
+        self.read_byte() as i8
     }
 
     #[inline]
@@ -625,22 +704,26 @@ impl<'a> Decoder for MemDecoder<'a> {
     }
 
     #[inline]
-    fn read_str(&mut self) -> &'a str {
+    fn read_str(&mut self) -> &str {
         let len = self.read_usize();
-        let sentinel = self.data[self.position + len];
-        assert!(sentinel == STR_SENTINEL);
-        let s = unsafe {
-            std::str::from_utf8_unchecked(&self.data[self.position..self.position + len])
-        };
-        self.position += len + 1;
-        s
+        let bytes = self.read_raw_bytes_inherent(len + 1);
+        assert!(bytes[len] == STR_SENTINEL);
+        unsafe { std::str::from_utf8_unchecked(&bytes[..len]) }
     }
 
     #[inline]
-    fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] {
-        let start = self.position;
-        self.position += bytes;
-        &self.data[start..self.position]
+    fn read_raw_bytes(&mut self, bytes: usize) -> &[u8] {
+        self.read_raw_bytes_inherent(bytes)
+    }
+
+    #[inline]
+    fn peek_byte(&self) -> u8 {
+        if self.current == self.end {
+            Self::decoder_exhausted();
+        }
+        // SAFETY: This type guarantees current is inbounds or one-past-the-end, which is end.
+        // Since we just checked current != end, the current pointer must be inbounds.
+        unsafe { *self.current }
     }
 }
 
diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs
index 527abc23727..a6d9c7b7d42 100644
--- a/compiler/rustc_serialize/src/serialize.rs
+++ b/compiler/rustc_serialize/src/serialize.rs
@@ -84,6 +84,8 @@ pub trait Decoder {
     fn read_char(&mut self) -> char;
     fn read_str(&mut self) -> &str;
     fn read_raw_bytes(&mut self, len: usize) -> &[u8];
+    fn peek_byte(&self) -> u8;
+    fn position(&self) -> usize;
 }
 
 /// Trait for types that can be serialized