about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorTobias Bucher <tobiasbucher5991@gmail.com>2014-07-01 15:10:22 +0200
committerTobias Bucher <tobiasbucher5991@gmail.com>2014-09-14 21:35:48 +0200
commitdbc3cb3a54c02211d8e4f9ff082c4ee2d544ec7d (patch)
tree3102fbb0e62625cc31092cbd3bf51d66b82ed2cd /src
parent21d1f4d7c0e637d8ae073798e666b880d31781b1 (diff)
downloadrust-dbc3cb3a54c02211d8e4f9ff082c4ee2d544ec7d.tar.gz
rust-dbc3cb3a54c02211d8e4f9ff082c4ee2d544ec7d.zip
Add support for in-place map for `Vec`s of types with same size
This is implemented using a new struct `PartialVec` which implements the proper
drop semantics in case the conversion is interrupted by an unwind.
Diffstat (limited to 'src')
-rw-r--r--src/libcollections/vec.rs258
1 files changed, 258 insertions, 0 deletions
diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs
index a7005cf454d..96592798f7a 100644
--- a/src/libcollections/vec.rs
+++ b/src/libcollections/vec.rs
@@ -1710,6 +1710,252 @@ pub mod raw {
     }
 }
 
+// TODO: Find some way to statically assert that `T` and `U` have the same
+// size.
+//
+/// An owned, partially type-converted vector.
+///
+/// No allocations are performed by usage, only a deallocation happens in the
+/// destructor which should only run when unwinding.
+///
+/// It can be used to convert a vector of `T`s into a vector of `U`s, by
+/// converting the individual elements one-by-one.
+///
+/// You may call the `push` method as often as you get a `Some(t)` from `pop`.
+/// After pushing the same number of `U`s as you got `T`s, you can `unwrap` the
+/// vector.
+///
+/// # Example
+///
+/// ```rust
+/// let pv = PartialVec::new(vec![0u, 1]);
+/// assert_eq!(pv.pop(), Some(0));
+/// assert_eq!(pv.pop(), Some(1));
+/// assert_eq!(pv.pop(), None);
+/// pv.push(2u);
+/// pv.push(3);
+/// assert_eq!(pv.unwrap(), vec![2, 3]);
+/// ```
+//
+// Upheld invariants:
+//
+// (a) `vec` isn't modified except when the `PartialVec` goes out of scope, the
+//     only thing it is used for is keeping the memory which the `PartialVec`
+//     uses for the inplace conversion.
+//
+// (b) `start_u` points to the start of the vector.
+//
+// (c) `end_u` points to one element beyond the vector.
+//
+// (d) `start_u` <= `end_u` <= `start_t` <= `end_t`.
+//
+// (e) From `start_u` (incl.) to `end_u` (excl.) there are sequential instances
+//     of type `U`.
+//
+// (f) From `start_t` (incl.) to `end_t` (excl.) there are sequential instances
+//     of type `T`.
+
+pub struct PartialVec<T,U> {
+    vec: Vec<T>,
+
+    start_u: *mut U,
+    end_u: *mut U,
+    start_t: *mut T,
+    end_t: *mut T,
+}
+
+impl<T,U> PartialVec<T,U> {
+    /// Creates a `PartialVec` from a `Vec`.
+    pub fn new(mut vec: Vec<T>) -> PartialVec<T,U> {
+        // TODO: do this statically
+        assert!(mem::size_of::<T>() != 0);
+        assert!(mem::size_of::<U>() != 0);
+        assert!(mem::size_of::<T>() == mem::size_of::<U>());
+
+        let start = vec.as_mut_ptr();
+
+        // This `as int` cast is safe, because the size of the elements of the
+        // vector is not 0, and:
+        //
+        // 1) If the size of the elements in the vector is 1, the `int` may
+        //    overflow, but it has the correct bit pattern so that the
+        //    `.offset()` function will work.
+        //
+        //    Example:
+        //        Address space 0x0-0xF.
+        //        `u8` array at: 0x1.
+        //        Size of `u8` array: 0x8.
+        //        Calculated `offset`: -0x8.
+        //        After `array.offset(offset)`: 0x9.
+        //        (0x1 + 0x8 = 0x1 - 0x8)
+        //
+        // 2) If the size of the elements in the vector is >1, the `uint` ->
+        //    `int` conversion can't overflow.
+        let offset = vec.len() as int;
+
+        let start_u = start as *mut U;
+        let end_u = start as *mut U;
+        let start_t = start;
+        let end_t = unsafe { start_t.offset(offset) };
+
+        // (b) is satisfied, `start_u` points to the start of `vec`.
+
+        // (c) is also satisfied, `end_t` points to the end of `vec`.
+
+        // `start_u == end_u == start_t <= end_t`, so also `start_u <= end_u <=
+        // start_t <= end_t`, thus (b).
+
+        // As `start_u == end_u`, it is represented correctly that there are no
+        // instances of `U` in `vec`, thus (e) is satisfied.
+
+        // At start, there are only elements of type `T` in `vec`, so (f) is
+        // satisfied, as `start_t` points to the start of `vec` and `end_t` to
+        // the end of it.
+
+        // This points inside the vector, as the vector has length `offset`.
+
+        PartialVec {
+            // (a) is satisfied, `vec` isn't modified in the function.
+            vec: vec,
+            start_u: start_u,
+            end_u: end_u,
+            start_t: start_t,
+            end_t: end_t,
+        }
+    }
+
+    /// Pops a `T` from the `PartialVec`.
+    ///
+    /// Returns `Some(t)` if there are more `T`s in the vector, otherwise
+    /// `None`.
+    fn pop(&mut self) -> Option<T> {
+        // The `if` ensures that there are more `T`s in `vec`.
+        if self.start_t < self.end_t {
+            let result;
+            unsafe {
+                // (f) is satisfied before, so in this if branch there actually
+                // is a `T` at `start_t`.  After shifting the pointer by one,
+                // (f) is again satisfied.
+                result = ptr::read(self.start_t as *const T);
+                self.start_t = self.start_t.offset(1);
+            }
+            Some(result)
+        } else {
+            None
+        }
+    }
+
+    /// Pushes a new `U` to the `PartialVec`.
+    ///
+    /// # Failure
+    ///
+    /// Fails if not enough `T`s were popped to have enough space for the new
+    /// `U`.
+    pub fn push(&mut self, value: U) {
+        // The assert assures that still `end_u <= start_t` (d) after
+        // the function.
+        assert!(self.end_u as *const () < self.start_t as *const (),
+            "writing more elements to PartialVec than reading from it")
+        unsafe {
+            // (e) is satisfied before, and after writing one `U`
+            // to `end_u` and shifting it by one, it's again
+            // satisfied.
+            ptr::write(self.end_u, value);
+            self.end_u = self.end_u.offset(1);
+        }
+    }
+
+    /// Unwraps the new `Vec` of `U`s after having pushed enough `U`s and
+    /// popped all `T`s.
+    ///
+    /// # Failure
+    ///
+    /// Fails if not all `T`s were popped, also fails if not the same amount of
+    /// `U`s was pushed before calling `unwrap`.
+    pub fn unwrap(self) -> Vec<U> {
+        // If `self.end_u == self.end_t`, we know from (e) that there are no
+        // more `T`s in `vec`, we also know that the whole length of `vec` is
+        // now used by `U`s, thus we can just transmute `vec` from a vector of
+        // `T`s to a vector of `U`s safely.
+
+        assert!(self.end_u as *const () == self.end_t as *const (),
+            "trying to unwrap a PartialVec before completing the writes to it");
+
+        // Extract `vec` and prevent the destructor of `PartialVec` from
+        // running.
+        unsafe {
+            let vec = ptr::read(&self.vec);
+            mem::forget(self);
+            mem::transmute(vec)
+        }
+    }
+}
+
+#[unsafe_destructor]
+impl<T,U> Drop for PartialVec<T,U> {
+    fn drop(&mut self) {
+        unsafe {
+            // As per (a) `vec` hasn't been modified until now. As it has a
+            // length currently, this would run destructors of `T`s which might
+            // not be there. So at first, set `vec`s length to `0`. This must
+            // be done at first to remain memory-safe as the destructors of `U`
+            // or `T` might cause unwinding where `vec`s destructor would be
+            // executed.
+            self.vec.set_len(0);
+
+            // As per (e) and (f) we have instances of `U`s and `T`s in `vec`.
+            // Destruct them.
+            while self.start_u < self.end_u {
+                let _ = ptr::read(self.start_u as *const U); // Run a `U` destructor.
+                self.start_u = self.start_u.offset(1);
+            }
+            while self.start_t < self.end_t {
+                let _ = ptr::read(self.start_t as *const T); // Run a `T` destructor.
+                self.start_t = self.start_t.offset(1);
+            }
+            // After this destructor ran, the destructor of `vec` will run,
+            // deallocating the underlying memory.
+        }
+    }
+}
+
+impl<T,U> Iterator<T> for PartialVec<T,U> {
+    fn next(&mut self) -> Option<T> {
+        self.pop()
+    }
+}
+
+impl<T> Vec<T> {
+    /// Converts a `Vec<T>` to a `Vec<U>` where `T` and `U` have the same size.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// let v = vec![0u, 1, 2];
+    /// let w = v.map_inplace(|i| i + 3);
+    /// assert_eq!(w.as_slice() == &[3, 4, 5]);
+    ///
+    /// let big_endian_u16s = vec![0x1122u16, 0x3344];
+    /// let u8s = big_endian_u16s.map_inplace(|x| [
+    ///     ((x >> 8) & 0xff) as u8,
+    ///     (x & 0xff) as u8
+    /// ]);
+    /// assert_eq!(u8s.as_slice() == &[[0x11, 0x22], [0x33, 0x44]]);
+    /// ```
+    pub fn map_inplace<U>(self, f: |T| -> U) -> Vec<U> {
+        let mut pv = PartialVec::new(self);
+        loop {
+            // TODO: need this extra assignment for borrowck to pass
+            let maybe_t = pv.pop();
+            match maybe_t {
+                Some(t) => pv.push(f(t)),
+                None => return pv.unwrap(),
+            };
+        }
+    }
+}
+
+
 #[cfg(test)]
 mod tests {
     extern crate test;
@@ -2039,6 +2285,18 @@ mod tests {
         assert_eq!(vec.as_ptr(), ptr);
         assert_eq!(vec.capacity(), 7);
         assert_eq!(vec.len(), 0);
+
+    #[test]
+    #[should_fail]
+    fn test_map_inplace_incompatible_types_fail() {
+        let v = vec![0u, 1, 2];
+        v.map_inplace(|_| ());
+    }
+
+    #[test]
+    fn test_map_inplace() {
+        let v = vec![0u, 1, 2];
+        assert_eq!(v.map_inplace(|i: uint| i as int - 1).as_slice, &[-1i, 0, 1]);
     }
 
     #[bench]