about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2021-03-22 01:16:29 +0000
committer bors <bors@rust-lang.org> 2021-03-22 01:16:29 +0000
commit 35385770ae1ea86a911cc44ac43f856831e44b26 (patch)
tree 509eb08317fe567c9294f2162699bb310266f52c
parent 97663b6690689379aa0493deb494dfe14627c46b (diff)
parent 2d8be457434103bfeeea59f1882a15760f2a5c70 (diff)
download rust-35385770ae1ea86a911cc44ac43f856831e44b26.tar.gz
rust-35385770ae1ea86a911cc44ac43f856831e44b26.zip
Auto merge of #79846 - the8472:inplace-tra, r=m-ou-se
Use TrustedRandomAccess for in-place iterators where possible

This can speed up in-place iterators containing simple casts and transmutes from `Copy` types to any type of the same size. `!Copy` types can't be optimized since `TrustedRandomAccess` isn't implemented for those iterators.

```
 name                  on.b ns/iter     o1.b ns/iter     diff ns/iter   diff %  speedup
 vec::bench_transmute  20 (40000 MB/s)  12 (66666 MB/s)            -8  -40.00%   x 1.67
```
-rw-r--r-- library/alloc/benches/vec.rs 16
-rw-r--r-- library/alloc/src/vec/source_iter_marker.rs 69
-rw-r--r-- src/test/codegen/vec-in-place.rs 14
3 files changed, 83 insertions, 16 deletions
diff --git a/library/alloc/benches/vec.rs b/library/alloc/benches/vec.rs
index 73eb353f6e7..7a098219ce4 100644
--- a/library/alloc/benches/vec.rs
+++ b/library/alloc/benches/vec.rs
@@ -548,6 +548,22 @@ fn bench_in_place_zip_iter_mut(b: &mut Bencher) {
     black_box(data);
 }
 
+pub fn vec_cast<T, U>(input: Vec<T>) -> Vec<U> {
+    input.into_iter().map(|e| unsafe { std::mem::transmute_copy(&e) }).collect()
+}
+
+#[bench]
+fn bench_transmute(b: &mut Bencher) {
+    let mut vec = vec![10u32; 100];
+    b.bytes = 800; // 2 casts x 4 bytes x 100
+    b.iter(|| {
+        let v = std::mem::take(&mut vec);
+        let v = black_box(vec_cast::<u32, i32>(v));
+        let v = black_box(vec_cast::<i32, u32>(v));
+        vec = v;
+    });
+}
+
 #[derive(Clone)]
 struct Droppable(usize);
 
diff --git a/library/alloc/src/vec/source_iter_marker.rs b/library/alloc/src/vec/source_iter_marker.rs
index 8c0e95559fa..50882fc1767 100644
--- a/library/alloc/src/vec/source_iter_marker.rs
+++ b/library/alloc/src/vec/source_iter_marker.rs
@@ -1,4 +1,4 @@
-use core::iter::{InPlaceIterable, SourceIter};
+use core::iter::{InPlaceIterable, SourceIter, TrustedRandomAccess};
 use core::mem::{self, ManuallyDrop};
 use core::ptr::{self};
 
@@ -52,16 +52,7 @@ where
             )
         };
 
-        // use try-fold since
-        // - it vectorizes better for some iterator adapters
-        // - unlike most internal iteration methods, it only takes a &mut self
-        // - it lets us thread the write pointer through its innards and get it back in the end
-        let sink = InPlaceDrop { inner: dst_buf, dst: dst_buf };
-        let sink = iterator
-            .try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(dst_end))
-            .unwrap();
-        // iteration succeeded, don't drop head
-        let dst = ManuallyDrop::new(sink).dst;
+        let len = SpecInPlaceCollect::collect_in_place(&mut iterator, dst_buf, dst_end);
 
         let src = unsafe { iterator.as_inner().as_into_iter() };
         // check if SourceIter contract was upheld
@@ -72,7 +63,7 @@ where
         // then the source pointer will stay in its initial position and we can't use it as reference
         if src.ptr != src_ptr {
             debug_assert!(
-                dst as *const _ <= src.ptr,
+                unsafe { dst_buf.add(len) as *const _ } <= src.ptr,
                 "InPlaceIterable contract violation, write pointer advanced beyond read pointer"
             );
         }
@@ -82,10 +73,7 @@ where
         // but prevent drop of the allocation itself once IntoIter goes out of scope
         src.forget_allocation();
 
-        let vec = unsafe {
-            let len = dst.offset_from(dst_buf) as usize;
-            Vec::from_raw_parts(dst_buf, len, cap)
-        };
+        let vec = unsafe { Vec::from_raw_parts(dst_buf, len, cap) };
 
         vec
     }
@@ -106,3 +94,52 @@ fn write_in_place_with_drop<T>(
         Ok(sink)
     }
 }
+
+/// Helper trait to hold specialized implementations of the in-place iterate-collect loop
+trait SpecInPlaceCollect<T, I>: Iterator<Item = T> {
+    /// Collects an iterator (`self`) into the destination buffer (`dst`) and returns the number of items
+    /// collected. `end` is the last writable element of the allocation and used for bounds checks.
+    fn collect_in_place(&mut self, dst: *mut T, end: *const T) -> usize;
+}
+
+impl<T, I> SpecInPlaceCollect<T, I> for I
+where
+    I: Iterator<Item = T>,
+{
+    #[inline]
+    default fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize {
+        // use try-fold since
+        // - it vectorizes better for some iterator adapters
+        // - unlike most internal iteration methods, it only takes a &mut self
+        // - it lets us thread the write pointer through its innards and get it back in the end
+        let sink = InPlaceDrop { inner: dst_buf, dst: dst_buf };
+        let sink =
+            self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).unwrap();
+        // iteration succeeded, don't drop head
+        unsafe { ManuallyDrop::new(sink).dst.offset_from(dst_buf) as usize }
+    }
+}
+
+impl<T, I> SpecInPlaceCollect<T, I> for I
+where
+    I: Iterator<Item = T> + TrustedRandomAccess,
+{
+    #[inline]
+    fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize {
+        let len = self.size();
+        let mut drop_guard = InPlaceDrop { inner: dst_buf, dst: dst_buf };
+        for i in 0..len {
+            // Safety: InPlaceIterable contract guarantees that for every element we read
+            // one slot in the underlying storage will have been freed up and we can immediately
+            // write back the result.
+            unsafe {
+                let dst = dst_buf.offset(i as isize);
+                debug_assert!(dst as *const _ <= end, "InPlaceIterable contract violation");
+                ptr::write(dst, self.__iterator_get_unchecked(i));
+                drop_guard.dst = dst.add(1);
+            }
+        }
+        mem::forget(drop_guard);
+        len
+    }
+}
diff --git a/src/test/codegen/vec-in-place.rs b/src/test/codegen/vec-in-place.rs
new file mode 100644
index 00000000000..72ed7492be9
--- /dev/null
+++ b/src/test/codegen/vec-in-place.rs
@@ -0,0 +1,14 @@
+// ignore-debug: the debug assertions get in the way
+// compile-flags: -O
+// min-llvm-version: 11.0
+#![crate_type = "lib"]
+
+// Ensure that trivial casts of vec elements are O(1)
+
+// CHECK-LABEL: @vec_iterator_cast
+#[no_mangle]
+pub fn vec_iterator_cast(vec: Vec<isize>) -> Vec<usize> {
+    // CHECK-NOT: loop
+    // CHECK-NOT: call
+    vec.into_iter().map(|e| e as usize).collect()
+}