about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2021-03-30 09:03:29 +0000
committer bors <bors@rust-lang.org> 2021-03-30 09:03:29 +0000
commit 689e8470ffa7aeca17cfee428704a4d6c4148c11 (patch)
tree b44441d2619e490f651ee2bd5438ad81d48d6e34
parent a0e229abadfe47df74625309fba44c0f3b8a0649 (diff)
parent 8c8841811414ba3348f8c604b0ce01200cc8be91 (diff)
download rust-689e8470ffa7aeca17cfee428704a4d6c4148c11.tar.gz
rust-689e8470ffa7aeca17cfee428704a4d6c4148c11.zip
Auto merge of #83458 - saethlin:improve-vec-benches, r=dtolnay
Clean up Vec's benchmarks

The Vec benchmarks need a lot of love. I sort of noticed this in https://github.com/rust-lang/rust/pull/83357 but the overall situation is much less awesome than I thought at the time. The first commit just removes a lot of asserts and does a touch of other cleanup.

A number of these benchmarks are poorly-named. For example, `bench_map_fast` is not in fact fast, `bench_rev_1` and `bench_rev_2` are vague, `bench_in_place_zip_iter_mut` doesn't call `zip`, `bench_in_place*` don't do anything in-place... Should I fix these, or is there tooling that depends on the names not changing?

I've also noticed that `bench_rev_1` and `bench_rev_2` are remarkably fragile. It looks like poking other code in `Vec` can cause the codegen of this benchmark to switch to a version that has almost exactly half its current throughput, and I have absolutely no idea why.

Here's the fast version:
```asm
  0.69 │110:   movdqu -0x20(%rbx,%rdx,4),%xmm0
  1.76 │       movdqu -0x10(%rbx,%rdx,4),%xmm1
  0.71 │       pshufd $0x1b,%xmm1,%xmm1
  0.60 │       pshufd $0x1b,%xmm0,%xmm0
  3.68 │       movdqu %xmm1,-0x30(%rcx)
 14.36 │       movdqu %xmm0,-0x20(%rcx)
 13.88 │       movdqu -0x40(%rbx,%rdx,4),%xmm0
  6.64 │       movdqu -0x30(%rbx,%rdx,4),%xmm1
  0.76 │       pshufd $0x1b,%xmm1,%xmm1
  0.77 │       pshufd $0x1b,%xmm0,%xmm0
  1.87 │       movdqu %xmm1,-0x10(%rcx)
 13.01 │       movdqu %xmm0,(%rcx)
 38.81 │       add    $0x40,%rcx
  0.92 │       add    $0xfffffffffffffff0,%rdx
  1.22 │     ↑ jne    110
```
And the slow one:
```asm
  0.42 │9a880:   movdqa     %xmm2,%xmm1
  4.03 │9a884:   movq       -0x8(%rbx,%rsi,4),%xmm4
  8.49 │9a88a:   pshufd     $0xe1,%xmm4,%xmm4
  2.58 │9a88f:   movq       -0x10(%rbx,%rsi,4),%xmm5
  7.02 │9a895:   pshufd     $0xe1,%xmm5,%xmm5
  4.79 │9a89a:   punpcklqdq %xmm5,%xmm4
  5.77 │9a89e:   movdqu     %xmm4,-0x18(%rdx)
 15.74 │9a8a3:   movq       -0x18(%rbx,%rsi,4),%xmm4
  3.91 │9a8a9:   pshufd     $0xe1,%xmm4,%xmm4
  5.04 │9a8ae:   movq       -0x20(%rbx,%rsi,4),%xmm5
  5.29 │9a8b4:   pshufd     $0xe1,%xmm5,%xmm5
  4.60 │9a8b9:   punpcklqdq %xmm5,%xmm4
  9.81 │9a8bd:   movdqu     %xmm4,-0x8(%rdx)
 11.05 │9a8c2:   paddq      %xmm3,%xmm0
  0.86 │9a8c6:   paddq      %xmm3,%xmm2
  5.89 │9a8ca:   add        $0x20,%rdx
  0.12 │9a8ce:   add        $0xfffffffffffffff8,%rsi
  1.16 │9a8d2:   add        $0x2,%rdi
  2.96 │9a8d6: → jne        9a880 <<alloc::vec::Vec<T,A> as core::iter::traits::collect::Extend<&T>>::extend+0xd0>
```
-rw-r--r-- library/alloc/benches/vec.rs 93
1 files changed, 25 insertions, 68 deletions
diff --git a/library/alloc/benches/vec.rs b/library/alloc/benches/vec.rs
index 7a098219ce4..48709e89823 100644
--- a/library/alloc/benches/vec.rs
+++ b/library/alloc/benches/vec.rs
@@ -4,23 +4,13 @@ use test::{black_box, Bencher};
 
 #[bench]
 fn bench_new(b: &mut Bencher) {
-    b.iter(|| {
-        let v: Vec<u32> = Vec::new();
-        assert_eq!(v.len(), 0);
-        assert_eq!(v.capacity(), 0);
-        v
-    })
+    b.iter(|| Vec::<u32>::new())
 }
 
 fn do_bench_with_capacity(b: &mut Bencher, src_len: usize) {
     b.bytes = src_len as u64;
 
-    b.iter(|| {
-        let v: Vec<u32> = Vec::with_capacity(src_len);
-        assert_eq!(v.len(), 0);
-        assert_eq!(v.capacity(), src_len);
-        v
-    })
+    b.iter(|| Vec::<u32>::with_capacity(src_len))
 }
 
 #[bench]
@@ -46,12 +36,7 @@ fn bench_with_capacity_1000(b: &mut Bencher) {
 fn do_bench_from_fn(b: &mut Bencher, src_len: usize) {
     b.bytes = src_len as u64;
 
-    b.iter(|| {
-        let dst = (0..src_len).collect::<Vec<_>>();
-        assert_eq!(dst.len(), src_len);
-        assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
-        dst
-    })
+    b.iter(|| (0..src_len).collect::<Vec<_>>())
 }
 
 #[bench]
@@ -77,12 +62,7 @@ fn bench_from_fn_1000(b: &mut Bencher) {
 fn do_bench_from_elem(b: &mut Bencher, src_len: usize) {
     b.bytes = src_len as u64;
 
-    b.iter(|| {
-        let dst: Vec<usize> = repeat(5).take(src_len).collect();
-        assert_eq!(dst.len(), src_len);
-        assert!(dst.iter().all(|x| *x == 5));
-        dst
-    })
+    b.iter(|| repeat(5).take(src_len).collect::<Vec<usize>>())
 }
 
 #[bench]
@@ -110,12 +90,7 @@ fn do_bench_from_slice(b: &mut Bencher, src_len: usize) {
 
     b.bytes = src_len as u64;
 
-    b.iter(|| {
-        let dst = src.clone()[..].to_vec();
-        assert_eq!(dst.len(), src_len);
-        assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
-        dst
-    });
+    b.iter(|| src.as_slice().to_vec());
 }
 
 #[bench]
@@ -144,9 +119,7 @@ fn do_bench_from_iter(b: &mut Bencher, src_len: usize) {
     b.bytes = src_len as u64;
 
     b.iter(|| {
-        let dst: Vec<_> = FromIterator::from_iter(src.clone());
-        assert_eq!(dst.len(), src_len);
-        assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
+        let dst: Vec<_> = FromIterator::from_iter(src.iter().cloned());
         dst
     });
 }
@@ -180,8 +153,6 @@ fn do_bench_extend(b: &mut Bencher, dst_len: usize, src_len: usize) {
     b.iter(|| {
         let mut dst = dst.clone();
         dst.extend(src.clone());
-        assert_eq!(dst.len(), dst_len + src_len);
-        assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
         dst
     });
 }
@@ -230,8 +201,6 @@ fn do_bench_extend_from_slice(b: &mut Bencher, dst_len: usize, src_len: usize) {
     b.iter(|| {
         let mut dst = dst.clone();
         dst.extend_from_slice(&src);
-        assert_eq!(dst.len(), dst_len + src_len);
-        assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
         dst
     });
 }
@@ -290,12 +259,7 @@ fn do_bench_clone(b: &mut Bencher, src_len: usize) {
 
     b.bytes = src_len as u64;
 
-    b.iter(|| {
-        let dst = src.clone();
-        assert_eq!(dst.len(), src_len);
-        assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
-        dst
-    });
+    b.iter(|| src.clone());
 }
 
 #[bench]
@@ -329,8 +293,7 @@ fn do_bench_clone_from(b: &mut Bencher, times: usize, dst_len: usize, src_len: u
 
         for _ in 0..times {
             dst.clone_from(&src);
-            assert_eq!(dst.len(), src_len);
-            assert!(dst.iter().enumerate().all(|(i, x)| dst_len + i == *x));
+            dst = black_box(dst);
         }
         dst
     });
@@ -463,11 +426,10 @@ macro_rules! bench_in_place {
             fn $fname(b: &mut Bencher) {
                 b.iter(|| {
                     let src: Vec<$type> = black_box(vec![$init; $count]);
-                    let mut sink = src.into_iter()
+                    src.into_iter()
                         .enumerate()
                         .map(|(idx, e)| idx as $type ^ e)
-                        .collect::<Vec<$type>>();
-                    black_box(sink.as_mut_ptr())
+                        .collect::<Vec<$type>>()
                 });
             }
         )+
@@ -527,7 +489,6 @@ fn bench_in_place_zip_recycle(b: &mut Bencher) {
             .enumerate()
             .map(|(i, (d, s))| d.wrapping_add(i as u8) ^ s)
             .collect::<Vec<_>>();
-        assert_eq!(mangled.len(), 1000);
         data = black_box(mangled);
     });
 }
@@ -614,23 +575,6 @@ fn bench_nest_chain_chain_collect(b: &mut Bencher) {
     });
 }
 
-pub fn example_plain_slow(l: &[u32]) -> Vec<u32> {
-    let mut result = Vec::with_capacity(l.len());
-    result.extend(l.iter().rev());
-    result
-}
-
-pub fn map_fast(l: &[(u32, u32)]) -> Vec<u32> {
-    let mut result = Vec::with_capacity(l.len());
-    for i in 0..l.len() {
-        unsafe {
-            *result.get_unchecked_mut(i) = l[i].0;
-            result.set_len(i);
-        }
-    }
-    result
-}
-
 #[bench]
 fn bench_range_map_collect(b: &mut Bencher) {
     b.iter(|| (0..LEN).map(|_| u32::default()).collect::<Vec<_>>());
@@ -669,7 +613,11 @@ fn bench_rev_1(b: &mut Bencher) {
 #[bench]
 fn bench_rev_2(b: &mut Bencher) {
     let data = black_box([0; LEN]);
-    b.iter(|| example_plain_slow(&data));
+    b.iter(|| {
+        let mut v = Vec::<u32>::with_capacity(data.len());
+        v.extend(data.iter().rev());
+        v
+    });
 }
 
 #[bench]
@@ -685,7 +633,16 @@ fn bench_map_regular(b: &mut Bencher) {
 #[bench]
 fn bench_map_fast(b: &mut Bencher) {
     let data = black_box([(0, 0); LEN]);
-    b.iter(|| map_fast(&data));
+    b.iter(|| {
+        let mut result = Vec::with_capacity(data.len());
+        for i in 0..data.len() {
+            unsafe {
+                *result.get_unchecked_mut(i) = data[i].0;
+                result.set_len(i);
+            }
+        }
+        result
+    });
 }
 
 fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {