about summary refs log tree commit diff
path: root/library/alloc/src/vec
diff options
context:
space:
mode:
authorJoshua Wong <joshuawong@anticentri.st>2024-04-12 18:15:02 -0500
committerJoshua Wong <joshuawong@anticentri.st>2024-05-18 18:30:20 -0500
commitc585541e671bc76d60163356c435bfb32ef858b4 (patch)
treec61d7cd25dc5719844b1876ab198a0beed53bb09 /library/alloc/src/vec
parenteb1a5c9bb3df2ce5f003d45f835da0d61d8742cd (diff)
downloadrust-c585541e671bc76d60163356c435bfb32ef858b4.tar.gz
rust-c585541e671bc76d60163356c435bfb32ef858b4.zip
optimize in-place collection of `Vec`
LLVM does not know that the multiplication never overflows, which causes
it to generate unnecessary instructions. Use `usize::unchecked_mul`, so
that it can fold the `dst_cap` calculation when `size_of::<I::SRC>() ==
size_of::<T>()`.

Running:

```
rustc -C llvm-args=-x86-asm-syntax=intel -O src/lib.rs --emit asm`
```

```rust

pub struct Foo([usize; 3]);

pub fn unwrap_copy(v: Vec<Foo>) -> Vec<[usize; 3]> {
    v.into_iter().map(|f| f.0).collect()
}
```

Before this commit:

```
define void @unwrap_copy(ptr noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
  %me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
  %me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
  %me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
  %me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
  %me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
  %_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
  %0 = udiv i64 %_19.i.idx, 24
  %_16.i.i = mul i64 %me.sroa.0.0.copyload.i, 24
  %dst_cap.i.i = udiv i64 %_16.i.i, 24
  store i64 %dst_cap.i.i, ptr %_0, align 8
  %1 = getelementptr inbounds i8, ptr %_0, i64 8
  store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
  %2 = getelementptr inbounds i8, ptr %_0, i64 16
  store i64 %0, ptr %2, align 8
  ret void
}
```

After:

```
define void @unwrap_copy(ptr noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
  %me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
  %me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
  %me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
  %me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
  %me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
  %_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
  %0 = udiv i64 %_19.i.idx, 24
  store i64 %me.sroa.0.0.copyload.i, ptr %_0, align 8
  %1 = getelementptr inbounds i8, ptr %_0, i64 8
  store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
  %2 = getelementptr inbounds i8, ptr %_0, i64 16
  store i64 %0, ptr %2, align 8, !alias.scope !9, !noalias !14
  ret void
}
```

Note that there is still one more `mul,udiv` pair that I couldn't get
rid of. The root cause is the same issue as #121239, the `nuw` gets
stripped off of `ptr::sub_ptr`.
Diffstat (limited to 'library/alloc/src/vec')
-rw-r--r--library/alloc/src/vec/in_place_collect.rs9
1 files changed, 6 insertions, 3 deletions
diff --git a/library/alloc/src/vec/in_place_collect.rs b/library/alloc/src/vec/in_place_collect.rs
index 88aa1b1b0e0..5b0fde93b91 100644
--- a/library/alloc/src/vec/in_place_collect.rs
+++ b/library/alloc/src/vec/in_place_collect.rs
@@ -259,7 +259,8 @@ where
             inner.cap,
             inner.buf.cast::<T>(),
             inner.end as *const T,
-            inner.cap * mem::size_of::<I::Src>() / mem::size_of::<T>(),
+            // SAFETY: the multiplication can not overflow, since `inner.cap * size_of::<I::SRC>()` is the size of the allocation.
+            inner.cap.unchecked_mul(mem::size_of::<I::Src>()) / mem::size_of::<T>(),
         )
     };
 
@@ -373,8 +374,10 @@ where
         // - unlike most internal iteration methods, it only takes a &mut self
         // - it lets us thread the write pointer through its innards and get it back in the end
         let sink = InPlaceDrop { inner: dst_buf, dst: dst_buf };
-        let sink =
-            self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).unwrap();
+        let sink = match self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)) {
+            Ok(sink) => sink,
+            Err(never) => match never {},
+        };
         // iteration succeeded, don't drop head
         unsafe { ManuallyDrop::new(sink).dst.sub_ptr(dst_buf) }
     }