diff options
| author | bors <bors@rust-lang.org> | 2014-04-16 03:36:27 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2014-04-16 03:36:27 -0700 |
| commit | f39ba69aaa0b50d7aaa130306f3f12d89583d501 (patch) | |
| tree | 3a2ed31ecb5ac65b25704fa4a815e506762ea5a6 /src/libstd | |
| parent | 61f788c772a1e0cefb19c54f12fbf42a65464334 (diff) | |
| parent | be334d582435a05ea56c1ca7fcb2e512cfc51f24 (diff) | |
| download | rust-f39ba69aaa0b50d7aaa130306f3f12d89583d501.tar.gz rust-f39ba69aaa0b50d7aaa130306f3f12d89583d501.zip | |
auto merge of #13539 : Aatch/rust/vector-copy-faster, r=thestinger
LLVM wasn't recognising the loops as memcpy loops and was therefore failing to optimise them properly. While improving LLVM is the "proper" way to fix this, I think that these cases are important enough to warrant a little low-level optimisation. Fixes #13472 r? @thestinger --- Benchmark Results: ``` --- Before --- test clone_owned ... bench: 6126104 ns/iter (+/- 285962) = 170 MB/s test clone_owned_to_owned ... bench: 6125054 ns/iter (+/- 271197) = 170 MB/s test clone_str ... bench: 80586 ns/iter (+/- 11489) = 13011 MB/s test clone_vec ... bench: 3903220 ns/iter (+/- 658556) = 268 MB/s test test_memcpy ... bench: 69401 ns/iter (+/- 2168) = 15108 MB/s --- After --- test clone_owned ... bench: 70839 ns/iter (+/- 4931) = 14801 MB/s test clone_owned_to_owned ... bench: 70286 ns/iter (+/- 4836) = 14918 MB/s test clone_str ... bench: 78519 ns/iter (+/- 5511) = 13353 MB/s test clone_vec ... bench: 71415 ns/iter (+/- 1999) = 14682 MB/s test test_memcpy ... bench: 70980 ns/iter (+/- 2126) = 14772 MB/s ```
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/slice.rs | 25 |
| -rw-r--r-- | src/libstd/vec.rs | 18 |
2 files changed, 38 insertions, 5 deletions
diff --git a/src/libstd/slice.rs b/src/libstd/slice.rs index aedaa5b3c15..153e21c780c 100644 --- a/src/libstd/slice.rs +++ b/src/libstd/slice.rs @@ -760,9 +760,25 @@ impl<'a, T: Clone> CloneableVector<T> for &'a [T] { /// Returns a copy of `v`. #[inline] fn to_owned(&self) -> ~[T] { - let mut result = with_capacity(self.len()); - for e in self.iter() { - result.push((*e).clone()); + let len = self.len(); + let mut result = with_capacity(len); + // Unsafe code so this can be optimised to a memcpy (or something + // similarly fast) when T is Copy. LLVM is easily confused, so any + // extra operations during the loop can prevent this optimisation + unsafe { + let mut i = 0; + let p = result.as_mut_ptr(); + // Use try_finally here otherwise the write to length + // inside the loop stops LLVM from optimising this. + try_finally( + &mut i, (), + |i, ()| while *i < len { + mem::move_val_init( + &mut(*p.offset(*i as int)), + self.unsafe_ref(*i).clone()); + *i += 1; + }, + |i| result.set_len(*i)); } result } @@ -2584,7 +2600,8 @@ pub mod bytes { impl<A: Clone> Clone for ~[A] { #[inline] fn clone(&self) -> ~[A] { - self.iter().map(|item| item.clone()).collect() + // Use the fast to_owned on &[A] for cloning + self.as_slice().to_owned() } fn clone_from(&mut self, source: &~[A]) { diff --git a/src/libstd/vec.rs b/src/libstd/vec.rs index a69120de00f..96cbac8869e 100644 --- a/src/libstd/vec.rs +++ b/src/libstd/vec.rs @@ -311,7 +311,23 @@ impl<T: Clone> Vec<T> { impl<T:Clone> Clone for Vec<T> { fn clone(&self) -> Vec<T> { - self.iter().map(|x| x.clone()).collect() + let len = self.len; + let mut vector = Vec::with_capacity(len); + // Unsafe code so this can be optimised to a memcpy (or something + // similarly fast) when T is Copy. LLVM is easily confused, so any + // extra operations during the loop can prevent this optimisation + { + let this_slice = self.as_slice(); + while vector.len < len { + unsafe { + mem::move_val_init( + vector.as_mut_slice().unsafe_mut_ref(vector.len), + this_slice.unsafe_ref(vector.len).clone()); + } + vector.len += 1; + } + } + vector } fn clone_from(&mut self, other: &Vec<T>) {