about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2014-04-16 03:36:27 -0700
committer bors <bors@rust-lang.org> 2014-04-16 03:36:27 -0700
commit f39ba69aaa0b50d7aaa130306f3f12d89583d501 (patch)
tree 3a2ed31ecb5ac65b25704fa4a815e506762ea5a6 /src/libstd
parent 61f788c772a1e0cefb19c54f12fbf42a65464334 (diff)
parent be334d582435a05ea56c1ca7fcb2e512cfc51f24 (diff)
download rust-f39ba69aaa0b50d7aaa130306f3f12d89583d501.tar.gz
rust-f39ba69aaa0b50d7aaa130306f3f12d89583d501.zip
auto merge of #13539 : Aatch/rust/vector-copy-faster, r=thestinger
LLVM wasn't recognising the loops as memcpy loops and was therefore failing to optimise them properly. While improving LLVM is the "proper" way to fix this, I think that these cases are important enough to warrant a little low-level optimisation.

Fixes #13472 

r? @thestinger 

---

Benchmark Results:

```
--- Before ---
test clone_owned          ... bench:   6126104 ns/iter (+/- 285962) = 170 MB/s
test clone_owned_to_owned ... bench:   6125054 ns/iter (+/- 271197) = 170 MB/s
test clone_str            ... bench:     80586 ns/iter (+/- 11489) = 13011 MB/s
test clone_vec            ... bench:   3903220 ns/iter (+/- 658556) = 268 MB/s
test test_memcpy          ... bench:     69401 ns/iter (+/- 2168) = 15108 MB/s

--- After ---
test clone_owned          ... bench:     70839 ns/iter (+/- 4931) = 14801 MB/s
test clone_owned_to_owned ... bench:     70286 ns/iter (+/- 4836) = 14918 MB/s
test clone_str            ... bench:     78519 ns/iter (+/- 5511) = 13353 MB/s
test clone_vec            ... bench:     71415 ns/iter (+/- 1999) = 14682 MB/s
test test_memcpy          ... bench:     70980 ns/iter (+/- 2126) = 14772 MB/s
```
Diffstat (limited to 'src/libstd')
-rw-r--r-- src/libstd/slice.rs 25
-rw-r--r-- src/libstd/vec.rs 18
2 files changed, 38 insertions, 5 deletions
diff --git a/src/libstd/slice.rs b/src/libstd/slice.rs
index aedaa5b3c15..153e21c780c 100644
--- a/src/libstd/slice.rs
+++ b/src/libstd/slice.rs
@@ -760,9 +760,25 @@ impl<'a, T: Clone> CloneableVector<T> for &'a [T] {
     /// Returns a copy of `v`.
     #[inline]
     fn to_owned(&self) -> ~[T] {
-        let mut result = with_capacity(self.len());
-        for e in self.iter() {
-            result.push((*e).clone());
+        let len = self.len();
+        let mut result = with_capacity(len);
+        // Unsafe code so this can be optimised to a memcpy (or something
+        // similarly fast) when T is Copy. LLVM is easily confused, so any
+        // extra operations during the loop can prevent this optimisation
+        unsafe {
+            let mut i = 0;
+            let p = result.as_mut_ptr();
+            // Use try_finally here otherwise the write to length
+            // inside the loop stops LLVM from optimising this.
+            try_finally(
+                &mut i, (),
+                |i, ()| while *i < len {
+                    mem::move_val_init(
+                        &mut(*p.offset(*i as int)),
+                        self.unsafe_ref(*i).clone());
+                    *i += 1;
+                },
+                |i| result.set_len(*i));
         }
         result
     }
@@ -2584,7 +2600,8 @@ pub mod bytes {
 impl<A: Clone> Clone for ~[A] {
     #[inline]
     fn clone(&self) -> ~[A] {
-        self.iter().map(|item| item.clone()).collect()
+        // Use the fast to_owned on &[A] for cloning
+        self.as_slice().to_owned()
     }
 
     fn clone_from(&mut self, source: &~[A]) {
diff --git a/src/libstd/vec.rs b/src/libstd/vec.rs
index a69120de00f..96cbac8869e 100644
--- a/src/libstd/vec.rs
+++ b/src/libstd/vec.rs
@@ -311,7 +311,23 @@ impl<T: Clone> Vec<T> {
 
 impl<T:Clone> Clone for Vec<T> {
     fn clone(&self) -> Vec<T> {
-        self.iter().map(|x| x.clone()).collect()
+        let len = self.len;
+        let mut vector = Vec::with_capacity(len);
+        // Unsafe code so this can be optimised to a memcpy (or something
+        // similarly fast) when T is Copy. LLVM is easily confused, so any
+        // extra operations during the loop can prevent this optimisation
+        {
+            let this_slice = self.as_slice();
+            while vector.len < len {
+                unsafe {
+                    mem::move_val_init(
+                        vector.as_mut_slice().unsafe_mut_ref(vector.len),
+                        this_slice.unsafe_ref(vector.len).clone());
+                }
+                vector.len += 1;
+            }
+        }
+        vector
     }
 
     fn clone_from(&mut self, other: &Vec<T>) {