about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2016-10-13 19:56:53 -0700
committerGitHub <noreply@github.com>2016-10-13 19:56:53 -0700
commit17af6b94b2ea39414e4449c17cc4ab066d790e8f (patch)
tree3404cbcba80552c31b8104b7888941b497cab05c
parent098d22845933814a92497cbfaa8eb4a9a36b117f (diff)
parent401f1c45db5343dc0189b4f89d4160bba24facfb (diff)
downloadrust-17af6b94b2ea39414e4449c17cc4ab066d790e8f.tar.gz
rust-17af6b94b2ea39414e4449c17cc4ab066d790e8f.zip
Auto merge of #36743 - SimonSapin:dedup-by, r=alexcrichton
Add Vec::dedup_by and Vec::dedup_by_key
-rw-r--r--src/libcollections/vec.rs209
-rw-r--r--src/libcollectionstest/lib.rs1
-rw-r--r--src/libcollectionstest/slice.rs41
-rw-r--r--src/libcollectionstest/vec.rs55
4 files changed, 182 insertions, 124 deletions
diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs
index 54fd19dbe30..2bb311cc5ae 100644
--- a/src/libcollections/vec.rs
+++ b/src/libcollections/vec.rs
@@ -781,6 +781,130 @@ impl<T> Vec<T> {
         }
     }
 
+    /// Removes consecutive elements in the vector that resolve to the same key.
+    ///
+    /// If the vector is sorted, this removes all duplicates.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(dedup_by)]
+    ///
+    /// let mut vec = vec![10, 20, 21, 30, 20];
+    ///
+    /// vec.dedup_by_key(|i| *i / 10);
+    ///
+    /// assert_eq!(vec, [10, 20, 30, 20]);
+    /// ```
+    #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")]
+    #[inline]
+    pub fn dedup_by_key<F, K>(&mut self, mut key: F) where F: FnMut(&mut T) -> K, K: PartialEq {
+        self.dedup_by(|a, b| key(a) == key(b))
+    }
+
+    /// Removes consecutive elements in the vector that resolve to the same key.
+    ///
+    /// If the vector is sorted, this removes all duplicates.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(dedup_by)]
+    /// use std::ascii::AsciiExt;
+    ///
+    /// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"];
+    ///
+    /// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b));
+    ///
+    /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
+    /// ```
+    #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")]
+    pub fn dedup_by<F>(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool {
+        unsafe {
+            // Although we have a mutable reference to `self`, we cannot make
+            // *arbitrary* changes. The `PartialEq` comparisons could panic, so we
+            // must ensure that the vector is in a valid state at all time.
+            //
+            // The way that we handle this is by using swaps; we iterate
+            // over all the elements, swapping as we go so that at the end
+            // the elements we wish to keep are in the front, and those we
+            // wish to reject are at the back. We can then truncate the
+            // vector. This operation is still O(n).
+            //
+            // Example: We start in this state, where `r` represents "next
+            // read" and `w` represents "next_write`.
+            //
+            //           r
+            //     +---+---+---+---+---+---+
+            //     | 0 | 1 | 1 | 2 | 3 | 3 |
+            //     +---+---+---+---+---+---+
+            //           w
+            //
+            // Comparing self[r] against self[w-1], this is not a duplicate, so
+            // we swap self[r] and self[w] (no effect as r==w) and then increment both
+            // r and w, leaving us with:
+            //
+            //               r
+            //     +---+---+---+---+---+---+
+            //     | 0 | 1 | 1 | 2 | 3 | 3 |
+            //     +---+---+---+---+---+---+
+            //               w
+            //
+            // Comparing self[r] against self[w-1], this value is a duplicate,
+            // so we increment `r` but leave everything else unchanged:
+            //
+            //                   r
+            //     +---+---+---+---+---+---+
+            //     | 0 | 1 | 1 | 2 | 3 | 3 |
+            //     +---+---+---+---+---+---+
+            //               w
+            //
+            // Comparing self[r] against self[w-1], this is not a duplicate,
+            // so swap self[r] and self[w] and advance r and w:
+            //
+            //                       r
+            //     +---+---+---+---+---+---+
+            //     | 0 | 1 | 2 | 1 | 3 | 3 |
+            //     +---+---+---+---+---+---+
+            //                   w
+            //
+            // Not a duplicate, repeat:
+            //
+            //                           r
+            //     +---+---+---+---+---+---+
+            //     | 0 | 1 | 2 | 3 | 1 | 3 |
+            //     +---+---+---+---+---+---+
+            //                       w
+            //
+            // Duplicate, advance r. End of vec. Truncate to w.
+
+            let ln = self.len();
+            if ln <= 1 {
+                return;
+            }
+
+            // Avoid bounds checks by using raw pointers.
+            let p = self.as_mut_ptr();
+            let mut r: usize = 1;
+            let mut w: usize = 1;
+
+            while r < ln {
+                let p_r = p.offset(r as isize);
+                let p_wm1 = p.offset((w - 1) as isize);
+                if !same_bucket(&mut *p_r, &mut *p_wm1) {
+                    if r != w {
+                        let p_w = p_wm1.offset(1);
+                        mem::swap(&mut *p_r, &mut *p_w);
+                    }
+                    w += 1;
+                }
+                r += 1;
+            }
+
+            self.truncate(w);
+        }
+    }
+
     /// Appends an element to the back of a collection.
     ///
     /// # Panics
@@ -1155,90 +1279,9 @@ impl<T: PartialEq> Vec<T> {
     /// assert_eq!(vec, [1, 2, 3, 2]);
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
+    #[inline]
     pub fn dedup(&mut self) {
-        unsafe {
-            // Although we have a mutable reference to `self`, we cannot make
-            // *arbitrary* changes. The `PartialEq` comparisons could panic, so we
-            // must ensure that the vector is in a valid state at all time.
-            //
-            // The way that we handle this is by using swaps; we iterate
-            // over all the elements, swapping as we go so that at the end
-            // the elements we wish to keep are in the front, and those we
-            // wish to reject are at the back. We can then truncate the
-            // vector. This operation is still O(n).
-            //
-            // Example: We start in this state, where `r` represents "next
-            // read" and `w` represents "next_write`.
-            //
-            //           r
-            //     +---+---+---+---+---+---+
-            //     | 0 | 1 | 1 | 2 | 3 | 3 |
-            //     +---+---+---+---+---+---+
-            //           w
-            //
-            // Comparing self[r] against self[w-1], this is not a duplicate, so
-            // we swap self[r] and self[w] (no effect as r==w) and then increment both
-            // r and w, leaving us with:
-            //
-            //               r
-            //     +---+---+---+---+---+---+
-            //     | 0 | 1 | 1 | 2 | 3 | 3 |
-            //     +---+---+---+---+---+---+
-            //               w
-            //
-            // Comparing self[r] against self[w-1], this value is a duplicate,
-            // so we increment `r` but leave everything else unchanged:
-            //
-            //                   r
-            //     +---+---+---+---+---+---+
-            //     | 0 | 1 | 1 | 2 | 3 | 3 |
-            //     +---+---+---+---+---+---+
-            //               w
-            //
-            // Comparing self[r] against self[w-1], this is not a duplicate,
-            // so swap self[r] and self[w] and advance r and w:
-            //
-            //                       r
-            //     +---+---+---+---+---+---+
-            //     | 0 | 1 | 2 | 1 | 3 | 3 |
-            //     +---+---+---+---+---+---+
-            //                   w
-            //
-            // Not a duplicate, repeat:
-            //
-            //                           r
-            //     +---+---+---+---+---+---+
-            //     | 0 | 1 | 2 | 3 | 1 | 3 |
-            //     +---+---+---+---+---+---+
-            //                       w
-            //
-            // Duplicate, advance r. End of vec. Truncate to w.
-
-            let ln = self.len();
-            if ln <= 1 {
-                return;
-            }
-
-            // Avoid bounds checks by using raw pointers.
-            let p = self.as_mut_ptr();
-            let mut r: usize = 1;
-            let mut w: usize = 1;
-
-            while r < ln {
-                let p_r = p.offset(r as isize);
-                let p_wm1 = p.offset((w - 1) as isize);
-                if *p_r != *p_wm1 {
-                    if r != w {
-                        let p_w = p_wm1.offset(1);
-                        mem::swap(&mut *p_r, &mut *p_w);
-                    }
-                    w += 1;
-                }
-                r += 1;
-            }
-
-            self.truncate(w);
-        }
+        self.dedup_by(|a, b| a == b)
     }
 }
 
diff --git a/src/libcollectionstest/lib.rs b/src/libcollectionstest/lib.rs
index c2b34647c32..88fb6540a9a 100644
--- a/src/libcollectionstest/lib.rs
+++ b/src/libcollectionstest/lib.rs
@@ -16,6 +16,7 @@
 #![feature(collections)]
 #![feature(collections_bound)]
 #![feature(const_fn)]
+#![feature(dedup_by)]
 #![feature(enumset)]
 #![feature(pattern)]
 #![feature(rand)]
diff --git a/src/libcollectionstest/slice.rs b/src/libcollectionstest/slice.rs
index 9580714075a..130c16d7c15 100644
--- a/src/libcollectionstest/slice.rs
+++ b/src/libcollectionstest/slice.rs
@@ -316,47 +316,6 @@ fn test_clear() {
 }
 
 #[test]
-fn test_dedup() {
-    fn case(a: Vec<i32>, b: Vec<i32>) {
-        let mut v = a;
-        v.dedup();
-        assert_eq!(v, b);
-    }
-    case(vec![], vec![]);
-    case(vec![1], vec![1]);
-    case(vec![1, 1], vec![1]);
-    case(vec![1, 2, 3], vec![1, 2, 3]);
-    case(vec![1, 1, 2, 3], vec![1, 2, 3]);
-    case(vec![1, 2, 2, 3], vec![1, 2, 3]);
-    case(vec![1, 2, 3, 3], vec![1, 2, 3]);
-    case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]);
-}
-
-#[test]
-fn test_dedup_unique() {
-    let mut v0: Vec<Box<_>> = vec![box 1, box 1, box 2, box 3];
-    v0.dedup();
-    let mut v1: Vec<Box<_>> = vec![box 1, box 2, box 2, box 3];
-    v1.dedup();
-    let mut v2: Vec<Box<_>> = vec![box 1, box 2, box 3, box 3];
-    v2.dedup();
-    // If the boxed pointers were leaked or otherwise misused, valgrind
-    // and/or rt should raise errors.
-}
-
-#[test]
-fn test_dedup_shared() {
-    let mut v0: Vec<Box<_>> = vec![box 1, box 1, box 2, box 3];
-    v0.dedup();
-    let mut v1: Vec<Box<_>> = vec![box 1, box 2, box 2, box 3];
-    v1.dedup();
-    let mut v2: Vec<Box<_>> = vec![box 1, box 2, box 3, box 3];
-    v2.dedup();
-    // If the pointers were leaked or otherwise misused, valgrind and/or
-    // rt should raise errors.
-}
-
-#[test]
 fn test_retain() {
     let mut v = vec![1, 2, 3, 4, 5];
     v.retain(is_odd);
diff --git a/src/libcollectionstest/vec.rs b/src/libcollectionstest/vec.rs
index ee2b898d5c2..8417be289eb 100644
--- a/src/libcollectionstest/vec.rs
+++ b/src/libcollectionstest/vec.rs
@@ -8,6 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+use std::ascii::AsciiExt;
 use std::borrow::Cow;
 use std::iter::{FromIterator, repeat};
 use std::mem::size_of;
@@ -214,6 +215,60 @@ fn test_retain() {
 }
 
 #[test]
+fn test_dedup() {
+    fn case(a: Vec<i32>, b: Vec<i32>) {
+        let mut v = a;
+        v.dedup();
+        assert_eq!(v, b);
+    }
+    case(vec![], vec![]);
+    case(vec![1], vec![1]);
+    case(vec![1, 1], vec![1]);
+    case(vec![1, 2, 3], vec![1, 2, 3]);
+    case(vec![1, 1, 2, 3], vec![1, 2, 3]);
+    case(vec![1, 2, 2, 3], vec![1, 2, 3]);
+    case(vec![1, 2, 3, 3], vec![1, 2, 3]);
+    case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]);
+}
+
+#[test]
+fn test_dedup_by_key() {
+    fn case(a: Vec<i32>, b: Vec<i32>) {
+        let mut v = a;
+        v.dedup_by_key(|i| *i / 10);
+        assert_eq!(v, b);
+    }
+    case(vec![], vec![]);
+    case(vec![10], vec![10]);
+    case(vec![10, 11], vec![10]);
+    case(vec![10, 20, 30], vec![10, 20, 30]);
+    case(vec![10, 11, 20, 30], vec![10, 20, 30]);
+    case(vec![10, 20, 21, 30], vec![10, 20, 30]);
+    case(vec![10, 20, 30, 31], vec![10, 20, 30]);
+    case(vec![10, 11, 20, 21, 22, 30, 31], vec![10, 20, 30]);
+}
+
+#[test]
+fn test_dedup_by() {
+    let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"];
+    vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b));
+
+    assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
+}
+
+#[test]
+fn test_dedup_unique() {
+    let mut v0: Vec<Box<_>> = vec![box 1, box 1, box 2, box 3];
+    v0.dedup();
+    let mut v1: Vec<Box<_>> = vec![box 1, box 2, box 2, box 3];
+    v1.dedup();
+    let mut v2: Vec<Box<_>> = vec![box 1, box 2, box 3, box 3];
+    v2.dedup();
+    // If the boxed pointers were leaked or otherwise misused, valgrind
+    // and/or rt should raise errors.
+}
+
+#[test]
 fn zero_sized_values() {
     let mut v = Vec::new();
     assert_eq!(v.len(), 0);