about summary refs log tree commit diff
path: root/src/liballoc
diff options
context:
space:
mode:
authorDylan DPC <dylan.dpc@gmail.com>2020-05-06 13:22:05 +0200
committerGitHub <noreply@github.com>2020-05-06 13:22:05 +0200
commit78a25cb10e28207398b16fecbcd92accb6a65e9e (patch)
treec3f12f254a2ad16a8dec4871970276c9a5986ca1 /src/liballoc
parent8da5869fb738e51438dd1e0697c1b2c84eb11c59 (diff)
parent873022797ae7f09872738c7367d8d658a1a34ad5 (diff)
downloadrust-78a25cb10e28207398b16fecbcd92accb6a65e9e.tar.gz
rust-78a25cb10e28207398b16fecbcd92accb6a65e9e.zip
Rollup merge of #71510 - ssomers:btreemap_iter_intertwined, r=Mark-Simulacrum
Btreemap iter intertwined

3 commits:

1. Introduced benchmarks for `BTreeMap::iter()`. Benchmarks named `iter_20` were of the whole iteration process, so I renamed them. Also the benchmarks of `range` that I wrote earlier weren't very good. I included an (awkwardly named) one that compares `iter()` to `range(..)` on the same set, because the contrast is surprising:
```
 name                                           ns/iter
 btree::map::range_unbounded_unbounded          28,176
 btree::map::range_unbounded_vs_iter            89,369
```
Both dig up the same pair of leaf edges. `range(..)` also checks that some keys are correctly ordered, the only thing `iter()` does more is to copy the map's length.

2. Slightly refactoring the code to what I find more readable (not in chronological order of discovery), boosts performance:
```
>cargo-benchcmp.exe benchcmp a1 a2 --threshold 5
 name                                   a1 ns/iter  a2 ns/iter  diff ns/iter   diff %  speedup
 btree::map::find_rand_100              18          17                    -1   -5.56%   x 1.06
 btree::map::first_and_last_10k         64          71                     7   10.94%   x 0.90
 btree::map::iter_0                     2,939       2,209               -730  -24.84%   x 1.33
 btree::map::iter_1                     6,845       2,696             -4,149  -60.61%   x 2.54
 btree::map::iter_100                   8,556       3,672             -4,884  -57.08%   x 2.33
 btree::map::iter_10k                   9,292       5,884             -3,408  -36.68%   x 1.58
 btree::map::iter_1m                    10,268      6,510             -3,758  -36.60%   x 1.58
 btree::map::iteration_mut_100000       478,575     453,050          -25,525   -5.33%   x 1.06
 btree::map::range_unbounded_unbounded  28,176      36,169             7,993   28.37%   x 0.78
 btree::map::range_unbounded_vs_iter    89,369      38,290           -51,079  -57.16%   x 2.33
 btree::set::clone_100_and_remove_all   4,801       4,245               -556  -11.58%   x 1.13
 btree::set::clone_10k_and_remove_all   529,450     496,030          -33,420   -6.31%   x 1.07
```
But you can tell from the `range_unbounded_*` lines that, despite an unwarranted, vengeful attack on the range_unbounded_unbounded benchmark, this change still doesn't allow `iter()` to catch up with `range(..)`.

3. I guess that `range(..)` copes so well because it intertwines the leftmost and rightmost descend towards leaf edges, doing the two root node accesses close together, perhaps exploiting a CPU's internal pipelining? So the third commit distils a version of `range_search` (which we can't use directly because of the `Ord` bound), and we get another boost:
```
cargo-benchcmp.exe benchcmp a2 a3 --threshold 5
 name                                   a2 ns/iter  a3 ns/iter  diff ns/iter   diff %  speedup
 btree::map::first_and_last_100         40          43                     3    7.50%   x 0.93
 btree::map::first_and_last_10k         71          64                    -7   -9.86%   x 1.11
 btree::map::iter_0                     2,209       1,719               -490  -22.18%   x 1.29
 btree::map::iter_1                     2,696       2,205               -491  -18.21%   x 1.22
 btree::map::iter_100                   3,672       2,943               -729  -19.85%   x 1.25
 btree::map::iter_10k                   5,884       3,929             -1,955  -33.23%   x 1.50
 btree::map::iter_1m                    6,510       5,532               -978  -15.02%   x 1.18
 btree::map::iteration_mut_100000       453,050     476,667           23,617    5.21%   x 0.95
 btree::map::range_included_excluded    405,075     371,297          -33,778   -8.34%   x 1.09
 btree::map::range_included_included    427,577     397,440          -30,137   -7.05%   x 1.08
 btree::map::range_unbounded_unbounded  36,169      28,175            -7,994  -22.10%   x 1.28
 btree::map::range_unbounded_vs_iter    38,290      30,838            -7,452  -19.46%   x 1.24
```
But I think this is just fake news from the microbenchmarking media. `iter()` is still trying to catch up with `range(..)`. And we can sure do without another function. So I would skip this 3rd commit.

r? @Mark-Simulacrum
Diffstat (limited to 'src/liballoc')
-rw-r--r--src/liballoc/benches/btree/map.rs118
-rw-r--r--src/liballoc/collections/btree/map.rs76
2 files changed, 119 insertions, 75 deletions
diff --git a/src/liballoc/benches/btree/map.rs b/src/liballoc/benches/btree/map.rs
index 83cdebf0e3f..38d19c59ad1 100644
--- a/src/liballoc/benches/btree/map.rs
+++ b/src/liballoc/benches/btree/map.rs
@@ -1,6 +1,6 @@
 use std::collections::BTreeMap;
 use std::iter::Iterator;
-use std::ops::Bound::{Excluded, Unbounded};
+use std::ops::RangeBounds;
 use std::vec::Vec;
 
 use rand::{seq::SliceRandom, thread_rng, Rng};
@@ -117,7 +117,7 @@ map_find_rand_bench! {find_rand_10_000, 10_000, BTreeMap}
 map_find_seq_bench! {find_seq_100,    100,    BTreeMap}
 map_find_seq_bench! {find_seq_10_000, 10_000, BTreeMap}
 
-fn bench_iter(b: &mut Bencher, size: i32) {
+fn bench_iteration(b: &mut Bencher, size: i32) {
     let mut map = BTreeMap::<i32, i32>::new();
     let mut rng = thread_rng();
 
@@ -133,21 +133,21 @@ fn bench_iter(b: &mut Bencher, size: i32) {
 }
 
 #[bench]
-pub fn iter_20(b: &mut Bencher) {
-    bench_iter(b, 20);
+pub fn iteration_20(b: &mut Bencher) {
+    bench_iteration(b, 20);
 }
 
 #[bench]
-pub fn iter_1000(b: &mut Bencher) {
-    bench_iter(b, 1000);
+pub fn iteration_1000(b: &mut Bencher) {
+    bench_iteration(b, 1000);
 }
 
 #[bench]
-pub fn iter_100000(b: &mut Bencher) {
-    bench_iter(b, 100000);
+pub fn iteration_100000(b: &mut Bencher) {
+    bench_iteration(b, 100000);
 }
 
-fn bench_iter_mut(b: &mut Bencher, size: i32) {
+fn bench_iteration_mut(b: &mut Bencher, size: i32) {
     let mut map = BTreeMap::<i32, i32>::new();
     let mut rng = thread_rng();
 
@@ -163,18 +163,18 @@ fn bench_iter_mut(b: &mut Bencher, size: i32) {
 }
 
 #[bench]
-pub fn iter_mut_20(b: &mut Bencher) {
-    bench_iter_mut(b, 20);
+pub fn iteration_mut_20(b: &mut Bencher) {
+    bench_iteration_mut(b, 20);
 }
 
 #[bench]
-pub fn iter_mut_1000(b: &mut Bencher) {
-    bench_iter_mut(b, 1000);
+pub fn iteration_mut_1000(b: &mut Bencher) {
+    bench_iteration_mut(b, 1000);
 }
 
 #[bench]
-pub fn iter_mut_100000(b: &mut Bencher) {
-    bench_iter_mut(b, 100000);
+pub fn iteration_mut_100000(b: &mut Bencher) {
+    bench_iteration_mut(b, 100000);
 }
 
 fn bench_first_and_last(b: &mut Bencher, size: i32) {
@@ -202,57 +202,83 @@ pub fn first_and_last_10k(b: &mut Bencher) {
     bench_first_and_last(b, 10_000);
 }
 
-#[bench]
-pub fn range_excluded_excluded(b: &mut Bencher) {
-    let size = 144;
-    let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
+const BENCH_RANGE_SIZE: i32 = 145;
+const BENCH_RANGE_COUNT: i32 = BENCH_RANGE_SIZE * (BENCH_RANGE_SIZE - 1) / 2;
+
+fn bench_range<F, R>(b: &mut Bencher, f: F)
+where
+    F: Fn(i32, i32) -> R,
+    R: RangeBounds<i32>,
+{
+    let map: BTreeMap<_, _> = (0..BENCH_RANGE_SIZE).map(|i| (i, i)).collect();
     b.iter(|| {
-        for first in 0..size {
-            for last in first + 1..size {
-                black_box(map.range((Excluded(first), Excluded(last))));
+        let mut c = 0;
+        for i in 0..BENCH_RANGE_SIZE {
+            for j in i + 1..BENCH_RANGE_SIZE {
+                black_box(map.range(f(i, j)));
+                c += 1;
             }
         }
+        debug_assert_eq!(c, BENCH_RANGE_COUNT);
     });
 }
 
 #[bench]
-pub fn range_excluded_unbounded(b: &mut Bencher) {
-    let size = 144;
-    let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
-    b.iter(|| {
-        for first in 0..size {
-            black_box(map.range((Excluded(first), Unbounded)));
-        }
-    });
+pub fn range_included_excluded(b: &mut Bencher) {
+    bench_range(b, |i, j| i..j);
 }
 
 #[bench]
 pub fn range_included_included(b: &mut Bencher) {
-    let size = 144;
-    let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
-    b.iter(|| {
-        for first in 0..size {
-            for last in first..size {
-                black_box(map.range(first..=last));
-            }
-        }
-    });
+    bench_range(b, |i, j| i..=j);
 }
 
 #[bench]
 pub fn range_included_unbounded(b: &mut Bencher) {
-    let size = 144;
+    bench_range(b, |i, _| i..);
+}
+
+#[bench]
+pub fn range_unbounded_unbounded(b: &mut Bencher) {
+    bench_range(b, |_, _| ..);
+}
+
+fn bench_iter(b: &mut Bencher, repeats: i32, size: i32) {
     let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
     b.iter(|| {
-        for first in 0..size {
-            black_box(map.range(first..));
+        for _ in 0..repeats {
+            black_box(map.iter());
         }
     });
 }
 
+/// Contrast range_unbounded_unbounded with `iter()`.
 #[bench]
-pub fn range_unbounded_unbounded(b: &mut Bencher) {
-    let size = 144;
-    let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
-    b.iter(|| map.range(..));
+pub fn range_unbounded_vs_iter(b: &mut Bencher) {
+    bench_iter(b, BENCH_RANGE_COUNT, BENCH_RANGE_SIZE);
+}
+
+#[bench]
+pub fn iter_0(b: &mut Bencher) {
+    bench_iter(b, 1_000, 0);
+}
+
+#[bench]
+pub fn iter_1(b: &mut Bencher) {
+    bench_iter(b, 1_000, 1);
+}
+
+#[bench]
+pub fn iter_100(b: &mut Bencher) {
+    bench_iter(b, 1_000, 100);
+}
+
+#[bench]
+pub fn iter_10k(b: &mut Bencher) {
+    bench_iter(b, 1_000, 10_000);
+}
+
+#[bench]
+pub fn iter_1m(b: &mut Bencher) {
+    bench_iter(b, 1_000, 1_000_000);
 }
diff --git a/src/liballoc/collections/btree/map.rs b/src/liballoc/collections/btree/map.rs
index b8f1a4199c6..98a94d695f7 100644
--- a/src/liballoc/collections/btree/map.rs
+++ b/src/liballoc/collections/btree/map.rs
@@ -1540,16 +1540,10 @@ impl<K, V> IntoIterator for BTreeMap<K, V> {
 
     fn into_iter(self) -> IntoIter<K, V> {
         let mut me = ManuallyDrop::new(self);
-        if let Some(root) = me.root.as_mut() {
-            let root1 = unsafe { ptr::read(root).into_ref() };
-            let root2 = unsafe { ptr::read(root).into_ref() };
-            let len = me.length;
-
-            IntoIter {
-                front: Some(root1.first_leaf_edge()),
-                back: Some(root2.last_leaf_edge()),
-                length: len,
-            }
+        if let Some(root) = me.root.take() {
+            let (f, b) = full_range_search(root.into_ref());
+
+            IntoIter { front: Some(f), back: Some(b), length: me.length }
         } else {
             IntoIter { front: None, back: None, length: 0 }
         }
@@ -2037,6 +2031,7 @@ where
     }
 }
 
+/// Finds the leaf edges delimiting a specified range in or underneath a node.
 fn range_search<BorrowType, K, V, Q: ?Sized, R: RangeBounds<Q>>(
     root: NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
     range: R,
@@ -2122,6 +2117,33 @@ where
     }
 }
 
+/// Equivalent to `range_search(k, v, ..)` without the `Ord` bound.
+fn full_range_search<BorrowType, K, V>(
+    root: NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
+) -> (
+    Handle<NodeRef<BorrowType, K, V, marker::Leaf>, marker::Edge>,
+    Handle<NodeRef<BorrowType, K, V, marker::Leaf>, marker::Edge>,
+) {
+    // We duplicate the root NodeRef here -- we will never access it in a way
+    // that overlaps references obtained from the root.
+    let mut min_node = unsafe { ptr::read(&root) };
+    let mut max_node = root;
+    loop {
+        let front = min_node.first_edge();
+        let back = max_node.last_edge();
+        match (front.force(), back.force()) {
+            (Leaf(f), Leaf(b)) => {
+                return (f, b);
+            }
+            (Internal(min_int), Internal(max_int)) => {
+                min_node = min_int.descend();
+                max_node = max_int.descend();
+            }
+            _ => unreachable!("BTreeMap has different depths"),
+        };
+    }
+}
+
 impl<K, V> BTreeMap<K, V> {
     /// Gets an iterator over the entries of the map, sorted by key.
     ///
@@ -2146,12 +2168,12 @@ impl<K, V> BTreeMap<K, V> {
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn iter(&self) -> Iter<'_, K, V> {
-        Iter {
-            range: Range {
-                front: self.root.as_ref().map(|r| r.as_ref().first_leaf_edge()),
-                back: self.root.as_ref().map(|r| r.as_ref().last_leaf_edge()),
-            },
-            length: self.length,
+        if let Some(root) = &self.root {
+            let (f, b) = full_range_search(root.as_ref());
+
+            Iter { range: Range { front: Some(f), back: Some(b) }, length: self.length }
+        } else {
+            Iter { range: Range { front: None, back: None }, length: 0 }
         }
     }
 
@@ -2178,19 +2200,15 @@ impl<K, V> BTreeMap<K, V> {
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
-        IterMut {
-            range: if let Some(root) = &mut self.root {
-                let root1 = root.as_mut();
-                let root2 = unsafe { ptr::read(&root1) };
-                RangeMut {
-                    front: Some(root1.first_leaf_edge()),
-                    back: Some(root2.last_leaf_edge()),
-                    _marker: PhantomData,
-                }
-            } else {
-                RangeMut { front: None, back: None, _marker: PhantomData }
-            },
-            length: self.length,
+        if let Some(root) = &mut self.root {
+            let (f, b) = full_range_search(root.as_mut());
+
+            IterMut {
+                range: RangeMut { front: Some(f), back: Some(b), _marker: PhantomData },
+                length: self.length,
+            }
+        } else {
+            IterMut { range: RangeMut { front: None, back: None, _marker: PhantomData }, length: 0 }
         }
     }