diff options
| author | bors <bors@rust-lang.org> | 2021-09-07 02:24:11 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-09-07 02:24:11 +0000 |
| commit | ffaf857045f4f4d8bb563e0a5077f9b065f42916 (patch) | |
| tree | c4ddda3dc84bfd079a01e805fe3b865a04698cab | |
| parent | 11bbb5231349a0a144d86d5c0c21061a06d1969d (diff) | |
| parent | a03287bbf765ce7ac0e2ae9e64d8ade168ece301 (diff) | |
| download | rust-ffaf857045f4f4d8bb563e0a5077f9b065f42916.tar.gz rust-ffaf857045f4f4d8bb563e0a5077f9b065f42916.zip | |
Auto merge of #88448 - xu-cheng:btree-blk-build, r=Mark-Simulacrum
BTreeMap/BTreeSet::from_iter: use bulk building to improve the performance Bulk building is a common technique to increase the performance of building a fresh btree map. Instead of inserting items one-by-one, we sort all the items beforehand then create the BtreeMap in bulk. Benchmark ``` ./x.py bench library/alloc --test-args btree::map::from_iter ``` * Before ``` test btree::map::from_iter_rand_100 ... bench: 3,694 ns/iter (+/- 840) test btree::map::from_iter_rand_10_000 ... bench: 1,033,446 ns/iter (+/- 192,950) test btree::map::from_iter_seq_100 ... bench: 5,689 ns/iter (+/- 1,259) test btree::map::from_iter_seq_10_000 ... bench: 861,033 ns/iter (+/- 118,815) ``` * After ``` test btree::map::from_iter_rand_100 ... bench: 3,033 ns/iter (+/- 707) test btree::map::from_iter_rand_10_000 ... bench: 775,958 ns/iter (+/- 105,152) test btree::map::from_iter_seq_100 ... bench: 2,969 ns/iter (+/- 336) test btree::map::from_iter_seq_10_000 ... bench: 258,292 ns/iter (+/- 29,364) ```
| -rw-r--r-- | library/alloc/benches/btree/map.rs | 50 | ||||
| -rw-r--r-- | library/alloc/src/collections/btree/dedup_sorted_iter.rs | 47 | ||||
| -rw-r--r-- | library/alloc/src/collections/btree/map.rs | 36 | ||||
| -rw-r--r-- | library/alloc/src/collections/btree/mod.rs | 1 | ||||
| -rw-r--r-- | library/alloc/src/collections/btree/set.rs | 27 |
5 files changed, 151 insertions, 10 deletions
diff --git a/library/alloc/benches/btree/map.rs b/library/alloc/benches/btree/map.rs index 920a5ca7db0..c304f748847 100644 --- a/library/alloc/benches/btree/map.rs +++ b/library/alloc/benches/btree/map.rs @@ -54,6 +54,50 @@ macro_rules! map_insert_seq_bench { }; } +macro_rules! map_from_iter_rand_bench { + ($name: ident, $n: expr, $map: ident) => { + #[bench] + pub fn $name(b: &mut Bencher) { + let n: usize = $n; + // setup + let mut rng = thread_rng(); + let mut vec = Vec::with_capacity(n); + + for _ in 0..n { + let i = rng.gen::<usize>() % n; + vec.push((i, i)); + } + + // measure + b.iter(|| { + let map: $map<_, _> = vec.iter().copied().collect(); + black_box(map); + }); + } + }; +} + +macro_rules! map_from_iter_seq_bench { + ($name: ident, $n: expr, $map: ident) => { + #[bench] + pub fn $name(b: &mut Bencher) { + let n: usize = $n; + // setup + let mut vec = Vec::with_capacity(n); + + for i in 0..n { + vec.push((i, i)); + } + + // measure + b.iter(|| { + let map: $map<_, _> = vec.iter().copied().collect(); + black_box(map); + }); + } + }; +} + macro_rules! map_find_rand_bench { ($name: ident, $n: expr, $map: ident) => { #[bench] @@ -111,6 +155,12 @@ map_insert_rand_bench! {insert_rand_10_000, 10_000, BTreeMap} map_insert_seq_bench! {insert_seq_100, 100, BTreeMap} map_insert_seq_bench! {insert_seq_10_000, 10_000, BTreeMap} +map_from_iter_rand_bench! {from_iter_rand_100, 100, BTreeMap} +map_from_iter_rand_bench! {from_iter_rand_10_000, 10_000, BTreeMap} + +map_from_iter_seq_bench! {from_iter_seq_100, 100, BTreeMap} +map_from_iter_seq_bench! {from_iter_seq_10_000, 10_000, BTreeMap} + map_find_rand_bench! {find_rand_100, 100, BTreeMap} map_find_rand_bench! {find_rand_10_000, 10_000, BTreeMap} diff --git a/library/alloc/src/collections/btree/dedup_sorted_iter.rs b/library/alloc/src/collections/btree/dedup_sorted_iter.rs new file mode 100644 index 00000000000..60bf83b8387 --- /dev/null +++ b/library/alloc/src/collections/btree/dedup_sorted_iter.rs @@ -0,0 +1,47 @@ +use core::iter::Peekable; + +/// A iterator for deduping the key of a sorted iterator. +/// When encountering the duplicated key, only the last key-value pair is yielded. +/// +/// Used by [`BTreeMap::bulk_build_from_sorted_iter`]. +pub struct DedupSortedIter<K, V, I> +where + I: Iterator<Item = (K, V)>, +{ + iter: Peekable<I>, +} + +impl<K, V, I> DedupSortedIter<K, V, I> +where + I: Iterator<Item = (K, V)>, +{ + pub fn new(iter: I) -> Self { + Self { iter: iter.peekable() } + } +} + +impl<K, V, I> Iterator for DedupSortedIter<K, V, I> +where + K: Eq, + I: Iterator<Item = (K, V)>, +{ + type Item = (K, V); + + fn next(&mut self) -> Option<(K, V)> { + loop { + let next = match self.iter.next() { + Some(next) => next, + None => return None, + }; + + let peeked = match self.iter.peek() { + Some(peeked) => peeked, + None => return Some(next), + }; + + if next.0 != peeked.0 { + return Some(next); + } + } + } +} diff --git a/library/alloc/src/collections/btree/map.rs b/library/alloc/src/collections/btree/map.rs index 70a838a35f9..501a604e7f7 100644 --- a/library/alloc/src/collections/btree/map.rs +++ b/library/alloc/src/collections/btree/map.rs @@ -1,3 +1,4 @@ +use crate::vec::Vec; use core::borrow::Borrow; use core::cmp::Ordering; use core::fmt::{self, Debug}; @@ -9,6 +10,7 @@ use core::ops::{Index, RangeBounds}; use core::ptr; use super::borrow::DormantMutRef; +use super::dedup_sorted_iter::DedupSortedIter; use super::navigate::{LazyLeafRange, LeafRange}; use super::node::{self, marker, ForceResult::*, Handle, NodeRef, Root}; use super::search::SearchResult::*; @@ -1285,6 +1287,18 @@ impl<K, V> BTreeMap<K, V> { pub fn into_values(self) -> IntoValues<K, V> { IntoValues { inner: self.into_iter() } } + + /// Makes a `BTreeMap` from a sorted iterator. + pub(crate) fn bulk_build_from_sorted_iter<I>(iter: I) -> Self + where + K: Ord, + I: Iterator<Item = (K, V)>, + { + let mut root = Root::new(); + let mut length = 0; + root.bulk_push(DedupSortedIter::new(iter), &mut length); + BTreeMap { root: Some(root), length } + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -1909,9 +1923,15 @@ impl<K, V> FusedIterator for RangeMut<'_, K, V> {} #[stable(feature = "rust1", since = "1.0.0")] impl<K: Ord, V> FromIterator<(K, V)> for BTreeMap<K, V> { fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> BTreeMap<K, V> { - let mut map = BTreeMap::new(); - map.extend(iter); - map + let mut inputs: Vec<_> = iter.into_iter().collect(); + + if inputs.is_empty() { + return BTreeMap::new(); + } + + // use stable sort to preserve the insertion order. + inputs.sort_by(|a, b| a.0.cmp(&b.0)); + BTreeMap::bulk_build_from_sorted_iter(inputs.into_iter()) } } @@ -2020,8 +2040,14 @@ impl<K: Ord, V, const N: usize> From<[(K, V); N]> for BTreeMap<K, V> { /// let map2: BTreeMap<_, _> = [(1, 2), (3, 4)].into(); /// assert_eq!(map1, map2); /// ``` - fn from(arr: [(K, V); N]) -> Self { - core::array::IntoIter::new(arr).collect() + fn from(mut arr: [(K, V); N]) -> Self { + if N == 0 { + return BTreeMap::new(); + } + + // use stable sort to preserve the insertion order. + arr.sort_by(|a, b| a.0.cmp(&b.0)); + BTreeMap::bulk_build_from_sorted_iter(core::array::IntoIter::new(arr)) } } diff --git a/library/alloc/src/collections/btree/mod.rs b/library/alloc/src/collections/btree/mod.rs index f74172c7d97..9571b3d594d 100644 --- a/library/alloc/src/collections/btree/mod.rs +++ b/library/alloc/src/collections/btree/mod.rs @@ -1,5 +1,6 @@ mod append; mod borrow; +mod dedup_sorted_iter; mod fix; pub mod map; mod mem; diff --git a/library/alloc/src/collections/btree/set.rs b/library/alloc/src/collections/btree/set.rs index ff0db22e0cc..c664e201aec 100644 --- a/library/alloc/src/collections/btree/set.rs +++ b/library/alloc/src/collections/btree/set.rs @@ -1,6 +1,7 @@ // This is pretty much entirely stolen from TreeSet, since BTreeMap has an identical interface // to TreeMap +use crate::vec::Vec; use core::borrow::Borrow; use core::cmp::Ordering::{Equal, Greater, Less}; use core::cmp::{max, min}; @@ -1056,9 +1057,17 @@ impl<T> BTreeSet<T> { #[stable(feature = "rust1", since = "1.0.0")] impl<T: Ord> FromIterator<T> for BTreeSet<T> { fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> BTreeSet<T> { - let mut set = BTreeSet::new(); - set.extend(iter); - set + let mut inputs: Vec<_> = iter.into_iter().collect(); + + if inputs.is_empty() { + return BTreeSet::new(); + } + + // use stable sort to preserve the insertion order. + inputs.sort(); + let iter = inputs.into_iter().map(|k| (k, ())); + let map = BTreeMap::bulk_build_from_sorted_iter(iter); + BTreeSet { map } } } @@ -1071,8 +1080,16 @@ impl<T: Ord, const N: usize> From<[T; N]> for BTreeSet<T> { /// let set2: BTreeSet<_> = [1, 2, 3, 4].into(); /// assert_eq!(set1, set2); /// ``` - fn from(arr: [T; N]) -> Self { - core::array::IntoIter::new(arr).collect() + fn from(mut arr: [T; N]) -> Self { + if N == 0 { + return BTreeSet::new(); + } + + // use stable sort to preserve the insertion order. + arr.sort(); + let iter = core::array::IntoIter::new(arr).map(|k| (k, ())); + let map = BTreeMap::bulk_build_from_sorted_iter(iter); + BTreeSet { map } } } |
