about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2016-12-06 21:05:31 +0000
committerbors <bors@rust-lang.org>2016-12-06 21:05:31 +0000
commit5f128ed10f040c52e25b16c40235288822044c8c (patch)
tree82847cc0eed7630af3dc0cd73a56e970169281dd
parentb5d0f90929ddaae89609e9bb229a9b8a27e27615 (diff)
parent2c5d2403d770e624bbb0b9ce8e970efa914a8602 (diff)
downloadrust-5f128ed10f040c52e25b16c40235288822044c8c.tar.gz
rust-5f128ed10f040c52e25b16c40235288822044c8c.zip
Auto merge of #38017 - arthurprs:hm-extend, r=bluss
Smarter HashMap/HashSet pre-allocation for extend/from_iter

HashMap/HashSet from_iter and extend are making totally different assumptions.

A more balanced decision may allocate half the lower hint (rounding up). For "well defined" iterators this effectively limits the worst case to two resizes (the initial reserve + one resize).

cc #36579
cc @bluss
-rw-r--r--src/libstd/collections/hash/map.rs17
-rw-r--r--src/libstd/collections/hash/set.rs10
2 files changed, 16 insertions, 11 deletions
diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs
index f102a1bf630..368604ccb82 100644
--- a/src/libstd/collections/hash/map.rs
+++ b/src/libstd/collections/hash/map.rs
@@ -1974,10 +1974,8 @@ impl<K, V, S> FromIterator<(K, V)> for HashMap<K, V, S>
           S: BuildHasher + Default
 {
     fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> HashMap<K, V, S> {
-        let iterator = iter.into_iter();
-        let lower = iterator.size_hint().0;
-        let mut map = HashMap::with_capacity_and_hasher(lower, Default::default());
-        map.extend(iterator);
+        let mut map = HashMap::with_hasher(Default::default());
+        map.extend(iter);
         map
     }
 }
@@ -1988,6 +1986,17 @@ impl<K, V, S> Extend<(K, V)> for HashMap<K, V, S>
           S: BuildHasher
 {
     fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
+        // Keys may be already present or show multiple times in the iterator.
+        // Reserve the entire hint lower bound if the map is empty.
+        // Otherwise reserve half the hint (rounded up), so the map
+        // will only resize twice in the worst case.
+        let iter = iter.into_iter();
+        let reserve = if self.is_empty() {
+            iter.size_hint().0
+        } else {
+            (iter.size_hint().0 + 1) / 2
+        };
+        self.reserve(reserve);
         for (k, v) in iter {
             self.insert(k, v);
         }
diff --git a/src/libstd/collections/hash/set.rs b/src/libstd/collections/hash/set.rs
index 1ec7a4a7b63..72af612f569 100644
--- a/src/libstd/collections/hash/set.rs
+++ b/src/libstd/collections/hash/set.rs
@@ -663,10 +663,8 @@ impl<T, S> FromIterator<T> for HashSet<T, S>
           S: BuildHasher + Default
 {
     fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> HashSet<T, S> {
-        let iterator = iter.into_iter();
-        let lower = iterator.size_hint().0;
-        let mut set = HashSet::with_capacity_and_hasher(lower, Default::default());
-        set.extend(iterator);
+        let mut set = HashSet::with_hasher(Default::default());
+        set.extend(iter);
         set
     }
 }
@@ -677,9 +675,7 @@ impl<T, S> Extend<T> for HashSet<T, S>
           S: BuildHasher
 {
     fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
-        for k in iter {
-            self.insert(k);
-        }
+        self.map.extend(iter.into_iter().map(|k| (k, ())));
     }
 }