about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAngelicosPhosphoros <xuzin.timur@gmail.com>2022-05-20 15:38:04 +0300
committerAngelicosPhosphoros <xuzin.timur@gmail.com>2022-05-20 18:46:24 +0300
commitde97d7393fd20215cdc1a2cacfa84290df4ae460 (patch)
tree05078be60065b3bc6c52efcc47a31b9ab4883b73
parentcd73afadae5b0163f9285f1b5edbbd1c84fde410 (diff)
downloadrust-de97d7393fd20215cdc1a2cacfa84290df4ae460.tar.gz
rust-de97d7393fd20215cdc1a2cacfa84290df4ae460.zip
Add complexity estimation of iterating over HashSet and HashMap
It is not obvious (at least for me) that complexity of iteration over hash tables depends on capacity and not length. Especially comparing with other containers like Vec or String. I think, this behaviour is worth mentioning.

I run benchmark which tests iteration time for maps with length 50 and different capacities and get this results:
```
capacity - time
64       - 203.87 ns
256      - 351.78 ns
1024     - 607.87 ns
4096     - 965.82 ns
16384    - 3.1188 us
```

If you want to dig why it behaves such way, you can look current implementation in [hashbrown code](https://github.com/rust-lang/hashbrown/blob/f3a9f211d06f78c5beb81ac22ea08fdc269e068f/src/raw/mod.rs#L1933).

Benchmarks code would be presented in PR related to this commit.
-rw-r--r--library/std/src/collections/hash/map.rs40
-rw-r--r--library/std/src/collections/hash/set.rs10
2 files changed, 50 insertions, 0 deletions
diff --git a/library/std/src/collections/hash/map.rs b/library/std/src/collections/hash/map.rs
index 977714281fb..969f5dde4f0 100644
--- a/library/std/src/collections/hash/map.rs
+++ b/library/std/src/collections/hash/map.rs
@@ -344,6 +344,11 @@ impl<K, V, S> HashMap<K, V, S> {
     ///     println!("{key}");
     /// }
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, iterating over keys takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn keys(&self) -> Keys<'_, K, V> {
         Keys { inner: self.iter() }
@@ -370,6 +375,11 @@ impl<K, V, S> HashMap<K, V, S> {
     /// vec.sort_unstable();
     /// assert_eq!(vec, ["a", "b", "c"]);
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, iterating over keys takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[inline]
     #[rustc_lint_query_instability]
     #[stable(feature = "map_into_keys_values", since = "1.54.0")]
@@ -395,6 +405,11 @@ impl<K, V, S> HashMap<K, V, S> {
     ///     println!("{val}");
     /// }
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, iterating over values takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn values(&self) -> Values<'_, K, V> {
         Values { inner: self.iter() }
@@ -422,6 +437,11 @@ impl<K, V, S> HashMap<K, V, S> {
     ///     println!("{val}");
     /// }
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, iterating over values takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[stable(feature = "map_values_mut", since = "1.10.0")]
     pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> {
         ValuesMut { inner: self.iter_mut() }
@@ -448,6 +468,11 @@ impl<K, V, S> HashMap<K, V, S> {
     /// vec.sort_unstable();
     /// assert_eq!(vec, [1, 2, 3]);
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, iterating over values takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[inline]
     #[rustc_lint_query_instability]
     #[stable(feature = "map_into_keys_values", since = "1.54.0")]
@@ -473,6 +498,11 @@ impl<K, V, S> HashMap<K, V, S> {
     ///     println!("key: {key} val: {val}");
     /// }
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, iterating over map takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[rustc_lint_query_instability]
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn iter(&self) -> Iter<'_, K, V> {
@@ -503,6 +533,11 @@ impl<K, V, S> HashMap<K, V, S> {
     ///     println!("key: {key} val: {val}");
     /// }
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, iterating over map takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[rustc_lint_query_instability]
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
@@ -633,6 +668,11 @@ impl<K, V, S> HashMap<K, V, S> {
     /// map.retain(|&k, _| k % 2 == 0);
     /// assert_eq!(map.len(), 4);
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, this operation takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[inline]
     #[rustc_lint_query_instability]
     #[stable(feature = "retain_hash_collection", since = "1.18.0")]
diff --git a/library/std/src/collections/hash/set.rs b/library/std/src/collections/hash/set.rs
index 13bba0a6fd8..4ac0e081c2e 100644
--- a/library/std/src/collections/hash/set.rs
+++ b/library/std/src/collections/hash/set.rs
@@ -184,6 +184,11 @@ impl<T, S> HashSet<T, S> {
     ///     println!("{x}");
     /// }
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, iterating over set takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[inline]
     #[rustc_lint_query_instability]
     #[stable(feature = "rust1", since = "1.0.0")]
@@ -312,6 +317,11 @@ impl<T, S> HashSet<T, S> {
     /// set.retain(|&k| k % 2 == 0);
     /// assert_eq!(set.len(), 3);
     /// ```
+    ///
+    /// # Performance
+    ///
+    /// In the current implementation, this operation takes O(capacity) time
+    /// instead of O(len) because it internally visits empty buckets too.
     #[rustc_lint_query_instability]
     #[stable(feature = "retain_hash_collection", since = "1.18.0")]
     pub fn retain<F>(&mut self, f: F)