diff options
| author | Dylan MacKenzie <ecstaticmorse@gmail.com> | 2020-02-16 14:40:12 -0800 |
|---|---|---|
| committer | Dylan MacKenzie <ecstaticmorse@gmail.com> | 2020-02-19 10:51:40 -0800 |
| commit | ea3c9d27cffceb9d4c8dcfda1c96b138d7f01f3d (patch) | |
| tree | 7093720c45183814a25b22ba26cb73a2eac3c3dc /src/librustc_data_structures | |
| parent | 7710ae0e2653e0fa14ff9b7797ed29340e0284a8 (diff) | |
| download | rust-ea3c9d27cffceb9d4c8dcfda1c96b138d7f01f3d.tar.gz rust-ea3c9d27cffceb9d4c8dcfda1c96b138d7f01f3d.zip | |
Implement an insertion-order preserving, efficient multi-map
Diffstat (limited to 'src/librustc_data_structures')
| -rw-r--r-- | src/librustc_data_structures/sorted_map.rs | 4 | ||||
| -rw-r--r-- | src/librustc_data_structures/sorted_map/index_map.rs | 218 | ||||
| -rw-r--r-- | src/librustc_data_structures/sorted_map/tests.rs | 28 |
3 files changed, 249 insertions, 1 deletions
diff --git a/src/librustc_data_structures/sorted_map.rs b/src/librustc_data_structures/sorted_map.rs index 08706aac11e..8c42b74b85a 100644 --- a/src/librustc_data_structures/sorted_map.rs +++ b/src/librustc_data_structures/sorted_map.rs @@ -4,6 +4,10 @@ use std::iter::FromIterator; use std::mem; use std::ops::{Bound, Index, IndexMut, RangeBounds}; +mod index_map; + +pub use index_map::SortedIndexMultiMap; + /// `SortedMap` is a data structure with similar characteristics as BTreeMap but /// slightly different trade-offs: lookup, insertion, and removal are O(log(N)) /// and elements can be iterated in order cheaply. diff --git a/src/librustc_data_structures/sorted_map/index_map.rs b/src/librustc_data_structures/sorted_map/index_map.rs new file mode 100644 index 00000000000..b7005ccdc99 --- /dev/null +++ b/src/librustc_data_structures/sorted_map/index_map.rs @@ -0,0 +1,218 @@ +//! A variant of `SortedMap` that preserves insertion order. + +use std::borrow::Borrow; +use std::hash::{Hash, Hasher}; +use std::iter::FromIterator; + +use crate::stable_hasher::{HashStable, StableHasher}; +use rustc_index::vec::{Idx, IndexVec}; + +/// An indexed multi-map that preserves insertion order while permitting both `O(log n)` lookup of +/// an item by key and `O(1)` lookup by index. +/// +/// This data structure is a hybrid of an [`IndexVec`] and a [`SortedMap`]. Like `IndexVec`, +/// `SortedIndexMultiMap` assigns a typed index to each item while preserving insertion order. +/// Like `SortedMap`, `SortedIndexMultiMap` has efficient lookup of items by key. However, this +/// is accomplished by sorting an array of item indices instead of the items themselves. +/// +/// Unlike `SortedMap`, this data structure can hold multiple equivalent items at once, so the +/// `get_by_key` method and its variants return an iterator instead of an `Option`. Equivalent +/// items will be yielded in insertion order. +/// +/// Unlike a general-purpose map like `BTreeSet` or `HashSet`, `SortedMap` and +/// `SortedIndexMultiMap` require `O(n)` time to insert a single item. This is because we may need +/// to insert into the middle of the sorted array. Users should avoid mutating this data structure +/// in-place. +/// +/// [`IndexVec`]: ../../rustc_index/vec/struct.IndexVec.html +/// [`SortedMap`]: ../sorted_map/struct.SortedMap.html +#[derive(Clone, Debug)] +pub struct SortedIndexMultiMap<I: Idx, K, V> { + /// The elements of the map in insertion order. + items: IndexVec<I, (K, V)>, + + /// Indices of the items in the set, sorted by the item's key. + idx_sorted_by_item_key: Vec<I>, +} + +impl<I: Idx, K: Ord, V> SortedIndexMultiMap<I, K, V> { + pub fn new() -> Self { + SortedIndexMultiMap { items: IndexVec::new(), idx_sorted_by_item_key: Vec::new() } + } + + pub fn len(&self) -> usize { + self.items.len() + } + + pub fn is_empty(&self) -> bool { + self.items.is_empty() + } + + /// Returns an iterator over the items in the map in insertion order. + pub fn into_iter(self) -> impl DoubleEndedIterator<Item = (K, V)> { + self.items.into_iter() + } + + /// Returns an iterator over the items in the map in insertion order along with their indices. + pub fn into_iter_enumerated(self) -> impl DoubleEndedIterator<Item = (I, (K, V))> { + self.items.into_iter_enumerated() + } + + /// Returns an iterator over the items in the map in insertion order. + pub fn iter(&self) -> impl '_ + DoubleEndedIterator<Item = (&K, &V)> { + self.items.iter().map(|(ref k, ref v)| (k, v)) + } + + /// Returns an iterator over the items in the map in insertion order along with their indices. + pub fn iter_enumerated(&self) -> impl '_ + DoubleEndedIterator<Item = (I, (&K, &V))> { + self.items.iter_enumerated().map(|(i, (ref k, ref v))| (i, (k, v))) + } + + /// Returns the item in the map with the given index. + pub fn get(&self, idx: I) -> Option<&(K, V)> { + self.items.get(idx) + } + + /// Returns an iterator over the items in the map that are equal to `key`. + /// + /// If there are multiple items that are equivalent to `key`, they will be yielded in + /// insertion order. + pub fn get_by_key<Q: 'a>(&'a self, key: &Q) -> impl 'a + Iterator<Item = &'a V> + where + Q: Ord + ?Sized, + K: Borrow<Q>, + { + self.get_by_key_enumerated(key).map(|(_, v)| v) + } + + /// Returns an iterator over the items in the map that are equal to `key` along with their + /// indices. + /// + /// If there are multiple items that are equivalent to `key`, they will be yielded in + /// insertion order. + pub fn get_by_key_enumerated<Q>(&self, key: &Q) -> impl '_ + Iterator<Item = (I, &V)> + where + Q: Ord + ?Sized, + K: Borrow<Q>, + { + // FIXME: This should be in the standard library as `equal_range`. See rust-lang/rfcs#2184. + match self.binary_search_idx(key) { + Err(_) => self.idxs_to_items_enumerated(&[]), + + Ok(idx) => { + let start = self.find_lower_bound(key, idx); + let end = self.find_upper_bound(key, idx); + self.idxs_to_items_enumerated(&self.idx_sorted_by_item_key[start..end]) + } + } + } + + fn binary_search_idx<Q>(&self, key: &Q) -> Result<usize, usize> + where + Q: Ord + ?Sized, + K: Borrow<Q>, + { + self.idx_sorted_by_item_key.binary_search_by(|&idx| self.items[idx].0.borrow().cmp(key)) + } + + /// Returns the index into the `idx_sorted_by_item_key` array of the first item equal to + /// `key`. + /// + /// `initial` must be an index into that same array for an item that is equal to `key`. + fn find_lower_bound<Q>(&self, key: &Q, initial: usize) -> usize + where + Q: Ord + ?Sized, + K: Borrow<Q>, + { + debug_assert!(self.items[self.idx_sorted_by_item_key[initial]].0.borrow() == key); + + // FIXME: At present, this uses linear search, meaning lookup is only `O(log n)` if duplicate + // entries are rare. It would be better to start with a linear search for the common case but + // fall back to an exponential search if many duplicates are found. This applies to + // `upper_bound` as well. + let mut start = initial; + while start != 0 && self.items[self.idx_sorted_by_item_key[start - 1]].0.borrow() == key { + start -= 1; + } + + start + } + + /// Returns the index into the `idx_sorted_by_item_key` array of the first item greater than + /// `key`, or `self.len()` if no such item exists. + /// + /// `initial` must be an index into that same array for an item that is equal to `key`. + fn find_upper_bound<Q>(&self, key: &Q, initial: usize) -> usize + where + Q: Ord + ?Sized, + K: Borrow<Q>, + { + debug_assert!(self.items[self.idx_sorted_by_item_key[initial]].0.borrow() == key); + + // See the FIXME for `find_lower_bound`. + let mut end = initial + 1; + let len = self.items.len(); + while end < len && self.items[self.idx_sorted_by_item_key[end]].0.borrow() == key { + end += 1; + } + + end + } + + fn idxs_to_items_enumerated(&'a self, idxs: &'a [I]) -> impl 'a + Iterator<Item = (I, &'a V)> { + idxs.iter().map(move |&idx| (idx, &self.items[idx].1)) + } +} + +impl<I: Idx, K: Eq, V: Eq> Eq for SortedIndexMultiMap<I, K, V> {} +impl<I: Idx, K: PartialEq, V: PartialEq> PartialEq for SortedIndexMultiMap<I, K, V> { + fn eq(&self, other: &Self) -> bool { + // No need to compare the sorted index. If the items are the same, the index will be too. + self.items == other.items + } +} + +impl<I: Idx, K, V> Hash for SortedIndexMultiMap<I, K, V> +where + K: Hash, + V: Hash, +{ + fn hash<H: Hasher>(&self, hasher: &mut H) { + self.items.hash(hasher) + } +} +impl<I: Idx, K, V, C> HashStable<C> for SortedIndexMultiMap<I, K, V> +where + K: HashStable<C>, + V: HashStable<C>, +{ + fn hash_stable(&self, ctx: &mut C, hasher: &mut StableHasher) { + self.items.hash_stable(ctx, hasher) + } +} + +impl<I: Idx, K: Ord, V> FromIterator<(K, V)> for SortedIndexMultiMap<I, K, V> { + fn from_iter<J>(iter: J) -> Self + where + J: IntoIterator<Item = (K, V)>, + { + let items = IndexVec::from_iter(iter); + let mut idx_sorted_by_item_key: Vec<_> = items.indices().collect(); + + // `sort_by_key` is stable, so insertion order is preserved for duplicate items. + idx_sorted_by_item_key.sort_by_key(|&idx| &items[idx].0); + + SortedIndexMultiMap { items, idx_sorted_by_item_key } + } +} + +impl<I: Idx, K, V> std::ops::Index<I> for SortedIndexMultiMap<I, K, V> { + type Output = V; + + fn index(&self, idx: I) -> &Self::Output { + &self.items[idx].1 + } +} + +#[cfg(tests)] +mod tests; diff --git a/src/librustc_data_structures/sorted_map/tests.rs b/src/librustc_data_structures/sorted_map/tests.rs index 692e1deb4be..7d91e1fdcef 100644 --- a/src/librustc_data_structures/sorted_map/tests.rs +++ b/src/librustc_data_structures/sorted_map/tests.rs @@ -1,4 +1,30 @@ -use super::SortedMap; +use super::{SortedIndexMultiMap, SortedMap}; + +#[test] +fn test_sorted_index_multi_map() { + let entries: Vec<_> = vec![(2, 0), (1, 0), (2, 1), (3, 0), (2, 2)]; + let set: SortedIndexMultiMap<usize, _, _> = entries.iter().copied().collect(); + + // Insertion order is preserved. + assert!(entries.iter().map(|(ref k, ref v)| (k, v)).eq(set.iter())); + + // Indexing + for (i, expect) in entries.iter().enumerate() { + assert_eq!(set[i], expect.1); + } + + // `get_by_key` works. + assert_eq!(set.get_by_key(&3).copied().collect::<Vec<_>>(), vec![0]); + assert!(set.get_by_key(&4).next().is_none()); + + // `get_by_key` returns items in insertion order. + let twos: Vec<_> = set.get_by_key_enumerated(&2).collect(); + let idxs: Vec<usize> = twos.iter().map(|(i, _)| *i).collect(); + let values: Vec<usize> = twos.iter().map(|(_, &v)| v).collect(); + + assert_eq!(idxs, vec![0, 2, 4]); + assert_eq!(values, vec![0, 1, 2]); +} #[test] fn test_insert_and_iter() { |
