diff options
| -rw-r--r-- | src/bootstrap/src/utils/cache.rs | 46 | ||||
| -rw-r--r-- | src/bootstrap/src/utils/cache/tests.rs | 52 |
2 files changed, 98 insertions, 0 deletions
diff --git a/src/bootstrap/src/utils/cache.rs b/src/bootstrap/src/utils/cache.rs index 29342cc5a2c..1c8cc4025df 100644 --- a/src/bootstrap/src/utils/cache.rs +++ b/src/bootstrap/src/utils/cache.rs @@ -1,3 +1,17 @@ +//! This module helps you efficiently store and retrieve values using interning. +//! +//! Interning is a neat trick that keeps only one copy of identical values, saving memory +//! and making comparisons super fast. Here, we provide the `Interned<T>` struct and the `Internable` trait +//! to make interning easy for different data types. +//! +//! The `Interner` struct handles caching for common types like `String`, `PathBuf`, and `Vec<String>`, +//! while the `Cache` struct acts as a write-once storage for linking computation steps with their results. +//! +//! # Thread Safety +//! +//! We use `Mutex` to make sure interning and retrieval are thread-safe. But keep in mind—once a value is +//! interned, it sticks around for the entire lifetime of the program. + use std::any::{Any, TypeId}; use std::borrow::Borrow; use std::cell::RefCell; @@ -12,6 +26,9 @@ use std::{fmt, mem}; use crate::core::builder::Step; +/// Represents an interned value of type `T`, allowing for efficient comparisons and retrieval. +/// +/// This struct stores a unique index referencing the interned value within an internal cache. pub struct Interned<T>(usize, PhantomData<*const T>); impl<T: Internable + Default> Default for Interned<T> { @@ -111,6 +128,10 @@ impl<T: Internable + Ord> Ord for Interned<T> { } } +/// A structure for managing the interning of values of type `T`. +/// +/// `TyIntern<T>` maintains a mapping between values and their interned representations, +/// ensuring that duplicate values are not stored multiple times. struct TyIntern<T: Clone + Eq> { items: Vec<T>, set: HashMap<T, Interned<T>>, @@ -123,6 +144,9 @@ impl<T: Hash + Clone + Eq> Default for TyIntern<T> { } impl<T: Hash + Clone + Eq> TyIntern<T> { + /// Interns a borrowed value, ensuring it is stored uniquely. + /// + /// If the value has been previously interned, the same `Interned<T>` instance is returned. fn intern_borrow<B>(&mut self, item: &B) -> Interned<T> where B: Eq + Hash + ToOwned<Owned = T> + ?Sized, @@ -138,6 +162,9 @@ impl<T: Hash + Clone + Eq> TyIntern<T> { interned } + /// Interns an owned value, storing it uniquely. + /// + /// If the value has been previously interned, the existing `Interned<T>` is returned. fn intern(&mut self, item: T) -> Interned<T> { if let Some(i) = self.set.get(&item) { return *i; @@ -148,11 +175,16 @@ impl<T: Hash + Clone + Eq> TyIntern<T> { interned } + /// Retrieves a reference to the interned value associated with the given `Interned<T>` instance. fn get(&self, i: Interned<T>) -> &T { &self.items[i.0] } } +/// A global interner for managing interned values of common types. +/// +/// This structure maintains caches for `String`, `PathBuf`, and `Vec<String>`, ensuring efficient storage +/// and retrieval of frequently used values. #[derive(Default)] pub struct Interner { strs: Mutex<TyIntern<String>>, @@ -160,6 +192,10 @@ pub struct Interner { lists: Mutex<TyIntern<Vec<String>>>, } +/// Defines the behavior required for a type to be internable. +/// +/// Types implementing this trait must provide access to a static cache and define an `intern` method +/// that ensures values are stored uniquely. trait Internable: Clone + Eq + Hash + 'static { fn intern_cache() -> &'static Mutex<TyIntern<Self>>; @@ -187,11 +223,15 @@ impl Internable for Vec<String> { } impl Interner { + /// Interns a string reference, ensuring it is stored uniquely. + /// + /// If the string has been previously interned, the same `Interned<String>` instance is returned. pub fn intern_str(&self, s: &str) -> Interned<String> { self.strs.lock().unwrap().intern_borrow(s) } } +/// A global instance of `Interner` that caches common interned values. pub static INTERNER: LazyLock<Interner> = LazyLock::new(Interner::default); /// This is essentially a `HashMap` which allows storing any type in its input and @@ -209,10 +249,12 @@ pub struct Cache( ); impl Cache { + /// Creates a new empty cache. pub fn new() -> Cache { Cache(RefCell::new(HashMap::new())) } + /// Stores the result of a computation step in the cache. pub fn put<S: Step>(&self, step: S, value: S::Output) { let mut cache = self.0.borrow_mut(); let type_id = TypeId::of::<S>(); @@ -225,6 +267,7 @@ impl Cache { stepcache.insert(step, value); } + /// Retrieves a cached result for the given step, if available. pub fn get<S: Step>(&self, step: &S) -> Option<S::Output> { let mut cache = self.0.borrow_mut(); let type_id = TypeId::of::<S>(); @@ -255,3 +298,6 @@ impl Cache { self.0.borrow().contains_key(&TypeId::of::<S>()) } } + +#[cfg(test)] +mod tests; diff --git a/src/bootstrap/src/utils/cache/tests.rs b/src/bootstrap/src/utils/cache/tests.rs new file mode 100644 index 00000000000..28f5563a589 --- /dev/null +++ b/src/bootstrap/src/utils/cache/tests.rs @@ -0,0 +1,52 @@ +use std::path::PathBuf; + +use crate::utils::cache::{INTERNER, Internable, TyIntern}; + +#[test] +fn test_string_interning() { + let s1 = INTERNER.intern_str("Hello"); + let s2 = INTERNER.intern_str("Hello"); + let s3 = INTERNER.intern_str("world"); + + assert_eq!(s1, s2, "Same strings should be interned to the same instance"); + assert_ne!(s1, s3, "Different strings should have different interned values"); +} + +#[test] +fn test_path_interning() { + let p1 = PathBuf::from("/tmp/file").intern(); + let p2 = PathBuf::from("/tmp/file").intern(); + let p3 = PathBuf::from("/tmp/other").intern(); + + assert_eq!(p1, p2); + assert_ne!(p1, p3); +} + +#[test] +fn test_vec_interning() { + let v1 = vec!["a".to_string(), "b".to_string()].intern(); + let v2 = vec!["a".to_string(), "b".to_string()].intern(); + let v3 = vec!["c".to_string()].intern(); + + assert_eq!(v1, v2); + assert_ne!(v1, v3); +} + +#[test] +fn test_interned_equality() { + let s1 = INTERNER.intern_str("test"); + let s2 = INTERNER.intern_str("test"); + + assert_eq!(s1, s2); + assert_eq!(s1, "test"); +} + +#[test] +fn test_ty_intern_intern_borrow() { + let mut interner = TyIntern::default(); + let s1 = interner.intern_borrow("borrowed"); + let s2 = interner.intern("borrowed".to_string()); + + assert_eq!(s1, s2); + assert_eq!(interner.get(s1), "borrowed"); +} |
