tinymist_analysis/adt/
interner.rs

//! Global `Arc`-based object interning infrastructure.
//!
//! Eventually this should probably be replaced with salsa-based interning.
//!
//! todo: This is less efficient than it could be, since the `Arc` object
//! updates its reference count every time it is cloned. todo: we may be able
//! to optimize usage with the following approach:
//! ```plain
//! fn run_analyze(f) {
//!   let local = thread_local_intern();
//!   let res = f(local);
//!   std::thread::spawn(move || gc(local));
//!   return res
//! }
//! ```
//! However, this is out of scope for now.
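//!
//! A minimal usage sketch (`str` is registered via `impl_internable!` at the
//! bottom of this file; equal contents intern to a single allocation, so
//! equality is a pointer comparison):
//! ```plain
//! let a = Interned::new_str("hello");
//! let b = Interned::new_str("hello");
//! assert!(a == b); // pointer equality, not a string compare
//! ```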

use std::{
    fmt::{self, Debug, Display},
    hash::{BuildHasherDefault, Hash, Hasher},
    ops::Deref,
    sync::{LazyLock, OnceLock},
};

use dashmap::{DashMap, SharedValue};
use ecow::{EcoString, EcoVec};
use hashbrown::{HashMap, hash_map::RawEntryMut};
use parking_lot::Mutex;
use rustc_hash::FxHasher;
use triomphe::Arc;
use typst::{foundations::Str, syntax::ast::Ident};

type InternMap<T> = DashMap<Arc<T>, (), BuildHasherDefault<FxHasher>>;
type Guard<T> = dashmap::RwLockWriteGuard<
    'static,
    HashMap<Arc<T>, SharedValue<()>, BuildHasherDefault<FxHasher>>,
>;

// https://news.ycombinator.com/item?id=22220342

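/// An interned handle to a `T`: a reference-counted pointer into the global
/// intern map, so equal values share a single allocation and compare by
/// pointer.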
pub struct Interned<T: Internable + ?Sized> {
    arc: Arc<T>,
}

impl<T: Internable> Interned<T> {
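    /// Interns `obj`, returning the existing handle if an equal value is
    /// already present in the map.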
    pub fn new(obj: T) -> Self {
        let (mut shard, hash) = Self::select(&obj);
        // Atomically,
        // - check if `obj` is already in the map
        //   - if so, clone its `Arc` and return it
        //   - if not, box it up, insert it, and return a clone
        // This needs to be atomic (locking the shard) to avoid races with
        // other threads, which could insert the same object between us looking
        // it up and inserting it.
        match shard.raw_entry_mut().from_key_hashed_nocheck(hash, &obj) {
            RawEntryMut::Occupied(occ) => Self {
                arc: occ.key().clone(),
            },
            RawEntryMut::Vacant(vac) => {
                T::storage().alloc().increment();
                Self {
                    arc: vac
                        .insert_hashed_nocheck(hash, Arc::new(obj), SharedValue::new(()))
                        .0
                        .clone(),
                }
            }
        }
    }
}

// Note: It is dangerous to hash an interned object that is only kept alive
// temporarily, because the hash is derived from the pointer address. Once the
// temporary is dropped, its entry is removed from the map, and re-interning
// the same contents may yield a different address (and thus a different hash):
// ```
// insert(hash(Interned::new_str("a"))) == true
// insert(hash(Interned::new_str("a"))) == true
// ```
impl Interned<str> {
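    /// Interns the string slice, returning the canonical handle for its
    /// contents.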
    pub fn new_str(s: &str) -> Self {
        let (mut shard, hash) = Self::select(s);
        // Atomically,
        // - check if `s` is already in the map
        //   - if so, clone its `Arc` and return it
        //   - if not, box it up, insert it, and return a clone
        // This needs to be atomic (locking the shard) to avoid races with
        // other threads, which could insert the same object between us looking
        // it up and inserting it.
        match shard.raw_entry_mut().from_key_hashed_nocheck(hash, s) {
            RawEntryMut::Occupied(occ) => Self {
                arc: occ.key().clone(),
            },
            RawEntryMut::Vacant(vac) => {
                str::storage().alloc().increment();

                Self {
                    arc: vac
                        .insert_hashed_nocheck(hash, Arc::from(s), SharedValue::new(()))
                        .0
                        .clone(),
                }
            }
        }
    }
}

static EMPTY: LazyLock<Interned<str>> = LazyLock::new(|| Interned::new_str(""));
impl Default for Interned<str> {
    fn default() -> Self {
        EMPTY.clone()
    }
}

impl Interned<str> {
    pub fn empty() -> &'static Self {
        &EMPTY
    }
}

impl From<&str> for Interned<str> {
    fn from(s: &str) -> Self {
        Interned::new_str(s)
    }
}

impl From<Str> for Interned<str> {
    fn from(s: Str) -> Self {
        Interned::new_str(&s)
    }
}

impl From<EcoString> for Interned<str> {
    fn from(s: EcoString) -> Self {
        Interned::new_str(&s)
    }
}

impl From<&EcoString> for Interned<str> {
    fn from(s: &EcoString) -> Self {
        Interned::new_str(s)
    }
}

impl From<Ident<'_>> for Interned<str> {
    fn from(s: Ident<'_>) -> Self {
        Interned::new_str(s.get())
    }
}

impl From<&Interned<str>> for EcoString {
    fn from(s: &Interned<str>) -> Self {
        s.as_ref().into()
    }
}

impl<T: Internable> From<T> for Interned<T> {
    fn from(s: T) -> Self {
        Interned::new(s)
    }
}

impl<T: Internable + Clone> From<&T> for Interned<T> {
    fn from(s: &T) -> Self {
        Interned::new(s.clone())
    }
}

impl<T: Internable + ?Sized> Interned<T> {
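    /// Hashes `obj` with the map's build hasher and returns a write lock on
    /// the shard that would hold it, together with the computed hash.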
    #[inline]
    fn select(obj: &T) -> (Guard<T>, u64) {
        let storage = T::storage().get();
        let hash = {
            let mut hasher = std::hash::BuildHasher::build_hasher(storage.hasher());
            obj.hash(&mut hasher);
            hasher.finish()
        };
        let shard_idx = storage.determine_shard(hash as usize);
        let shard = &storage.shards()[shard_idx];
        (shard.write(), hash)
    }
}

impl<T: Internable + ?Sized> Drop for Interned<T> {
    #[inline]
    fn drop(&mut self) {
        // When the last external `Interned` handle is dropped, remove the
        // object from the global map.
        if Arc::count(&self.arc) == 2 {
            // Only `self` and the global map point to the object.
            self.drop_slow();
        }
    }
}

impl<T: Internable + ?Sized> Interned<T> {
    #[cold]
    fn drop_slow(&mut self) {
        let (mut shard, hash) = Self::select(&self.arc);

        if Arc::count(&self.arc) != 2 {
            // Another thread re-interned or cloned the object while we were
            // waiting for the shard lock, so it must stay in the map.
            return;
        }

        match shard
            .raw_entry_mut()
            .from_key_hashed_nocheck(hash, &self.arc)
        {
            RawEntryMut::Occupied(occ) => occ.remove(),
            RawEntryMut::Vacant(_) => unreachable!(),
        };

        T::storage().alloc().decrement();

        // Shrink the backing storage if the shard is less than 50% occupied.
        if shard.len() * 2 < shard.capacity() {
            shard.shrink_to_fit();
        }
    }
}

/// Compares interned values using pointer equality.
impl<T: Internable> PartialEq for Interned<T> {
    // NOTE: No `?Sized` because `ptr_eq` doesn't work right with trait objects.

    #[inline]
    fn eq(&self, other: &Self) -> bool {
        Arc::ptr_eq(&self.arc, &other.arc)
    }
}

impl<T: Internable> Eq for Interned<T> {}

impl<T: Internable + PartialOrd> PartialOrd for Interned<T> {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        self.as_ref().partial_cmp(other.as_ref())
    }
}

impl<T: Internable + Ord> Ord for Interned<T> {
    #[inline]
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        if self == other {
            std::cmp::Ordering::Equal
        } else {
            self.as_ref().cmp(other.as_ref())
        }
    }
}

impl PartialOrd for Interned<str> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Interned<str> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        if self == other {
            std::cmp::Ordering::Equal
        } else {
            self.as_ref().cmp(other.as_ref())
        }
    }
}

impl PartialEq for Interned<str> {
    fn eq(&self, other: &Self) -> bool {
        Arc::ptr_eq(&self.arc, &other.arc)
    }
}

impl Eq for Interned<str> {}

impl serde::Serialize for Interned<str> {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        self.arc.serialize(serializer)
    }
}

impl<'de> serde::Deserialize<'de> for Interned<str> {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        struct StrVisitor;

        impl serde::de::Visitor<'_> for StrVisitor {
            type Value = Interned<str>;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string")
            }

            fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
                Ok(Interned::new_str(v))
            }
        }

        deserializer.deserialize_str(StrVisitor)
    }
}

impl<T: Internable + ?Sized> Hash for Interned<T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // NOTE: The cast to a thin pointer discards the vtable pointer or the
        // slice/str length, so only the data address is hashed.
        state.write_usize(Arc::as_ptr(&self.arc) as *const () as usize)
    }
}

impl<T: Internable + ?Sized> AsRef<T> for Interned<T> {
    #[inline]
    fn as_ref(&self) -> &T {
        &self.arc
    }
}

impl<T: Internable + ?Sized> Deref for Interned<T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.arc
    }
}

impl<T: Internable + ?Sized> Clone for Interned<T> {
    fn clone(&self) -> Self {
        Self {
            arc: self.arc.clone(),
        }
    }
}

impl<T: Debug + Internable + ?Sized> Debug for Interned<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        (*self.arc).fmt(f)
    }
}

impl<T: Display + Internable + ?Sized> Display for Interned<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        (*self.arc).fmt(f)
    }
}

/// Registry of all intern maps: each entry records an interned type's name,
/// its per-entry size, and its allocation statistics.
pub static MAPS: Mutex<EcoVec<(&'static str, usize, Arc<AllocStats>)>> = Mutex::new(EcoVec::new());

/// Per-type interner storage: allocation statistics plus the intern map
/// itself, both lazily initialized.
pub struct InternStorage<T: ?Sized> {
    alloc: OnceLock<Arc<AllocStats>>,
    map: OnceLock<InternMap<T>>,
}

#[allow(clippy::new_without_default)] // `new` is a const fn, so it can't be provided by `Default`
impl<T: InternSize + ?Sized> InternStorage<T> {
    const SIZE: usize = T::INTERN_SIZE;

    pub const fn new() -> Self {
        Self {
            alloc: OnceLock::new(),
            map: OnceLock::new(),
        }
    }
}

impl<T: Internable + ?Sized> InternStorage<T> {
    fn alloc(&self) -> &Arc<AllocStats> {
        self.alloc.get_or_init(Arc::default)
    }

    fn get(&self) -> &InternMap<T> {
        self.map.get_or_init(|| {
            MAPS.lock()
                .push((std::any::type_name::<T>(), Self::SIZE, self.alloc().clone()));
            DashMap::default()
        })
    }
}

/// The per-entry size of an internable type, as reported to `MAPS` for
/// statistics.
pub trait InternSize {
    const INTERN_SIZE: usize;
}

impl<T: Sized> InternSize for T {
    const INTERN_SIZE: usize = std::mem::size_of::<T>();
}

impl InternSize for str {
    // The size of a fat (`&str`) pointer: data address plus length.
    const INTERN_SIZE: usize = std::mem::size_of::<usize>() * 2;
}

/// A type that can be interned via `Interned`; its global storage is
/// typically provided by `impl_internable!`.
pub trait Internable: InternSize + Hash + Eq + 'static {
    fn storage() -> &'static InternStorage<Self>;
}

/// Implements `Internable` for a given list of types, making them usable with
/// `Interned`.
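///
/// A usage sketch for a hypothetical key type (the derives satisfy the
/// `Hash + Eq` bounds required by `Internable`):
///
/// ```plain
/// #[derive(Hash, PartialEq, Eq)]
/// struct MyKey(u32);
/// impl_internable!(MyKey);
///
/// let key: Interned<MyKey> = Interned::new(MyKey(42));
/// ```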
#[macro_export]
#[doc(hidden)]
macro_rules! _impl_internable {
    ( $($t:ty),+ $(,)? ) => { $(
        impl $crate::adt::interner::Internable for $t {
            fn storage() -> &'static $crate::adt::interner::InternStorage<Self> {
                static STORAGE: $crate::adt::interner::InternStorage<$t> = $crate::adt::interner::InternStorage::new();
                &STORAGE
            }
        }
    )+ };
}

pub use crate::_impl_internable as impl_internable;
use crate::stats::AllocStats;

impl_internable!(str,);