tinymist_std/adt/
fmap.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
//! A map that shards items by their fingerprint.

use std::{collections::HashMap, num::NonZeroU32};

use crate::hash::Fingerprint;

/// A global upper bound on the shard size.
/// If there are too many shards, the memory overhead is unacceptable.
const MAX_SHARD_SIZE: u32 = 512;

/// Returns a read-only default shard size.
fn default_shard_size() -> NonZeroU32 {
    static ITEM_SHARD_SIZE: std::sync::OnceLock<NonZeroU32> = std::sync::OnceLock::new();

    /// By testing, we found that the optimal shard size is 2 * number of
    /// threads.
    fn determine_default_shard_size() -> NonZeroU32 {
        // This detection is from rayon.
        let thread_cnt = {
            std::thread::available_parallelism()
                .map(|n| n.get())
                .unwrap_or(1)
        };

        // A valid shard size is a power of two.
        let size = (thread_cnt.next_power_of_two() * 2) as u32;
        // Perform early non-zero check to avoid panics.
        NonZeroU32::new(size.min(MAX_SHARD_SIZE)).unwrap()
    }

    *ITEM_SHARD_SIZE.get_or_init(determine_default_shard_size)
}

type FMapBase<V> = parking_lot::RwLock<HashMap<Fingerprint, V>>;

/// A map that shards items by their fingerprint. This is faster
/// than the dashmap in some cases.
///
/// It is fast since a fingerprint could split items into different shards
/// efficiently.
///
/// Note: If a fingerprint is not calculated from a hash function, it is not
/// guaranteed that the fingerprint is evenly distributed. Thus, in that case,
/// the performance of this map is not guaranteed.
pub struct FingerprintMap<V> {
    mask: u32,
    shards: Vec<parking_lot::RwLock<HashMap<Fingerprint, V>>>,
}

impl<V> Default for FingerprintMap<V> {
    fn default() -> Self {
        Self::new(default_shard_size())
    }
}

impl<V> FingerprintMap<V> {
    /// Creates a new `FingerprintMap` with the given shard size.
    pub fn new(shard_size: NonZeroU32) -> Self {
        let shard_size = shard_size.get().next_power_of_two();
        let shard_size = shard_size.min(MAX_SHARD_SIZE);

        assert!(
            shard_size.is_power_of_two(),
            "shard size must be a power of two"
        );
        assert!(shard_size > 0, "shard size must be greater than zero");
        Self {
            mask: shard_size - 1,
            shards: (0..shard_size)
                .map(|_| parking_lot::RwLock::new(HashMap::new()))
                .collect(),
        }
    }

    /// Iterates over all items in the map.
    pub fn into_items(self) -> impl Iterator<Item = (Fingerprint, V)> {
        self.shards
            .into_iter()
            .flat_map(|shard| shard.into_inner().into_iter())
    }

    /// Gets the shard
    pub fn shard(&self, fg: Fingerprint) -> &FMapBase<V> {
        let shards = &self.shards;
        let route_idx = (fg.lower32() & self.mask) as usize;

        // check that the route index is within the bounds of the shards
        debug_assert!(route_idx < shards.len());
        // SAFETY: `fg` is a valid index into `shards`, as shards size is never changed
        // and mask is always a power of two.
        unsafe { shards.get_unchecked(route_idx) }
    }

    /// Gets the mutable shard slice useful for parallel iteration
    pub fn as_mut_slice(&mut self) -> &mut [FMapBase<V>] {
        &mut self.shards
    }

    /// Checks if the map is empty.
    pub fn contains_key(&self, fg: &Fingerprint) -> bool {
        self.shard(*fg).read().contains_key(fg)
    }
}

#[cfg(test)]
mod tests {
    #[test]
    fn test_default_shard_size() {
        let size = super::default_shard_size().get();

        eprintln!("size = {size}");

        assert!(size > 0);
        assert_eq!(size & (size - 1), 0);
    }
}