| //! VecCache maintains a mapping from K -> (V, I) pairing. K and I must be roughly u32-sized, and V |
| //! must be Copy. |
| //! |
| //! VecCache supports efficient concurrent put/get across the key space, with write-once semantics |
| //! (i.e., a given key can only be put once). Subsequent puts will panic. |
| //! |
| //! This is currently used for query caching. |
| |
| use std::fmt::{self, Debug}; |
| use std::marker::PhantomData; |
| use std::ops::{Index, IndexMut}; |
| use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering}; |
| |
| use rustc_index::Idx; |
| |
| #[cfg(test)] |
| mod tests; |
| |
/// A single cache entry: a value plus an atomic word that doubles as a
/// write-once lock and the stored extra index.
struct Slot<V> {
    // The cached value. Written exactly once, while `index_and_lock` is held in
    // state 1; only read after a state >= 2 has been observed with Acquire.
    //
    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
    value: V,
    // This is both an index and a once-lock.
    //
    // 0: not yet initialized.
    // 1: lock held, initializing.
    // 2..u32::MAX - 2: initialized.
    //
    // An initialized slot stores `extra + 2`, so readers recover the extra
    // index by subtracting 2 (see `SlotIndex::get`/`put`).
    index_and_lock: AtomicU32,
}
| |
/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors for
/// either getting the value or putting a value.
///
/// Produced only by [`SlotIndex::from_index`], which guarantees that
/// `index_in_bucket` is within the capacity of `bucket_idx`.
#[derive(Copy, Clone, Debug)]
struct SlotIndex {
    // the index of the bucket in VecCache (0 to 20)
    bucket_idx: BucketIndex,
    // the index of the slot within the bucket (always < bucket capacity)
    index_in_bucket: usize,
}
| |
| // This makes sure the counts are consistent with what we allocate, precomputing each bucket a |
| // compile-time. Visiting all powers of two is enough to hit all the buckets. |
| // |
| // We confirm counts are accurate in the slot_index_exhaustive test. |
| const ENTRIES_BY_BUCKET: [usize; BUCKETS] = { |
| let mut entries = [0; BUCKETS]; |
| let mut key = 0; |
| loop { |
| let si = SlotIndex::from_index(key); |
| entries[si.bucket_idx.to_usize()] = si.bucket_idx.capacity(); |
| if key == 0 { |
| key = 1; |
| } else if key == (1 << 31) { |
| break; |
| } else { |
| key <<= 1; |
| } |
| } |
| entries |
| }; |
| |
| const BUCKETS: usize = 21; |
| |
| impl SlotIndex { |
| /// Unpacks a flat 32-bit index into a [`BucketIndex`] and a slot offset within that bucket. |
| #[inline] |
| const fn from_index(idx: u32) -> Self { |
| let (bucket_idx, index_in_bucket) = BucketIndex::from_flat_index(idx as usize); |
| SlotIndex { bucket_idx, index_in_bucket } |
| } |
| |
| // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and |
| // `self` comes from SlotIndex::from_index |
| #[inline] |
| unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> { |
| let bucket = &buckets[self.bucket_idx]; |
| let ptr = bucket.load(Ordering::Acquire); |
| // Bucket is not yet initialized: then we obviously won't find this entry in that bucket. |
| if ptr.is_null() { |
| return None; |
| } |
| debug_assert!(self.index_in_bucket < self.bucket_idx.capacity()); |
| // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this |
| // must be inbounds. |
| let slot = unsafe { ptr.add(self.index_in_bucket) }; |
| |
| // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for |
| // AtomicU32 access. |
| let index_and_lock = unsafe { &(*slot).index_and_lock }; |
| let current = index_and_lock.load(Ordering::Acquire); |
| let index = match current { |
| 0 => return None, |
| // Treat "initializing" as actually just not initialized at all. |
| // The only reason this is a separate state is that `complete` calls could race and |
| // we can't allow that, but from load perspective there's no difference. |
| 1 => return None, |
| _ => current - 2, |
| }; |
| |
| // SAFETY: |
| // * slot is a valid pointer (buckets are always valid for the index we get). |
| // * value is initialized since we saw a >= 2 index above. |
| // * `V: Copy`, so safe to read. |
| let value = unsafe { (*slot).value }; |
| Some((value, index)) |
| } |
| |
| fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> { |
| let ptr = bucket.load(Ordering::Acquire); |
| if ptr.is_null() { Self::initialize_bucket(bucket, self.bucket_idx) } else { ptr } |
| } |
| |
| #[cold] |
| #[inline(never)] |
| fn initialize_bucket<V>(bucket: &AtomicPtr<Slot<V>>, bucket_idx: BucketIndex) -> *mut Slot<V> { |
| static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); |
| |
| // If we are initializing the bucket, then acquire a global lock. |
| // |
| // This path is quite cold, so it's cheap to use a global lock. This ensures that we never |
| // have multiple allocations for the same bucket. |
| let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner()); |
| |
| let ptr = bucket.load(Ordering::Acquire); |
| |
| // OK, now under the allocator lock, if we're still null then it's definitely us that will |
| // initialize this bucket. |
| if ptr.is_null() { |
| let bucket_layout = |
| std::alloc::Layout::array::<Slot<V>>(bucket_idx.capacity()).unwrap(); |
| // This is more of a sanity check -- this code is very cold, so it's safe to pay a |
| // little extra cost here. |
| assert!(bucket_layout.size() > 0); |
| // SAFETY: Just checked that size is non-zero. |
| let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() }; |
| if allocated.is_null() { |
| std::alloc::handle_alloc_error(bucket_layout); |
| } |
| bucket.store(allocated, Ordering::Release); |
| allocated |
| } else { |
| // Otherwise some other thread initialized this bucket after we took the lock. In that |
| // case, just return early. |
| ptr |
| } |
| } |
| |
| /// Returns true if this successfully put into the map. |
| #[inline] |
| fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool { |
| let bucket = &buckets[self.bucket_idx]; |
| let ptr = self.bucket_ptr(bucket); |
| |
| debug_assert!(self.index_in_bucket < self.bucket_idx.capacity()); |
| // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this |
| // must be inbounds. |
| let slot = unsafe { ptr.add(self.index_in_bucket) }; |
| |
| // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for |
| // AtomicU32 access. |
| let index_and_lock = unsafe { &(*slot).index_and_lock }; |
| match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) { |
| Ok(_) => { |
| // We have acquired the initialization lock. It is our job to write `value` and |
| // then set the lock to the real index. |
| |
| unsafe { |
| (&raw mut (*slot).value).write(value); |
| } |
| |
| index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release); |
| |
| true |
| } |
| |
| // Treat "initializing" as the caller's fault. Callers are responsible for ensuring that |
| // there are no races on initialization. In the compiler's current usage for query |
| // caches, that's the "active query map" which ensures each query actually runs once |
| // (even if concurrently started). |
| Err(1) => panic!("caller raced calls to put()"), |
| |
| // This slot was already populated. Also ignore, currently this is the same as |
| // "initializing". |
| Err(_) => false, |
| } |
| } |
| |
| /// Inserts into the map, given that the slot is unique, so it won't race with other threads. |
| #[inline] |
| unsafe fn put_unique<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) { |
| let bucket = &buckets[self.bucket_idx]; |
| let ptr = self.bucket_ptr(bucket); |
| |
| debug_assert!(self.index_in_bucket < self.bucket_idx.capacity()); |
| // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this |
| // must be inbounds. |
| let slot = unsafe { ptr.add(self.index_in_bucket) }; |
| |
| // SAFETY: We known our slot is unique as a precondition of this function, so this can't race. |
| unsafe { |
| (&raw mut (*slot).value).write(value); |
| } |
| |
| // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for |
| // AtomicU32 access. |
| let index_and_lock = unsafe { &(*slot).index_and_lock }; |
| |
| index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release); |
| } |
| } |
| |
/// In-memory cache for queries whose keys are densely-numbered IDs
/// (e.g `CrateNum`, `LocalDefId`), and can therefore be used as indices
/// into a dense vector of cached values.
///
/// (As of [#124780] the underlying storage is not an actual `Vec`, but rather
/// a series of increasingly-large buckets, for improved performance when the
/// parallel frontend is using multiple threads.)
///
/// Each entry in the cache stores the query's return value (`V`), and also
/// an associated index (`I`), which in practice is a `DepNodeIndex` used for
/// query dependency tracking.
///
/// [#124780]: https://github.com/rust-lang/rust/pull/124780
pub struct VecCache<K: Idx, V, I> {
    // Lazily-allocated buckets of slots, keyed by `K`'s flat u32 index.
    //
    // Entries per bucket:
    // Bucket 0:          4096 2^12
    // Bucket 1:          4096 2^12
    // Bucket 2:          8192 2^13
    // Bucket 3:         16384 2^14
    // ...
    // Bucket 19:   1073741824 2^30
    // Bucket 20:   2147483648 2^31
    // The total number of entries if all buckets are initialized is 2^32.
    buckets: [AtomicPtr<Slot<V>>; BUCKETS],

    // A dense side-table mapping insertion order (0..len) to the key that was
    // inserted, so iteration doesn't have to walk the sparse `buckets` array.
    //
    // In the compiler's current usage these are only *read* during incremental and self-profiling.
    // They are an optimization over iterating the full buckets array.
    present: [AtomicPtr<Slot<()>>; BUCKETS],
    // Number of successful `complete` calls; also the next free index in `present`.
    len: AtomicUsize,

    key: PhantomData<(K, I)>,
}
| |
| impl<K: Idx, V, I> Default for VecCache<K, V, I> { |
| fn default() -> Self { |
| VecCache { |
| buckets: Default::default(), |
| key: PhantomData, |
| len: Default::default(), |
| present: Default::default(), |
| } |
| } |
| } |
| |
// SAFETY: No access to `V` is made.
unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
    fn drop(&mut self) {
        // We have unique ownership, so no locks etc. are needed. The asserts below guarantee
        // that `K` and `V` have no drop glue, so dropping the cache only requires deallocating
        // the bucket arrays themselves (never iterating over or dropping their contents).
        //
        // Confirm no need to deallocate individual entries. Note that `V: Copy` is asserted on
        // insert/lookup but not necessarily construction, primarily to avoid annoyingly propagating
        // the bounds into struct definitions everywhere.
        assert!(!std::mem::needs_drop::<K>());
        assert!(!std::mem::needs_drop::<V>());

        for (idx, bucket) in BucketIndex::enumerate_buckets(&self.buckets) {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                // SAFETY: non-null buckets were allocated via `alloc_zeroed` with this exact
                // layout (see `SlotIndex::initialize_bucket`), and are freed only here.
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }

        for (idx, bucket) in BucketIndex::enumerate_buckets(&self.present) {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                // SAFETY: same as above, for the `present` side-table's buckets.
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }
    }
}
| |
| impl<K, V, I> VecCache<K, V, I> |
| where |
| K: Eq + Idx + Copy + Debug, |
| V: Copy, |
| I: Idx + Copy, |
| { |
| #[inline(always)] |
| pub fn lookup(&self, key: &K) -> Option<(V, I)> { |
| let key = u32::try_from(key.index()).unwrap(); |
| let slot_idx = SlotIndex::from_index(key); |
| match unsafe { slot_idx.get(&self.buckets) } { |
| Some((value, idx)) => Some((value, I::new(idx as usize))), |
| None => None, |
| } |
| } |
| |
| #[inline] |
| pub fn complete(&self, key: K, value: V, index: I) { |
| let key = u32::try_from(key.index()).unwrap(); |
| let slot_idx = SlotIndex::from_index(key); |
| if slot_idx.put(&self.buckets, value, index.index() as u32) { |
| let present_idx = self.len.fetch_add(1, Ordering::Relaxed); |
| let slot = SlotIndex::from_index(u32::try_from(present_idx).unwrap()); |
| // SAFETY: We should always be uniquely putting due to `len` fetch_add returning unique values. |
| // We can't get here if `len` overflows because `put` will not succeed u32::MAX + 1 times |
| // as it will run out of slots. |
| unsafe { slot.put_unique(&self.present, (), key) }; |
| } |
| } |
| |
| pub fn for_each(&self, f: &mut dyn FnMut(&K, &V, I)) { |
| for idx in 0..self.len.load(Ordering::Acquire) { |
| let key = SlotIndex::from_index(idx as u32); |
| match unsafe { key.get(&self.present) } { |
| // This shouldn't happen in our current usage (iter is really only |
| // used long after queries are done running), but if we hit this in practice it's |
| // probably fine to just break early. |
| None => unreachable!(), |
| Some(((), key)) => { |
| let key = K::new(key as usize); |
| // unwrap() is OK: present entries are always written only after we put the real |
| // entry. |
| let value = self.lookup(&key).unwrap(); |
| f(&key, &value.0, value.1); |
| } |
| } |
| } |
| } |
| |
| pub fn len(&self) -> usize { |
| self.len.load(Ordering::Acquire) |
| } |
| } |
| |
/// Index into an array of buckets.
///
/// Using an enum lets us tell the compiler that values range from 0 to 20,
/// allowing array bounds checks to be optimized away,
/// without having to resort to pattern types or other unstable features.
///
/// The discriminant of `BucketNN` is `NN` (guaranteed by `#[repr(usize)]` and
/// the declaration order), which `to_usize`/`from_raw` rely on.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
enum BucketIndex {
    // tidy-alphabetical-start
    Bucket00,
    Bucket01,
    Bucket02,
    Bucket03,
    Bucket04,
    Bucket05,
    Bucket06,
    Bucket07,
    Bucket08,
    Bucket09,
    Bucket10,
    Bucket11,
    Bucket12,
    Bucket13,
    Bucket14,
    Bucket15,
    Bucket16,
    Bucket17,
    Bucket18,
    Bucket19,
    Bucket20,
    // tidy-alphabetical-end
}
| |
| impl Debug for BucketIndex { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| Debug::fmt(&self.to_usize(), f) |
| } |
| } |
| |
| impl BucketIndex { |
| /// Capacity of bucket 0 (and also of bucket 1). |
| const BUCKET_0_CAPACITY: usize = 1 << (Self::NONZERO_BUCKET_SHIFT_ADJUST + 1); |
| /// Adjustment factor from the highest-set-bit-position of a flat index, |
| /// to its corresponding bucket number. |
| /// |
| /// For example, the first flat-index in bucket 2 is 8192. |
| /// Its highest-set-bit-position is `(8192).ilog2() == 13`, and subtracting |
| /// the adjustment factor of 11 gives the bucket number of 2. |
| const NONZERO_BUCKET_SHIFT_ADJUST: usize = 11; |
| |
| #[inline(always)] |
| const fn to_usize(self) -> usize { |
| self as usize |
| } |
| |
| #[inline(always)] |
| const fn from_raw(raw: usize) -> Self { |
| match raw { |
| // tidy-alphabetical-start |
| 00 => Self::Bucket00, |
| 01 => Self::Bucket01, |
| 02 => Self::Bucket02, |
| 03 => Self::Bucket03, |
| 04 => Self::Bucket04, |
| 05 => Self::Bucket05, |
| 06 => Self::Bucket06, |
| 07 => Self::Bucket07, |
| 08 => Self::Bucket08, |
| 09 => Self::Bucket09, |
| 10 => Self::Bucket10, |
| 11 => Self::Bucket11, |
| 12 => Self::Bucket12, |
| 13 => Self::Bucket13, |
| 14 => Self::Bucket14, |
| 15 => Self::Bucket15, |
| 16 => Self::Bucket16, |
| 17 => Self::Bucket17, |
| 18 => Self::Bucket18, |
| 19 => Self::Bucket19, |
| 20 => Self::Bucket20, |
| // tidy-alphabetical-end |
| _ => panic!("bucket index out of range"), |
| } |
| } |
| |
| /// Total number of slots in this bucket. |
| #[inline(always)] |
| const fn capacity(self) -> usize { |
| match self { |
| Self::Bucket00 => Self::BUCKET_0_CAPACITY, |
| // Bucket 1 has a capacity of `1 << (1 + 11) == pow(2, 12) == 4096`. |
| // Bucket 2 has a capacity of `1 << (2 + 11) == pow(2, 13) == 8192`. |
| _ => 1 << (self.to_usize() + Self::NONZERO_BUCKET_SHIFT_ADJUST), |
| } |
| } |
| |
| /// Converts a flat index in the range `0..=u32::MAX` into a bucket index, |
| /// and a slot offset within that bucket. |
| /// |
| /// Panics if `flat > u32::MAX`. |
| #[inline(always)] |
| const fn from_flat_index(flat: usize) -> (Self, usize) { |
| if flat > u32::MAX as usize { |
| panic!(); |
| } |
| |
| // If the index is in bucket 0, the conversion is trivial. |
| // This also avoids calling `ilog2` when `flat == 0`. |
| if flat < Self::BUCKET_0_CAPACITY { |
| return (Self::Bucket00, flat); |
| } |
| |
| // General-case conversion for a non-zero bucket index. |
| // |
| // | bucket | slot |
| // flat | ilog2 | index | offset |
| // ------------------------------ |
| // 4096 | 12 | 1 | 0 |
| // 4097 | 12 | 1 | 1 |
| // ... |
| // 8191 | 12 | 1 | 4095 |
| // 8192 | 13 | 2 | 0 |
| let highest_bit_pos = flat.ilog2() as usize; |
| let bucket_index = |
| BucketIndex::from_raw(highest_bit_pos - Self::NONZERO_BUCKET_SHIFT_ADJUST); |
| |
| // Clear the highest-set bit (which selects the bucket) to get the |
| // slot offset within this bucket. |
| let slot_offset = flat - (1 << highest_bit_pos); |
| |
| (bucket_index, slot_offset) |
| } |
| |
| #[inline(always)] |
| fn iter_all() -> impl ExactSizeIterator<Item = Self> { |
| (0usize..BUCKETS).map(BucketIndex::from_raw) |
| } |
| |
| #[inline(always)] |
| fn enumerate_buckets<T>(buckets: &[T; BUCKETS]) -> impl ExactSizeIterator<Item = (Self, &T)> { |
| BucketIndex::iter_all().zip(buckets) |
| } |
| } |
| |
impl<T> Index<BucketIndex> for [T; BUCKETS] {
    type Output = T;

    #[inline(always)]
    fn index(&self, index: BucketIndex) -> &Self::Output {
        // The optimizer should be able to see that a bucket index is
        // always in-bounds, and omit the runtime bounds check.
        &self[index.to_usize()]
    }
}
| |
| impl<T> IndexMut<BucketIndex> for [T; BUCKETS] { |
| #[inline(always)] |
| fn index_mut(&mut self, index: BucketIndex) -> &mut Self::Output { |
| &mut self[index.to_usize()] |
| } |
| } |