/*
* Copyright (c) 2019-2026 Valve Corporation
* Copyright (c) 2019-2026 LunarG, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "state_tracker/subresource_adapter.h"
#include "containers/range.h"
#include "generated/sync_validation_types.h"
#include "containers/limits.h"
#include <set>
namespace vvl {
class Buffer;
} // namespace vvl
namespace syncval {
using ImageRangeGen = subresource_adapter::ImageRangeGenerator;
// The resource tag index is relative to the command buffer or queue in which it's found
using QueueId = uint32_t;
constexpr static QueueId kQueueIdInvalid = QueueId(vvl::kNoIndex32);
using ResourceUsageTag = size_t;
// TODO: in the current implementation the invalid tag is used not only as an initial value
// but also in other scenarios (e.g. error reporting classifies a layout transition
// based on tag validity). Clarify when a tag can be invalid and document this.
constexpr static ResourceUsageTag kInvalidTag = std::numeric_limits<ResourceUsageTag>::max();
using ResourceUsageRange = vvl::range<ResourceUsageTag>;
using ResourceAddress = VkDeviceSize;
using AccessRange = vvl::range<ResourceAddress>;
// Usage tag extended with resource handle information
struct ResourceUsageTagEx {
ResourceUsageTag tag = kInvalidTag;
uint32_t handle_index = vvl::kNoIndex32;
};
AccessRange MakeRange(VkDeviceSize start, VkDeviceSize size);
AccessRange MakeRange(const vvl::Buffer &buffer, VkDeviceSize offset, VkDeviceSize size);
inline const SyncAccessInfo &GetAccessInfo(SyncAccessIndex access) { return GetSyncAccessInfos()[access]; }
extern const AccessRange kFullRange;
constexpr VkImageAspectFlags kDepthStencilAspects = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
// Notes:
// * Design goal is performance optimized set creation during specific SyncVal operations
// * Key must be integral.
// * As of this implementation we are only interested in caching inserts, not lookups.
// * A raw C-style array is used instead of std::array intentionally for size/performance reasons
//
// The following were shown to not improve hit rate for current usage (tag set gathering). For general use YMMV.
// * More complicated index construction (at >> LogSize ^ at)
// * Multi-way LRU eviction caching (equivalent hit rate to 1-way direct replacement with the same total cache slots, but with
//   higher complexity)
template <typename IntegralKey, size_t LogSize = 4U, IntegralKey kInvalidKey = IntegralKey(0)>
class CachedInsertSet : public std::set<IntegralKey> {
public:
using Base = std::set<IntegralKey>;
using key_type = typename Base::key_type;
using Index = unsigned;
static constexpr Index kSize = 1 << LogSize;
static constexpr key_type kMask = static_cast<key_type>(kSize) - 1;
void CachedInsert(const key_type key) {
// 1-way direct replacement
const Index index = static_cast<Index>(key & kMask); // Simplest
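// e.g. with the default LogSize of 4, kMask == 0xF, so a key of 0x23 maps to cache slot 0x3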
if (entries_[index] != key) {
entries_[index] = key;
Base::insert(key);
}
}
CachedInsertSet() { std::fill(entries_, entries_ + kSize, kInvalidKey); }
private:
key_type entries_[kSize];
};
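// Illustrative usage (a minimal sketch, not taken from SyncVal call sites; the tag values are made up).
// It shows how the small direct-mapped cache skips redundant std::set inserts when the same key
// arrives repeatedly, which the notes above identify as the common pattern during tag set gathering:
//
//   CachedInsertSet<ResourceUsageTag> tags;
//   tags.CachedInsert(ResourceUsageTag(7));  // slot 7 holds kInvalidKey: cache miss, inserted into the set
//   tags.CachedInsert(ResourceUsageTag(7));  // slot 7 already holds 7: cache hit, std::set insert skipped
//   tags.CachedInsert(ResourceUsageTag(9));  // slot 9: cache miss, inserted into the set
//   // tags now contains {7, 9}; iteration and lookup use the inherited std::set interface as usual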
// ThreadSafeLookupTable supports fast object lookup in a multithreaded environment.
// Insertions are slow. The idea is that you have a relatively small number of objects
// that need to be put into a container from multiple threads. In return you get an
// index for each inserted object. After the initial insertion all further operations
// are queries, and they are fast (a single atomic load in addition to a regular
// vector/hashmap lookup). You can query an object given its index, or you can get
// the index of an already registered object.
template <typename ObjectType>
class ThreadSafeLookupTable {
public:
ThreadSafeLookupTable() { std::atomic_store(&snapshot_, std::make_shared<const Snapshot>()); }
// Returns the object with the given index.
// The object index is a value returned by a previous call to GetIndexAndMaybeInsert.
// This operation uses a single atomic load.
ObjectType GetObject(uint32_t object_index) const {
auto snapshot = std::atomic_load(&snapshot_);
return snapshot->objects[object_index];
}
// Returns the index of the given object. If the object is seen for the first time, it is registered.
// For already registered objects the function performs a single atomic load and a hash map access (fast path).
// In order to register a new object the following expensive operations are performed (slow path):
// mutex lock, repeat the search, allocate a new snapshot object, copy all data from the old snapshot.
uint32_t GetIndexAndMaybeInsert(const ObjectType &object) {
//
// Fast path: object was already registered
//
auto snapshot = std::atomic_load(&snapshot_);
if (auto it = snapshot->object_to_index.find(object); it != snapshot->object_to_index.end()) {
return it->second;
}
//
// Slow path: register new object
//
std::unique_lock<std::mutex> lock(snapshot_mutex_);
// Search again since another thread could have registered the object just before we locked the mutex
snapshot = std::atomic_load(&snapshot_);
if (auto it = snapshot->object_to_index.find(object); it != snapshot->object_to_index.end()) {
return it->second;
}
// Create a new snapshot. The copy constructor copies the data from the old snapshot.
// The old snapshot must not be modified (so no move).
auto new_snapshot = std::make_shared<Snapshot>(*snapshot);
// Add new object
new_snapshot->objects.emplace_back(object);
const uint32_t index = uint32_t(new_snapshot->objects.size()) - 1;
new_snapshot->object_to_index.insert(std::make_pair(object, index));
// Update snapshot holder
std::atomic_store(&snapshot_, std::shared_ptr<const Snapshot>(std::move(new_snapshot)));
return index;
}
uint32_t ObjectCount() const {
auto snapshot = std::atomic_load(&snapshot_);
return (uint32_t)snapshot->objects.size();
}
private:
struct Snapshot {
std::vector<ObjectType> objects;
vvl::unordered_map<ObjectType, uint32_t> object_to_index;
};
// Once a snapshot is created it must never be modified (other threads can access it at any time).
// New objects are added by replacing the entire snapshot with an updated version.
// TODO: C++20: use std::atomic<std::shared_ptr<T>>. Until then we use std::atomic_load/atomic_store.
std::shared_ptr<const Snapshot> snapshot_;
// Locks snapshot during rare insert events
std::mutex snapshot_mutex_;
};
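// Illustrative usage (a minimal sketch; VkBuffer and buffer_handle below are stand-ins for any
// ObjectType that vvl::unordered_map can hash, not a statement about how SyncVal instantiates this table):
//
//   ThreadSafeLookupTable<VkBuffer> buffer_table;
//   // Any thread: the first call registers the handle (slow path), repeated calls are pure lookups (fast path)
//   const uint32_t index = buffer_table.GetIndexAndMaybeInsert(buffer_handle);
//   // Any thread: recover the handle later from the stored index with a single atomic load
//   const VkBuffer buffer = buffer_table.GetObject(index);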
} // namespace syncval