blob: 5ddc83080ffdae7c2b387eed4ca91f903721fbe9 [file] [log] [blame]
/* Copyright (c) 2018-2026 The Khronos Group Inc.
* Copyright (c) 2018-2026 Valve Corporation
* Copyright (c) 2018-2026 LunarG, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vulkan/vulkan_core.h>
#include "generated/dispatch_functions.h"
#include "gpuav/resources/gpuav_state_trackers.h"
#include "gpuav/descriptor_validation/gpuav_descriptor_validation.h"
#include "gpuav/instrumentation/gpuav_instrumentation.h"
#include "gpuav/core/gpuav.h"
#include "gpuav/core/gpuav_constants.h"
#include "gpuav/shaders/gpuav_error_header.h"
#include "gpuav/resources/gpuav_vulkan_objects.h"
#include "gpuav/validation_cmd/gpuav_draw.h"
#include "profiling/profiling.h"
#include "state_tracker/last_bound_state.h"
namespace gpuav {
CommandBufferSubState::CommandBufferSubState(Validator &gpuav, vvl::CommandBuffer &cb)
: vvl::CommandBufferSubState(cb), gpu_resources_manager(gpuav, false), cmd_errors_counts_buffer_(gpuav), gpuav_(gpuav) {
Location loc(vvl::Func::vkAllocateCommandBuffers);
AllocateResources(loc);
}
CommandBufferSubState::~CommandBufferSubState() {}
void CommandBufferSubState::AllocateResources(const Location &loc) {
VkResult result = VK_SUCCESS;
// Instrumentation descriptor set layout
if (instrumentation_desc_set_layout_ == VK_NULL_HANDLE) {
assert(!gpuav_.instrumentation_bindings_.empty());
VkDescriptorSetLayoutCreateInfo instrumentation_desc_set_layout_ci = vku::InitStructHelper();
instrumentation_desc_set_layout_ci.bindingCount = static_cast<uint32_t>(gpuav_.instrumentation_bindings_.size());
instrumentation_desc_set_layout_ci.pBindings = gpuav_.instrumentation_bindings_.data();
result = DispatchCreateDescriptorSetLayout(gpuav_.device, &instrumentation_desc_set_layout_ci, nullptr,
&instrumentation_desc_set_layout_);
if (result != VK_SUCCESS) {
gpuav_.InternalError(gpuav_.device, loc, "Unable to create instrumentation descriptor set layout.");
return;
}
}
// Error output buffer
{
error_output_buffer_range_ = gpu_resources_manager.GetHostCoherentBufferRange(glsl::kErrorBufferByteSize);
if (error_output_buffer_range_.buffer == VK_NULL_HANDLE) {
return;
}
memset(error_output_buffer_range_.offset_mapped_ptr, 0, (size_t)error_output_buffer_range_.size);
if (gpuav_.gpuav_settings.shader_instrumentation.descriptor_checks) {
((uint32_t *)error_output_buffer_range_.offset_mapped_ptr)[cst::stream_output_flags_offset] =
cst::inst_buffer_oob_enabled;
}
}
// Commands errors counts buffer
{
if (cmd_errors_counts_buffer_.IsDestroyed()) {
VkBufferCreateInfo buffer_info = vku::InitStructHelper();
buffer_info.size = GetCmdErrorsCountsBufferByteSize();
buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo alloc_info = {};
alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
alloc_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
result = cmd_errors_counts_buffer_.Create(&buffer_info, &alloc_info);
if (result != VK_SUCCESS) {
return;
}
}
cmd_errors_counts_buffer_.Clear();
}
}
// Common logic after any draw/dispatch/traceRays
void CommandBufferSubState::RecordActionCommand(LastBound &last_bound, const Location &) {
PostCallSetupShaderInstrumentationResources(gpuav_, *this, last_bound);
IncrementActionCommandCount(last_bound.bind_point);
}
void CommandBufferSubState::UpdateLastBoundDescriptorSets(VkPipelineBindPoint bind_point, const Location &loc) {
descriptor::UpdateBoundDescriptors(gpuav_, *this, bind_point, loc);
}
void CommandBufferSubState::Destroy() { ResetCBState(true); }
void CommandBufferSubState::Reset(const Location &loc) {
ResetCBState(false);
// TODO: Calling AllocateResources in Reset like so is a kind of a hack,
// relying on CommandBuffer internal logic to work.
// Tried to call it in ResetCBState, hang on command buffer mutex :/
AllocateResources(loc);
}
void CommandBufferSubState::RecordPushConstants(VkPipelineLayout layout, VkShaderStageFlags stage_flags, uint32_t offset,
uint32_t size, const void *values) {
if (IsStageInPipelineBindPoint(stage_flags, VK_PIPELINE_BIND_POINT_GRAPHICS)) {
push_constant_latest_used_layout[vvl::BindPointGraphics] = layout;
} else if (IsStageInPipelineBindPoint(stage_flags, VK_PIPELINE_BIND_POINT_COMPUTE)) {
push_constant_latest_used_layout[vvl::BindPointCompute] = layout;
} else if (IsStageInPipelineBindPoint(stage_flags, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR)) {
push_constant_latest_used_layout[vvl::BindPointRayTracing] = layout;
} else {
// Need to handle new binding point
assert(false);
}
PushConstantData push_constant_data;
push_constant_data.layout = layout;
push_constant_data.stage_flags = stage_flags;
push_constant_data.offset = offset;
push_constant_data.values.resize(size);
auto byte_values = static_cast<const std::byte *>(values);
std::copy(byte_values, byte_values + size, push_constant_data.values.data());
// Always add submitted push constant values, even if the same data is already stored.
// Storing duplicated data, or data submitted by one vkCmdPushConstants call
// and overridden by a subsequent one is not a problem.
// push_constant_data_chunks is intended to be parsed from 0 to N,
// thus going through the history in order, so even though it is
// possibly suboptimal push constant data is correct.
push_constant_data_chunks.emplace_back(push_constant_data);
}
void CommandBufferSubState::ClearPushConstants() {
push_constant_data_chunks.clear();
push_constant_latest_used_layout.fill(VK_NULL_HANDLE);
}
void CommandBufferSubState::RecordEndRendering(const VkRenderingEndInfoEXT *) { valcmd::FlushValidationCmds(gpuav_, *this); }
void CommandBufferSubState::RecordEndRenderPass(const VkSubpassEndInfo *, const Location &) {
valcmd::FlushValidationCmds(gpuav_, *this);
}
// For things like vkCmdCopyImage there is no "last bound" as not shaders are attached to it
void CommandBufferSubState::AddCommandErrorLogger(const Location &loc, const LastBound *last_bound,
ErrorLoggerFunc error_logger_func) {
if (command_error_loggers_.size() == gpuav_.gpuav_settings.invalid_index_command) {
return;
}
const uint32_t label_command_i =
base.GetLabelCommands().empty() ? vvl::kNoIndex32 : uint32_t(base.GetLabelCommands().size() - 1);
command_error_loggers_.emplace_back(CommandBufferSubState::CommandErrorLogger{
loc, last_bound ? last_bound->cb_state.GetObjectList(last_bound->bind_point) : LogObjectList{VkHandle()},
std::move(error_logger_func), label_command_i});
}
void CommandBufferSubState::ResetCBState(bool should_destroy) {
// Free or return to cache GPU resources
on_instrumentation_error_logger_register_functions.clear();
on_instrumentation_desc_set_update_functions.clear();
on_instrumentation_desc_buffer_update_functions.clear();
on_instrumentation_desc_heap_update_functions.clear();
on_cb_completion_functions.clear();
on_post_cb_submission_functions.clear();
on_pre_cb_submission_functions.clear();
shared_resources_cache.Clear();
if (should_destroy) {
gpu_resources_manager.DestroyResources();
} else {
gpu_resources_manager.ReturnResources();
}
command_error_loggers_.clear();
if (should_destroy && instrumentation_desc_set_layout_ != VK_NULL_HANDLE) {
DispatchDestroyDescriptorSetLayout(gpuav_.device, instrumentation_desc_set_layout_, nullptr);
instrumentation_desc_set_layout_ = VK_NULL_HANDLE;
}
if (should_destroy) {
error_output_buffer_range_ = {};
cmd_errors_counts_buffer_.Destroy();
}
draw_index = 0;
compute_index = 0;
trace_rays_index = 0;
resource_descriptor_buffer_index_ = 0;
ClearPushConstants();
}
void CommandBufferSubState::IncrementActionCommandCount(VkPipelineBindPoint bind_point) {
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
draw_index++;
if (draw_index > gpuav_.gpuav_settings.invalid_index_command) {
draw_index = gpuav_.gpuav_settings.invalid_index_command;
}
} else if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) {
compute_index++;
if (compute_index > gpuav_.gpuav_settings.invalid_index_command) {
compute_index = gpuav_.gpuav_settings.invalid_index_command;
}
} else if (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) {
trace_rays_index++;
if (trace_rays_index > gpuav_.gpuav_settings.invalid_index_command) {
trace_rays_index = gpuav_.gpuav_settings.invalid_index_command;
}
}
}
uint32_t CommandBufferSubState::GetActionCommandIndex(VkPipelineBindPoint bind_point) const {
return (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) ? draw_index
: (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) ? compute_index
: (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) ? trace_rays_index
: 0;
}
std::string CommandBufferSubState::GetDebugLabelRegion(uint32_t label_command_i,
const std::vector<std::string> &initial_label_stack) const {
std::string debug_region_name;
if (label_command_i != vvl::kNoIndex32) {
debug_region_name = base.GetDebugRegionName(base.GetLabelCommands(), label_command_i, initial_label_stack);
} else {
// label_command_i == vvl::kNoIndex32 => when the instrumented command was recorded,
// no debug label region was yet opened in the corresponding command buffer,
// but still a region might have been started in another previously submitted
// command buffer. So just compute region name from initial_label_stack.
for (const std::string &label_name : initial_label_stack) {
if (!debug_region_name.empty()) {
debug_region_name += "::";
}
debug_region_name += label_name;
}
}
return debug_region_name;
}
struct FenceWaiter {
std::vector<VkFence> fences;
};
bool CommandBufferSubState::PreSubmit(QueueSubState &queue, const Location &loc) {
VVL_ZoneScoped;
if (!on_pre_cb_submission_functions.empty()) {
vko::CommandPool &cb_pool =
queue.shared_resources_cache.GetOrCreate<vko::CommandPool>(gpuav_, queue.base.queue_family_index, loc);
auto [per_pre_submission_cb, fence] = cb_pool.GetCommandBuffer();
if (per_pre_submission_cb == VK_NULL_HANDLE) {
return false;
}
DispatchResetCommandBuffer(per_pre_submission_cb, 0);
VkCommandBufferBeginInfo cb_bi = vku::InitStructHelper();
cb_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
DispatchBeginCommandBuffer(per_pre_submission_cb, &cb_bi);
for (auto &pre_submission_func : on_pre_cb_submission_functions) {
pre_submission_func(gpuav_, *this, per_pre_submission_cb);
}
DispatchEndCommandBuffer(per_pre_submission_cb);
VkSubmitInfo submit_info = vku::InitStructHelper();
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &per_pre_submission_cb;
const VkResult result = DispatchQueueSubmit(queue.base.VkHandle(), 1, &submit_info, fence);
if (result != VK_SUCCESS) {
gpuav_.InternalError(queue.Handle(), loc, "Failed to submit per pre submission command buffer");
}
}
return true;
}
bool CommandBufferSubState::PostSubmit(QueueSubState &queue, const Location &loc) {
VVL_ZoneScoped;
if (!on_post_cb_submission_functions.empty()) {
vko::CommandPool &cb_pool =
queue.shared_resources_cache.GetOrCreate<vko::CommandPool>(gpuav_, queue.base.queue_family_index, loc);
auto [per_post_submission_cb, fence] = cb_pool.GetCommandBuffer();
if (per_post_submission_cb == VK_NULL_HANDLE) {
return false;
}
DispatchResetCommandBuffer(per_post_submission_cb, 0);
VkCommandBufferBeginInfo cb_bi = vku::InitStructHelper();
cb_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
DispatchBeginCommandBuffer(per_post_submission_cb, &cb_bi);
for (auto &post_submission_func : on_post_cb_submission_functions) {
post_submission_func(gpuav_, *this, per_post_submission_cb);
}
DispatchEndCommandBuffer(per_post_submission_cb);
VkSubmitInfo submit_info = vku::InitStructHelper();
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &per_post_submission_cb;
const VkResult result = DispatchQueueSubmit(queue.base.VkHandle(), 1, &submit_info, fence);
if (result != VK_SUCCESS) {
gpuav_.InternalError(queue.Handle(), loc, "Failed to submit per post submission command buffer");
}
FenceWaiter &fence_waiter = queue.shared_resources_cache.GetOrCreate<FenceWaiter>();
fence_waiter.fences.emplace_back(fence);
}
return true;
}
bool CommandBufferSubState::NeedsPostProcess() { return error_output_buffer_range_.buffer != VK_NULL_HANDLE; }
// For the given command buffer, map its debug data buffers and read their contents for analysis.
void CommandBufferSubState::OnCompletion(VkQueue queue, const std::vector<std::string> &initial_label_stack, const Location &loc) {
VVL_ZoneScoped;
// CommandBuffer::Destroy can happen on an other thread,
// so when getting here after acquiring command buffer's lock,
// make sure there are still things to process
if (!NeedsPostProcess()) {
return;
}
{
auto error_output_buffer_ptr = (uint32_t *)error_output_buffer_range_.offset_mapped_ptr;
// The second word in the debug output buffer is the number of words that would have
// been written by the shader instrumentation, if there was enough room in the buffer we provided.
// The number of words actually written by the shaders is determined by the size of the buffer
// we provide via the descriptor. So, we process only the number of words that can fit in the
// buffer.
const uint32_t total_words = error_output_buffer_ptr[cst::stream_output_size_offset];
// A zero here means that the shader instrumentation didn't write anything.
if (total_words != 0) {
uint32_t *const error_records_start = &error_output_buffer_ptr[cst::stream_output_data_offset];
assert(glsl::kErrorBufferByteSize > cst::stream_output_data_offset);
uint32_t *const error_records_end =
error_output_buffer_ptr + (glsl::kErrorBufferByteSize - cst::stream_output_data_offset);
uint32_t *error_record_ptr = error_records_start;
uint32_t record_size = error_record_ptr[glsl::kHeader_ErrorRecordSizeOffset];
assert(record_size == glsl::kErrorRecordSize);
while (record_size > 0 && (error_record_ptr + record_size) <= error_records_end) {
const uint32_t error_logger_i =
error_record_ptr[glsl::kHeader_ActionIdErrorLoggerIdOffset] & glsl::kErrorLoggerId_Mask;
assert(error_logger_i < gpuav_.gpuav_settings.indices_buffer_count);
if (error_logger_i == gpuav_.gpuav_settings.invalid_index_command) {
const LogObjectList objlist(queue, VkHandle());
gpuav_.LogError(
"GPUAV-Overflow-Unknown", queue, loc,
"An error was detected, but after internal limit of %" PRIu32
" draw/dispatch/traceRays commands in a command buffer, we are unable to track which validation error "
"occured.\nThis can be adjusted setting env var VK_LAYER_GPUAV_MAX_INDICES_COUNT to a higher value.",
gpuav_.gpuav_settings.invalid_index_command);
} else {
// normal case
const CommandErrorLogger &error_logger = GetErrorLogger(error_logger_i);
const LogObjectList objlist(queue, error_logger.objlist);
std::string debug_region_name = GetDebugLabelRegion(error_logger.label_cmd_i, initial_label_stack);
Location loc_with_debug_region(error_logger.loc.Get(), debug_region_name);
error_logger.error_logger_func(error_record_ptr, loc_with_debug_region, objlist);
}
// Next record
error_record_ptr += record_size;
record_size = error_record_ptr[glsl::kHeader_ErrorRecordSizeOffset];
}
VVL_TracyPlot("GPU-AV errors count", int64_t(total_words / glsl::kErrorRecordSize));
// Clear the written size and any error messages. Note that this preserves the first word, which contains flags.
assert(glsl::kErrorBufferByteSize > cst::stream_output_data_offset);
memset(&error_output_buffer_ptr[cst::stream_output_flags_offset + 1], 0,
size_t(error_output_buffer_range_.size) - sizeof(uint32_t));
}
error_output_buffer_ptr[cst::stream_output_size_offset] = 0;
}
cmd_errors_counts_buffer_.Clear();
if (gpuav_.aborted_) {
return;
}
bool success = true;
LabelLogging label_logging = {initial_label_stack};
for (auto &on_cb_completion_func : on_cb_completion_functions) {
success = on_cb_completion_func(gpuav_, *this, label_logging, loc);
if (!success) {
break;
}
}
}
QueueSubState::QueueSubState(Validator &gpuav, vvl::Queue &q) : vvl::QueueSubState(q), gpuav_(gpuav), timeline_khr_(false) {}
QueueSubState::~QueueSubState() {
shared_resources_cache.Clear();
if (barrier_command_buffer_) {
DispatchFreeCommandBuffers(gpuav_.device, barrier_command_pool_, 1, &barrier_command_buffer_);
barrier_command_buffer_ = VK_NULL_HANDLE;
}
if (barrier_command_pool_) {
DispatchDestroyCommandPool(gpuav_.device, barrier_command_pool_, nullptr);
barrier_command_pool_ = VK_NULL_HANDLE;
}
if (barrier_sem_) {
DispatchDestroySemaphore(gpuav_.device, barrier_sem_, nullptr);
barrier_sem_ = VK_NULL_HANDLE;
}
}
// #ARNO_TODO do we still need that?
// Submit a memory barrier on graphics queues.
// Lazy-create and record the needed command buffer.
void QueueSubState::SubmitBarrier(const Location &loc, uint64_t seq) {
if (barrier_command_pool_ == VK_NULL_HANDLE) {
VkResult result = VK_SUCCESS;
VkCommandPoolCreateInfo pool_create_info = vku::InitStructHelper();
pool_create_info.queueFamilyIndex = base.queue_family_index;
result = DispatchCreateCommandPool(gpuav_.device, &pool_create_info, nullptr, &barrier_command_pool_);
if (result != VK_SUCCESS) {
gpuav_.InternalError(VkHandle(), loc, "Unable to create command pool for barrier CB.");
barrier_command_pool_ = VK_NULL_HANDLE;
return;
}
VkCommandBufferAllocateInfo buffer_alloc_info = vku::InitStructHelper();
buffer_alloc_info.commandPool = barrier_command_pool_;
buffer_alloc_info.commandBufferCount = 1;
buffer_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
result = DispatchAllocateCommandBuffers(gpuav_.device, &buffer_alloc_info, &barrier_command_buffer_);
if (result != VK_SUCCESS) {
gpuav_.InternalError(VkHandle(), loc, "Unable to create barrier command buffer.");
DispatchDestroyCommandPool(gpuav_.device, barrier_command_pool_, nullptr);
barrier_command_pool_ = VK_NULL_HANDLE;
barrier_command_buffer_ = VK_NULL_HANDLE;
return;
}
VkSemaphoreTypeCreateInfo semaphore_type_create_info = vku::InitStructHelper();
semaphore_type_create_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
semaphore_type_create_info.initialValue = 0;
VkSemaphoreCreateInfo semaphore_create_info = vku::InitStructHelper(&semaphore_type_create_info);
result = DispatchCreateSemaphore(gpuav_.device, &semaphore_create_info, nullptr, &barrier_sem_);
if (result != VK_SUCCESS) {
gpuav_.InternalError(gpuav_.device, loc, "Unable to create barrier semaphore.");
DispatchDestroyCommandPool(gpuav_.device, barrier_command_pool_, nullptr);
barrier_command_pool_ = VK_NULL_HANDLE;
barrier_command_buffer_ = VK_NULL_HANDLE;
return;
}
// Hook up command buffer dispatch
gpuav_.vk_set_device_loader_data_(gpuav_.device, barrier_command_buffer_);
// Record a global memory barrier to force availability of device memory operations to the host domain.
VkCommandBufferBeginInfo barrier_cmd_buffer_begin_info = vku::InitStructHelper();
barrier_cmd_buffer_begin_info.flags |= VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
result = DispatchBeginCommandBuffer(barrier_command_buffer_, &barrier_cmd_buffer_begin_info);
if (result == VK_SUCCESS) {
VkMemoryBarrier memory_barrier = vku::InitStructHelper();
memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
memory_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
DispatchCmdPipelineBarrier(barrier_command_buffer_, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
1, &memory_barrier, 0, nullptr, 0, nullptr);
DispatchEndCommandBuffer(barrier_command_buffer_);
}
}
if (barrier_command_buffer_ != VK_NULL_HANDLE) {
VkTimelineSemaphoreSubmitInfo timeline_semaphore_submit_info = vku::InitStructHelper();
timeline_semaphore_submit_info.signalSemaphoreValueCount = 1;
timeline_semaphore_submit_info.pSignalSemaphoreValues = &seq;
VkSubmitInfo submit_info = vku::InitStructHelper(&timeline_semaphore_submit_info);
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &barrier_command_buffer_;
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &barrier_sem_;
DispatchQueueSubmit(VkHandle(), 1, &submit_info, VK_NULL_HANDLE);
}
}
void QueueSubState::PreSubmit(std::vector<vvl::QueueSubmission> &submissions) {
bool success = true;
for (const auto &submission : submissions) {
auto loc = submission.loc.Get();
for (auto &cb_submission : submission.cb_submissions) {
auto guard = cb_submission.cb->ReadLock();
auto &gpu_cb = SubState(*cb_submission.cb);
success = gpu_cb.PreSubmit(*this, loc);
if (!success) {
return;
}
for (auto *secondary_cb : gpu_cb.base.linked_command_buffers) {
auto secondary_guard = secondary_cb->ReadLock();
auto &secondary_gpu_cb = SubState(*secondary_cb);
success = secondary_gpu_cb.PreSubmit(*this, loc);
if (!success) {
return;
}
}
}
}
}
void QueueSubState::PostSubmit(std::deque<vvl::QueueSubmission> &submissions) {
bool success = true;
for (const auto &submission : submissions) {
auto loc = submission.loc.Get();
for (auto &cb_submission : submission.cb_submissions) {
auto guard = cb_submission.cb->ReadLock();
auto &gpu_cb = SubState(*cb_submission.cb);
success = gpu_cb.PostSubmit(*this, loc);
if (!success) {
return;
}
for (auto *secondary_cb : gpu_cb.base.linked_command_buffers) {
auto secondary_guard = secondary_cb->ReadLock();
auto &secondary_gpu_cb = SubState(*secondary_cb);
success = secondary_gpu_cb.PostSubmit(*this, loc);
if (!success) {
return;
}
}
}
}
if (!submissions.empty() && submissions.back().is_last_submission) {
auto loc = submissions.back().loc.Get();
SubmitBarrier(loc, submissions.back().seq);
}
}
void QueueSubState::Retire(vvl::QueueSubmission &submission) {
VVL_ZoneScoped;
if (submission.loc.Get().function == vvl::Func::vkQueuePresentKHR) {
// Present batch does not have any GPU-AV work to post process, skip it.
// This is also needed for correctness. QueuePresent does not have a PostSubmit call
// that signals barrier_sem_. The following timeline wait must not be called.
return;
}
retiring_.emplace_back(submission.cb_submissions);
if (submission.is_last_submission) {
VkSemaphoreWaitInfo wait_info = vku::InitStructHelper();
wait_info.semaphoreCount = 1;
wait_info.pSemaphores = &barrier_sem_;
wait_info.pValues = &submission.seq;
if (timeline_khr_) {
DispatchWaitSemaphoresKHR(gpuav_.device, &wait_info, 1'000'000'000);
} else {
DispatchWaitSemaphores(gpuav_.device, &wait_info, 1'000'000'000);
}
FenceWaiter *fence_waiter = shared_resources_cache.TryGet<FenceWaiter>();
if (fence_waiter && !fence_waiter->fences.empty()) {
DispatchWaitForFences(gpuav_.device, uint32_t(fence_waiter->fences.size()), fence_waiter->fences.data(), VK_TRUE,
UINT64_MAX);
fence_waiter->fences.clear();
}
for (std::vector<vvl::CommandBufferSubmission> &cb_submissions : retiring_) {
for (vvl::CommandBufferSubmission &cb_submission : cb_submissions) {
auto guard = cb_submission.cb->WriteLock();
auto &gpu_cb = SubState(*cb_submission.cb);
auto loc = submission.loc.Get();
gpu_cb.OnCompletion(VkHandle(), cb_submission.initial_label_stack, loc);
for (vvl::CommandBuffer *secondary_cb : gpu_cb.base.linked_command_buffers) {
auto secondary_guard = secondary_cb->WriteLock();
auto &secondary_gpu_cb = SubState(*secondary_cb);
secondary_gpu_cb.OnCompletion(VkHandle(), cb_submission.initial_label_stack, loc);
}
}
}
retiring_.clear();
}
}
ImageSubState::ImageSubState(vvl::Image &obj, DescriptorHeap &heap)
: vvl::ImageSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}
void ImageSubState::Destroy() { id_tracker.reset(); }
void ImageSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }
ImageViewSubState::ImageViewSubState(vvl::ImageView &obj, DescriptorHeap &heap)
: vvl::ImageViewSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}
void ImageViewSubState::Destroy() { id_tracker.reset(); }
void ImageViewSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }
BufferSubState::BufferSubState(vvl::Buffer &obj, DescriptorHeap &heap)
: vvl::BufferSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}
void BufferSubState::Destroy() { id_tracker.reset(); }
void BufferSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }
BufferViewSubState::BufferViewSubState(vvl::BufferView &obj, DescriptorHeap &heap)
: vvl::BufferViewSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}
void BufferViewSubState::Destroy() { id_tracker.reset(); }
void BufferViewSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }
SamplerSubState::SamplerSubState(vvl::Sampler &obj, DescriptorHeap &heap)
: vvl::SamplerSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}
void SamplerSubState::Destroy() { id_tracker.reset(); }
void SamplerSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }
AccelerationStructureNVSubState::AccelerationStructureNVSubState(vvl::AccelerationStructureNV &obj, DescriptorHeap &heap)
: vvl::AccelerationStructureNVSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}
void AccelerationStructureNVSubState::Destroy() { id_tracker.reset(); }
void AccelerationStructureNVSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) {
id_tracker.reset();
}
AccelerationStructureKHRSubState::AccelerationStructureKHRSubState(vvl::AccelerationStructureKHR &obj, DescriptorHeap &heap)
: vvl::AccelerationStructureKHRSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}
void AccelerationStructureKHRSubState::Destroy() { id_tracker.reset(); }
void AccelerationStructureKHRSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) {
id_tracker.reset();
}
TensorSubState::TensorSubState(vvl::Tensor &obj, DescriptorHeap &heap)
: vvl::TensorSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}
void TensorSubState::Destroy() { id_tracker.reset(); }
void TensorSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }
TensorViewSubState::TensorViewSubState(vvl::TensorView &obj, DescriptorHeap &heap)
: vvl::TensorViewSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}
void TensorViewSubState::Destroy() { id_tracker.reset(); }
void TensorViewSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }
ShaderObjectSubState::ShaderObjectSubState(vvl::ShaderObject &obj) : vvl::ShaderObjectSubState(obj) {}
PipelineSubState::PipelineSubState(Validator &gpuav, vvl::Pipeline &pipeline) : vvl::PipelineSubState(pipeline), gpuav_(gpuav) {}
VkPipelineLayout PipelineSubState::GetPipelineLayoutUnion(const Location &loc, vvl::DescriptorMode mode) const {
std::unique_lock<std::mutex> recreated_layout_lock(recreated_layout_mutex);
if (recreated_layout != VK_NULL_HANDLE) {
return recreated_layout;
}
const std::shared_ptr<const vvl::PipelineLayout> pipeline_layout_state = base.PipelineLayoutState();
assert(pipeline_layout_state->set_layouts.list.size() <= gpuav_.instrumentation_desc_set_bind_index_);
if (pipeline_layout_state->set_layouts.list.size() > gpuav_.instrumentation_desc_set_bind_index_) {
gpuav_.InternalError(LogObjectList(base.VkHandle()), loc,
"Trying to recreate a pipeline layout with no room for the instrumentation descriptor set.");
return VK_NULL_HANDLE;
}
std::vector<VkDescriptorSetLayout> set_layout_handles;
set_layout_handles.reserve(gpuav_.instrumentation_desc_set_bind_index_ + 1);
std::vector<size_t> recreated_desc_set_layouts_indices;
for (size_t set_layout_i = 0; set_layout_i < pipeline_layout_state->set_layouts.list.size(); ++set_layout_i) {
const auto &set_layout = pipeline_layout_state->set_layouts.list[set_layout_i];
if (!set_layout) {
set_layout_handles.emplace_back(VK_NULL_HANDLE);
} else {
VkDescriptorSetLayout recreated_desc_set_layout = VK_NULL_HANDLE;
const VkResult result = DispatchCreateDescriptorSetLayout(gpuav_.device, set_layout->GetCreateInfo().ptr(), nullptr,
&recreated_desc_set_layout);
(void)result;
assert(result == VK_SUCCESS);
set_layout_handles.emplace_back(recreated_desc_set_layout);
recreated_desc_set_layouts_indices.emplace_back(set_layout_i);
}
}
for (size_t i = set_layout_handles.size(); i < gpuav_.instrumentation_desc_set_bind_index_; ++i) {
set_layout_handles.emplace_back(gpuav_.dummy_desc_layout_[mode]);
}
set_layout_handles.emplace_back(gpuav_.GetInstrumentationDescriptorSetLayout(mode));
VkPipelineLayoutCreateInfo pipeline_layout_ci = vku::InitStructHelper();
pipeline_layout_ci.flags = pipeline_layout_state->create_flags;
pipeline_layout_ci.setLayoutCount = uint32_t(set_layout_handles.size());
pipeline_layout_ci.pSetLayouts = set_layout_handles.data();
if (pipeline_layout_state->push_constant_ranges_layout) {
pipeline_layout_ci.pushConstantRangeCount = uint32_t(pipeline_layout_state->push_constant_ranges_layout->size());
pipeline_layout_ci.pPushConstantRanges = pipeline_layout_state->push_constant_ranges_layout->data();
}
const VkResult result = DispatchCreatePipelineLayout(gpuav_.device, &pipeline_layout_ci, nullptr, &recreated_layout);
(void)result;
assert(result == VK_SUCCESS);
for (size_t i : recreated_desc_set_layouts_indices) {
DispatchDestroyDescriptorSetLayout(gpuav_.device, set_layout_handles[i], nullptr);
}
return recreated_layout;
}
void PipelineSubState::Destroy() {
std::unique_lock<std::mutex> recreated_layout_lock(recreated_layout_mutex);
if (recreated_layout != VK_NULL_HANDLE) {
DispatchDestroyPipelineLayout(gpuav_.device, recreated_layout, nullptr);
recreated_layout = VK_NULL_HANDLE;
}
}
} // namespace gpuav