| /* Copyright (c) 2018-2026 The Khronos Group Inc. |
| * Copyright (c) 2018-2026 Valve Corporation |
| * Copyright (c) 2018-2026 LunarG, Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <vulkan/vulkan_core.h> |
| #include "generated/dispatch_functions.h" |
| #include "gpuav/resources/gpuav_state_trackers.h" |
| #include "gpuav/descriptor_validation/gpuav_descriptor_validation.h" |
| #include "gpuav/instrumentation/gpuav_instrumentation.h" |
| #include "gpuav/core/gpuav.h" |
| #include "gpuav/core/gpuav_constants.h" |
| #include "gpuav/shaders/gpuav_error_header.h" |
| #include "gpuav/resources/gpuav_vulkan_objects.h" |
| #include "gpuav/validation_cmd/gpuav_draw.h" |
| |
| #include "profiling/profiling.h" |
| #include "state_tracker/last_bound_state.h" |
| |
| namespace gpuav { |
| |
// Per command buffer GPU-AV state. Eagerly allocates the GPU resources
// (error output buffer, error counts buffer, instrumentation descriptor set layout)
// as soon as the command buffer is allocated.
CommandBufferSubState::CommandBufferSubState(Validator &gpuav, vvl::CommandBuffer &cb)
    : vvl::CommandBufferSubState(cb), gpu_resources_manager(gpuav, false), cmd_errors_counts_buffer_(gpuav), gpuav_(gpuav) {
    // Resources are tied to the lifetime of the command buffer, hence the vkAllocateCommandBuffers location
    Location loc(vvl::Func::vkAllocateCommandBuffers);
    AllocateResources(loc);
}
| |
| CommandBufferSubState::~CommandBufferSubState() {} |
| |
// Create (or reuse) the GPU resources this command buffer needs for instrumentation:
// the instrumentation descriptor set layout, the host-coherent error output buffer,
// and the per-command error counts buffer. On internal failure, reports and bails early,
// leaving later resources unallocated.
void CommandBufferSubState::AllocateResources(const Location &loc) {
    VkResult result = VK_SUCCESS;

    // Instrumentation descriptor set layout
    // Only created once; Reset() calls back into this function and skips this branch.
    if (instrumentation_desc_set_layout_ == VK_NULL_HANDLE) {
        assert(!gpuav_.instrumentation_bindings_.empty());
        VkDescriptorSetLayoutCreateInfo instrumentation_desc_set_layout_ci = vku::InitStructHelper();
        instrumentation_desc_set_layout_ci.bindingCount = static_cast<uint32_t>(gpuav_.instrumentation_bindings_.size());
        instrumentation_desc_set_layout_ci.pBindings = gpuav_.instrumentation_bindings_.data();
        result = DispatchCreateDescriptorSetLayout(gpuav_.device, &instrumentation_desc_set_layout_ci, nullptr,
                                                   &instrumentation_desc_set_layout_);
        if (result != VK_SUCCESS) {
            gpuav_.InternalError(gpuav_.device, loc, "Unable to create instrumentation descriptor set layout.");
            return;
        }
    }

    // Error output buffer
    {
        error_output_buffer_range_ = gpu_resources_manager.GetHostCoherentBufferRange(glsl::kErrorBufferByteSize);
        if (error_output_buffer_range_.buffer == VK_NULL_HANDLE) {
            // Allocation failure is presumably reported inside GetHostCoherentBufferRange — TODO confirm
            return;
        }

        // Zero the whole range; shaders append error records after the header words.
        memset(error_output_buffer_range_.offset_mapped_ptr, 0, (size_t)error_output_buffer_range_.size);
        if (gpuav_.gpuav_settings.shader_instrumentation.descriptor_checks) {
            // Flag word read by instrumented shaders to know descriptor OOB checking is on
            ((uint32_t *)error_output_buffer_range_.offset_mapped_ptr)[cst::stream_output_flags_offset] =
                cst::inst_buffer_oob_enabled;
        }
    }

    // Commands errors counts buffer
    {
        if (cmd_errors_counts_buffer_.IsDestroyed()) {
            VkBufferCreateInfo buffer_info = vku::InitStructHelper();
            buffer_info.size = GetCmdErrorsCountsBufferByteSize();
            buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            VmaAllocationCreateInfo alloc_info = {};
            // Host visible/coherent required so Clear()/readback can touch it; device local preferred for GPU speed
            alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
            alloc_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            result = cmd_errors_counts_buffer_.Create(&buffer_info, &alloc_info);
            if (result != VK_SUCCESS) {
                // NOTE(review): no InternalError here, unlike the layout path — presumably Create() reports; confirm
                return;
            }
        }

        cmd_errors_counts_buffer_.Clear();
    }
}
| |
| // Common logic after any draw/dispatch/traceRays |
// Common logic after any draw/dispatch/traceRays:
// bind the instrumentation resources for the command, then bump the per-bind-point
// action command counter (must come after, so the command is associated with the
// pre-increment index).
void CommandBufferSubState::RecordActionCommand(LastBound &last_bound, const Location &) {
    PostCallSetupShaderInstrumentationResources(gpuav_, *this, last_bound);
    IncrementActionCommandCount(last_bound.bind_point);
}
| |
// Forward descriptor set binding updates to the GPU-AV descriptor tracking logic.
void CommandBufferSubState::UpdateLastBoundDescriptorSets(VkPipelineBindPoint bind_point, const Location &loc) {
    descriptor::UpdateBoundDescriptors(gpuav_, *this, bind_point, loc);
}
| |
| void CommandBufferSubState::Destroy() { ResetCBState(true); } |
| |
// vkResetCommandBuffer path: recycle resources back to the cache, then re-allocate
// what the next recording will need.
void CommandBufferSubState::Reset(const Location &loc) {
    ResetCBState(false);
    // TODO: Calling AllocateResources in Reset like so is a kind of a hack,
    // relying on CommandBuffer internal logic to work.
    // Tried to call it in ResetCBState, hang on command buffer mutex :/
    AllocateResources(loc);
}
| |
| void CommandBufferSubState::RecordPushConstants(VkPipelineLayout layout, VkShaderStageFlags stage_flags, uint32_t offset, |
| uint32_t size, const void *values) { |
| if (IsStageInPipelineBindPoint(stage_flags, VK_PIPELINE_BIND_POINT_GRAPHICS)) { |
| push_constant_latest_used_layout[vvl::BindPointGraphics] = layout; |
| } else if (IsStageInPipelineBindPoint(stage_flags, VK_PIPELINE_BIND_POINT_COMPUTE)) { |
| push_constant_latest_used_layout[vvl::BindPointCompute] = layout; |
| } else if (IsStageInPipelineBindPoint(stage_flags, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR)) { |
| push_constant_latest_used_layout[vvl::BindPointRayTracing] = layout; |
| } else { |
| // Need to handle new binding point |
| assert(false); |
| } |
| |
| PushConstantData push_constant_data; |
| push_constant_data.layout = layout; |
| push_constant_data.stage_flags = stage_flags; |
| push_constant_data.offset = offset; |
| push_constant_data.values.resize(size); |
| auto byte_values = static_cast<const std::byte *>(values); |
| std::copy(byte_values, byte_values + size, push_constant_data.values.data()); |
| // Always add submitted push constant values, even if the same data is already stored. |
| // Storing duplicated data, or data submitted by one vkCmdPushConstants call |
| // and overridden by a subsequent one is not a problem. |
| // push_constant_data_chunks is intended to be parsed from 0 to N, |
| // thus going through the history in order, so even though it is |
| // possibly suboptimal push constant data is correct. |
| push_constant_data_chunks.emplace_back(push_constant_data); |
| } |
| |
| void CommandBufferSubState::ClearPushConstants() { |
| push_constant_data_chunks.clear(); |
| push_constant_latest_used_layout.fill(VK_NULL_HANDLE); |
| } |
| |
| void CommandBufferSubState::RecordEndRendering(const VkRenderingEndInfoEXT *) { valcmd::FlushValidationCmds(gpuav_, *this); } |
| |
// Classic render pass end: same flush as RecordEndRendering.
void CommandBufferSubState::RecordEndRenderPass(const VkSubpassEndInfo *, const Location &) {
    valcmd::FlushValidationCmds(gpuav_, *this);
}
| |
// For things like vkCmdCopyImage there is no "last bound" state, since no shaders are attached to such commands
// Register the error logger used to report GPU-detected errors for the command being recorded.
// `last_bound` may be null (e.g. transfer commands); in that case only the command buffer handle
// goes in the object list. Loggers are capped at invalid_index_command entries; past that,
// new commands silently share the "unknown" overflow path handled in OnCompletion.
void CommandBufferSubState::AddCommandErrorLogger(const Location &loc, const LastBound *last_bound,
                                                  ErrorLoggerFunc error_logger_func) {
    if (command_error_loggers_.size() == gpuav_.gpuav_settings.invalid_index_command) {
        return;
    }

    // Index of the most recent debug label command, or kNoIndex32 if none recorded yet
    const uint32_t label_command_i =
        base.GetLabelCommands().empty() ? vvl::kNoIndex32 : uint32_t(base.GetLabelCommands().size() - 1);
    command_error_loggers_.emplace_back(CommandBufferSubState::CommandErrorLogger{
        loc, last_bound ? last_bound->cb_state.GetObjectList(last_bound->bind_point) : LogObjectList{VkHandle()},
        std::move(error_logger_func), label_command_i});
}
| |
// Common reset/destroy path. With should_destroy == true GPU resources are freed for good;
// otherwise they are returned to the resource cache for reuse by the next recording.
void CommandBufferSubState::ResetCBState(bool should_destroy) {
    // Free or return to cache GPU resources

    // Drop all registered per-recording callbacks
    on_instrumentation_error_logger_register_functions.clear();
    on_instrumentation_desc_set_update_functions.clear();
    on_instrumentation_desc_buffer_update_functions.clear();
    on_instrumentation_desc_heap_update_functions.clear();
    on_cb_completion_functions.clear();
    on_post_cb_submission_functions.clear();
    on_pre_cb_submission_functions.clear();
    shared_resources_cache.Clear();

    if (should_destroy) {
        gpu_resources_manager.DestroyResources();
    } else {
        gpu_resources_manager.ReturnResources();
    }
    command_error_loggers_.clear();

    // The instrumentation descriptor set layout survives resets; only destroyed with the CB
    if (should_destroy && instrumentation_desc_set_layout_ != VK_NULL_HANDLE) {
        DispatchDestroyDescriptorSetLayout(gpuav_.device, instrumentation_desc_set_layout_, nullptr);
        instrumentation_desc_set_layout_ = VK_NULL_HANDLE;
    }

    if (should_destroy) {
        // error_output_buffer_range_ memory is owned by gpu_resources_manager, so just drop the view
        error_output_buffer_range_ = {};
        cmd_errors_counts_buffer_.Destroy();
    }

    // Restart per-bind-point action command counters
    draw_index = 0;
    compute_index = 0;
    trace_rays_index = 0;

    resource_descriptor_buffer_index_ = 0;

    ClearPushConstants();
}
| |
| void CommandBufferSubState::IncrementActionCommandCount(VkPipelineBindPoint bind_point) { |
| if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { |
| draw_index++; |
| if (draw_index > gpuav_.gpuav_settings.invalid_index_command) { |
| draw_index = gpuav_.gpuav_settings.invalid_index_command; |
| } |
| } else if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { |
| compute_index++; |
| if (compute_index > gpuav_.gpuav_settings.invalid_index_command) { |
| compute_index = gpuav_.gpuav_settings.invalid_index_command; |
| } |
| } else if (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) { |
| trace_rays_index++; |
| if (trace_rays_index > gpuav_.gpuav_settings.invalid_index_command) { |
| trace_rays_index = gpuav_.gpuav_settings.invalid_index_command; |
| } |
| } |
| } |
| |
| uint32_t CommandBufferSubState::GetActionCommandIndex(VkPipelineBindPoint bind_point) const { |
| return (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) ? draw_index |
| : (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) ? compute_index |
| : (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) ? trace_rays_index |
| : 0; |
| } |
| |
| std::string CommandBufferSubState::GetDebugLabelRegion(uint32_t label_command_i, |
| const std::vector<std::string> &initial_label_stack) const { |
| std::string debug_region_name; |
| if (label_command_i != vvl::kNoIndex32) { |
| debug_region_name = base.GetDebugRegionName(base.GetLabelCommands(), label_command_i, initial_label_stack); |
| } else { |
| // label_command_i == vvl::kNoIndex32 => when the instrumented command was recorded, |
| // no debug label region was yet opened in the corresponding command buffer, |
| // but still a region might have been started in another previously submitted |
| // command buffer. So just compute region name from initial_label_stack. |
| for (const std::string &label_name : initial_label_stack) { |
| if (!debug_region_name.empty()) { |
| debug_region_name += "::"; |
| } |
| debug_region_name += label_name; |
| } |
| } |
| return debug_region_name; |
| } |
| |
// Fences for per-submission auxiliary command buffers (see PostSubmit);
// waited on and cleared in QueueSubState::Retire. Stored in the queue's shared_resources_cache.
struct FenceWaiter {
    std::vector<VkFence> fences;
};
| |
// If any pre-submission callbacks were registered for this command buffer, record them
// into an auxiliary one-time-submit command buffer (from the queue's cached command pool)
// and submit it ahead of the application's work.
// Returns false only when an auxiliary command buffer could not be obtained.
bool CommandBufferSubState::PreSubmit(QueueSubState &queue, const Location &loc) {
    VVL_ZoneScoped;
    if (!on_pre_cb_submission_functions.empty()) {
        vko::CommandPool &cb_pool =
            queue.shared_resources_cache.GetOrCreate<vko::CommandPool>(gpuav_, queue.base.queue_family_index, loc);
        auto [per_pre_submission_cb, fence] = cb_pool.GetCommandBuffer();
        if (per_pre_submission_cb == VK_NULL_HANDLE) {
            return false;
        }
        DispatchResetCommandBuffer(per_pre_submission_cb, 0);
        VkCommandBufferBeginInfo cb_bi = vku::InitStructHelper();
        cb_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
        DispatchBeginCommandBuffer(per_pre_submission_cb, &cb_bi);
        for (auto &pre_submission_func : on_pre_cb_submission_functions) {
            pre_submission_func(gpuav_, *this, per_pre_submission_cb);
        }
        DispatchEndCommandBuffer(per_pre_submission_cb);

        VkSubmitInfo submit_info = vku::InitStructHelper();
        submit_info.commandBufferCount = 1;
        submit_info.pCommandBuffers = &per_pre_submission_cb;
        const VkResult result = DispatchQueueSubmit(queue.base.VkHandle(), 1, &submit_info, fence);
        if (result != VK_SUCCESS) {
            // Error is reported; still returns true since the app's submission can proceed
            gpuav_.InternalError(queue.Handle(), loc, "Failed to submit per pre submission command buffer");
        }
        // NOTE(review): unlike PostSubmit, `fence` is not added to the queue's FenceWaiter here —
        // presumably the pool tracks it for recycling; confirm there is no premature CB reuse.
    }

    return true;
}
| |
// Counterpart of PreSubmit: record and submit post-submission callbacks after the
// application's work, and register the auxiliary fence with the queue's FenceWaiter so
// Retire() can wait for this work before reading back results.
// Returns false only when an auxiliary command buffer could not be obtained.
bool CommandBufferSubState::PostSubmit(QueueSubState &queue, const Location &loc) {
    VVL_ZoneScoped;
    if (!on_post_cb_submission_functions.empty()) {
        vko::CommandPool &cb_pool =
            queue.shared_resources_cache.GetOrCreate<vko::CommandPool>(gpuav_, queue.base.queue_family_index, loc);
        auto [per_post_submission_cb, fence] = cb_pool.GetCommandBuffer();
        if (per_post_submission_cb == VK_NULL_HANDLE) {
            return false;
        }
        DispatchResetCommandBuffer(per_post_submission_cb, 0);
        VkCommandBufferBeginInfo cb_bi = vku::InitStructHelper();
        cb_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
        DispatchBeginCommandBuffer(per_post_submission_cb, &cb_bi);
        for (auto &post_submission_func : on_post_cb_submission_functions) {
            post_submission_func(gpuav_, *this, per_post_submission_cb);
        }
        DispatchEndCommandBuffer(per_post_submission_cb);

        VkSubmitInfo submit_info = vku::InitStructHelper();
        submit_info.commandBufferCount = 1;
        submit_info.pCommandBuffers = &per_post_submission_cb;
        const VkResult result = DispatchQueueSubmit(queue.base.VkHandle(), 1, &submit_info, fence);
        if (result != VK_SUCCESS) {
            // Error is reported; still returns true since the submission flow can proceed
            gpuav_.InternalError(queue.Handle(), loc, "Failed to submit per post submission command buffer");
        }

        // Retire() waits on these fences before post-processing
        FenceWaiter &fence_waiter = queue.shared_resources_cache.GetOrCreate<FenceWaiter>();
        fence_waiter.fences.emplace_back(fence);
    }

    return true;
}
| |
| bool CommandBufferSubState::NeedsPostProcess() { return error_output_buffer_range_.buffer != VK_NULL_HANDLE; } |
| |
| // For the given command buffer, map its debug data buffers and read their contents for analysis. |
| void CommandBufferSubState::OnCompletion(VkQueue queue, const std::vector<std::string> &initial_label_stack, const Location &loc) { |
| VVL_ZoneScoped; |
| |
| // CommandBuffer::Destroy can happen on an other thread, |
| // so when getting here after acquiring command buffer's lock, |
| // make sure there are still things to process |
| if (!NeedsPostProcess()) { |
| return; |
| } |
| |
| { |
| auto error_output_buffer_ptr = (uint32_t *)error_output_buffer_range_.offset_mapped_ptr; |
| |
| // The second word in the debug output buffer is the number of words that would have |
| // been written by the shader instrumentation, if there was enough room in the buffer we provided. |
| // The number of words actually written by the shaders is determined by the size of the buffer |
| // we provide via the descriptor. So, we process only the number of words that can fit in the |
| // buffer. |
| const uint32_t total_words = error_output_buffer_ptr[cst::stream_output_size_offset]; |
| |
| // A zero here means that the shader instrumentation didn't write anything. |
| if (total_words != 0) { |
| uint32_t *const error_records_start = &error_output_buffer_ptr[cst::stream_output_data_offset]; |
| assert(glsl::kErrorBufferByteSize > cst::stream_output_data_offset); |
| uint32_t *const error_records_end = |
| error_output_buffer_ptr + (glsl::kErrorBufferByteSize - cst::stream_output_data_offset); |
| |
| uint32_t *error_record_ptr = error_records_start; |
| uint32_t record_size = error_record_ptr[glsl::kHeader_ErrorRecordSizeOffset]; |
| assert(record_size == glsl::kErrorRecordSize); |
| |
| while (record_size > 0 && (error_record_ptr + record_size) <= error_records_end) { |
| const uint32_t error_logger_i = |
| error_record_ptr[glsl::kHeader_ActionIdErrorLoggerIdOffset] & glsl::kErrorLoggerId_Mask; |
| |
| assert(error_logger_i < gpuav_.gpuav_settings.indices_buffer_count); |
| if (error_logger_i == gpuav_.gpuav_settings.invalid_index_command) { |
| const LogObjectList objlist(queue, VkHandle()); |
| gpuav_.LogError( |
| "GPUAV-Overflow-Unknown", queue, loc, |
| "An error was detected, but after internal limit of %" PRIu32 |
| " draw/dispatch/traceRays commands in a command buffer, we are unable to track which validation error " |
| "occured.\nThis can be adjusted setting env var VK_LAYER_GPUAV_MAX_INDICES_COUNT to a higher value.", |
| gpuav_.gpuav_settings.invalid_index_command); |
| } else { |
| // normal case |
| const CommandErrorLogger &error_logger = GetErrorLogger(error_logger_i); |
| const LogObjectList objlist(queue, error_logger.objlist); |
| |
| std::string debug_region_name = GetDebugLabelRegion(error_logger.label_cmd_i, initial_label_stack); |
| Location loc_with_debug_region(error_logger.loc.Get(), debug_region_name); |
| error_logger.error_logger_func(error_record_ptr, loc_with_debug_region, objlist); |
| } |
| |
| // Next record |
| error_record_ptr += record_size; |
| record_size = error_record_ptr[glsl::kHeader_ErrorRecordSizeOffset]; |
| } |
| |
| VVL_TracyPlot("GPU-AV errors count", int64_t(total_words / glsl::kErrorRecordSize)); |
| |
| // Clear the written size and any error messages. Note that this preserves the first word, which contains flags. |
| assert(glsl::kErrorBufferByteSize > cst::stream_output_data_offset); |
| memset(&error_output_buffer_ptr[cst::stream_output_flags_offset + 1], 0, |
| size_t(error_output_buffer_range_.size) - sizeof(uint32_t)); |
| } |
| error_output_buffer_ptr[cst::stream_output_size_offset] = 0; |
| } |
| |
| cmd_errors_counts_buffer_.Clear(); |
| if (gpuav_.aborted_) { |
| return; |
| } |
| |
| bool success = true; |
| LabelLogging label_logging = {initial_label_stack}; |
| for (auto &on_cb_completion_func : on_cb_completion_functions) { |
| success = on_cb_completion_func(gpuav_, *this, label_logging, loc); |
| if (!success) { |
| break; |
| } |
| } |
| } |
| |
| QueueSubState::QueueSubState(Validator &gpuav, vvl::Queue &q) : vvl::QueueSubState(q), gpuav_(gpuav), timeline_khr_(false) {} |
| |
// Destroy the lazily-created barrier objects (see SubmitBarrier) and the queue's cached resources.
QueueSubState::~QueueSubState() {
    shared_resources_cache.Clear();

    // Free the command buffer before destroying its pool
    if (barrier_command_buffer_) {
        DispatchFreeCommandBuffers(gpuav_.device, barrier_command_pool_, 1, &barrier_command_buffer_);
        barrier_command_buffer_ = VK_NULL_HANDLE;
    }
    if (barrier_command_pool_) {
        DispatchDestroyCommandPool(gpuav_.device, barrier_command_pool_, nullptr);
        barrier_command_pool_ = VK_NULL_HANDLE;
    }
    if (barrier_sem_) {
        DispatchDestroySemaphore(gpuav_.device, barrier_sem_, nullptr);
        barrier_sem_ = VK_NULL_HANDLE;
    }
}
| |
| // #ARNO_TODO do we still need that? |
| // Submit a memory barrier on graphics queues. |
| // Lazy-create and record the needed command buffer. |
// Submit the lazily-created memory barrier command buffer, signaling the timeline
// semaphore barrier_sem_ with `seq` so Retire() can wait for device writes to become
// host-visible before reading error buffers back.
void QueueSubState::SubmitBarrier(const Location &loc, uint64_t seq) {
    // First call: create the pool, command buffer, and timeline semaphore, and record
    // the barrier once (SIMULTANEOUS_USE allows resubmitting the same CB every time).
    if (barrier_command_pool_ == VK_NULL_HANDLE) {
        VkResult result = VK_SUCCESS;

        VkCommandPoolCreateInfo pool_create_info = vku::InitStructHelper();
        pool_create_info.queueFamilyIndex = base.queue_family_index;
        result = DispatchCreateCommandPool(gpuav_.device, &pool_create_info, nullptr, &barrier_command_pool_);
        if (result != VK_SUCCESS) {
            gpuav_.InternalError(VkHandle(), loc, "Unable to create command pool for barrier CB.");
            barrier_command_pool_ = VK_NULL_HANDLE;
            return;
        }

        VkCommandBufferAllocateInfo buffer_alloc_info = vku::InitStructHelper();
        buffer_alloc_info.commandPool = barrier_command_pool_;
        buffer_alloc_info.commandBufferCount = 1;
        buffer_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
        result = DispatchAllocateCommandBuffers(gpuav_.device, &buffer_alloc_info, &barrier_command_buffer_);
        if (result != VK_SUCCESS) {
            // Roll back the pool so later calls retry from scratch
            gpuav_.InternalError(VkHandle(), loc, "Unable to create barrier command buffer.");
            DispatchDestroyCommandPool(gpuav_.device, barrier_command_pool_, nullptr);
            barrier_command_pool_ = VK_NULL_HANDLE;
            barrier_command_buffer_ = VK_NULL_HANDLE;
            return;
        }

        VkSemaphoreTypeCreateInfo semaphore_type_create_info = vku::InitStructHelper();
        semaphore_type_create_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
        semaphore_type_create_info.initialValue = 0;

        VkSemaphoreCreateInfo semaphore_create_info = vku::InitStructHelper(&semaphore_type_create_info);

        result = DispatchCreateSemaphore(gpuav_.device, &semaphore_create_info, nullptr, &barrier_sem_);
        if (result != VK_SUCCESS) {
            gpuav_.InternalError(gpuav_.device, loc, "Unable to create barrier semaphore.");
            DispatchDestroyCommandPool(gpuav_.device, barrier_command_pool_, nullptr);
            barrier_command_pool_ = VK_NULL_HANDLE;
            barrier_command_buffer_ = VK_NULL_HANDLE;
            return;
        }

        // Hook up command buffer dispatch
        gpuav_.vk_set_device_loader_data_(gpuav_.device, barrier_command_buffer_);

        // Record a global memory barrier to force availability of device memory operations to the host domain.
        VkCommandBufferBeginInfo barrier_cmd_buffer_begin_info = vku::InitStructHelper();
        barrier_cmd_buffer_begin_info.flags |= VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
        result = DispatchBeginCommandBuffer(barrier_command_buffer_, &barrier_cmd_buffer_begin_info);
        if (result == VK_SUCCESS) {
            VkMemoryBarrier memory_barrier = vku::InitStructHelper();
            memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
            memory_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
            DispatchCmdPipelineBarrier(barrier_command_buffer_, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
                                       1, &memory_barrier, 0, nullptr, 0, nullptr);
            DispatchEndCommandBuffer(barrier_command_buffer_);
        }
    }

    if (barrier_command_buffer_ != VK_NULL_HANDLE) {
        // Signal barrier_sem_ to `seq` (the submission sequence number) on completion
        VkTimelineSemaphoreSubmitInfo timeline_semaphore_submit_info = vku::InitStructHelper();
        timeline_semaphore_submit_info.signalSemaphoreValueCount = 1;
        timeline_semaphore_submit_info.pSignalSemaphoreValues = &seq;

        VkSubmitInfo submit_info = vku::InitStructHelper(&timeline_semaphore_submit_info);

        submit_info.commandBufferCount = 1;
        submit_info.pCommandBuffers = &barrier_command_buffer_;
        submit_info.signalSemaphoreCount = 1;
        submit_info.pSignalSemaphores = &barrier_sem_;

        DispatchQueueSubmit(VkHandle(), 1, &submit_info, VK_NULL_HANDLE);
    }
}
| |
| void QueueSubState::PreSubmit(std::vector<vvl::QueueSubmission> &submissions) { |
| bool success = true; |
| for (const auto &submission : submissions) { |
| auto loc = submission.loc.Get(); |
| for (auto &cb_submission : submission.cb_submissions) { |
| auto guard = cb_submission.cb->ReadLock(); |
| auto &gpu_cb = SubState(*cb_submission.cb); |
| success = gpu_cb.PreSubmit(*this, loc); |
| if (!success) { |
| return; |
| } |
| for (auto *secondary_cb : gpu_cb.base.linked_command_buffers) { |
| auto secondary_guard = secondary_cb->ReadLock(); |
| auto &secondary_gpu_cb = SubState(*secondary_cb); |
| success = secondary_gpu_cb.PreSubmit(*this, loc); |
| if (!success) { |
| return; |
| } |
| } |
| } |
| } |
| } |
| |
| void QueueSubState::PostSubmit(std::deque<vvl::QueueSubmission> &submissions) { |
| bool success = true; |
| for (const auto &submission : submissions) { |
| auto loc = submission.loc.Get(); |
| for (auto &cb_submission : submission.cb_submissions) { |
| auto guard = cb_submission.cb->ReadLock(); |
| auto &gpu_cb = SubState(*cb_submission.cb); |
| success = gpu_cb.PostSubmit(*this, loc); |
| if (!success) { |
| return; |
| } |
| for (auto *secondary_cb : gpu_cb.base.linked_command_buffers) { |
| auto secondary_guard = secondary_cb->ReadLock(); |
| auto &secondary_gpu_cb = SubState(*secondary_cb); |
| success = secondary_gpu_cb.PostSubmit(*this, loc); |
| if (!success) { |
| return; |
| } |
| } |
| } |
| } |
| |
| if (!submissions.empty() && submissions.back().is_last_submission) { |
| auto loc = submissions.back().loc.Get(); |
| SubmitBarrier(loc, submissions.back().seq); |
| } |
| } |
| |
// Called as submissions retire: once the last submission of a batch is reached, wait for the
// barrier timeline semaphore (and any auxiliary fences), then run OnCompletion on every
// retired command buffer to read back and report GPU-detected errors.
void QueueSubState::Retire(vvl::QueueSubmission &submission) {
    VVL_ZoneScoped;
    if (submission.loc.Get().function == vvl::Func::vkQueuePresentKHR) {
        // Present batch does not have any GPU-AV work to post process, skip it.
        // This is also needed for correctness. QueuePresent does not have a PostSubmit call
        // that signals barrier_sem_. The following timeline wait must not be called.
        return;
    }
    retiring_.emplace_back(submission.cb_submissions);
    if (submission.is_last_submission) {
        // Wait for barrier_sem_ to reach this submission's sequence number, i.e. for the
        // host-availability barrier submitted in PostSubmit to complete.
        VkSemaphoreWaitInfo wait_info = vku::InitStructHelper();
        wait_info.semaphoreCount = 1;
        wait_info.pSemaphores = &barrier_sem_;
        wait_info.pValues = &submission.seq;

        // 1 second timeout (nanoseconds); a timeout is not treated as an error here
        if (timeline_khr_) {
            DispatchWaitSemaphoresKHR(gpuav_.device, &wait_info, 1'000'000'000);
        } else {
            DispatchWaitSemaphores(gpuav_.device, &wait_info, 1'000'000'000);
        }

        // Also wait for the auxiliary per-submission command buffers (see PostSubmit)
        FenceWaiter *fence_waiter = shared_resources_cache.TryGet<FenceWaiter>();
        if (fence_waiter && !fence_waiter->fences.empty()) {
            DispatchWaitForFences(gpuav_.device, uint32_t(fence_waiter->fences.size()), fence_waiter->fences.data(), VK_TRUE,
                                  UINT64_MAX);
            fence_waiter->fences.clear();
        }

        // GPU work is done: post-process every retired command buffer (primaries and their secondaries)
        for (std::vector<vvl::CommandBufferSubmission> &cb_submissions : retiring_) {
            for (vvl::CommandBufferSubmission &cb_submission : cb_submissions) {
                auto guard = cb_submission.cb->WriteLock();
                auto &gpu_cb = SubState(*cb_submission.cb);
                auto loc = submission.loc.Get();
                gpu_cb.OnCompletion(VkHandle(), cb_submission.initial_label_stack, loc);
                for (vvl::CommandBuffer *secondary_cb : gpu_cb.base.linked_command_buffers) {
                    auto secondary_guard = secondary_cb->WriteLock();
                    auto &secondary_gpu_cb = SubState(*secondary_cb);
                    secondary_gpu_cb.OnCompletion(VkHandle(), cb_submission.initial_label_stack, loc);
                }
            }
        }
        retiring_.clear();
    }
}
| |
// Boilerplate sub-states for every resource type tracked in the GPU-AV descriptor heap.
// Each one registers the object's handle with the heap via an id_tracker on creation,
// and drops the registration when the object is destroyed or invalidated.
ImageSubState::ImageSubState(vvl::Image &obj, DescriptorHeap &heap)
    : vvl::ImageSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}

void ImageSubState::Destroy() { id_tracker.reset(); }

void ImageSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }

ImageViewSubState::ImageViewSubState(vvl::ImageView &obj, DescriptorHeap &heap)
    : vvl::ImageViewSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}

void ImageViewSubState::Destroy() { id_tracker.reset(); }

void ImageViewSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }

BufferSubState::BufferSubState(vvl::Buffer &obj, DescriptorHeap &heap)
    : vvl::BufferSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}

void BufferSubState::Destroy() { id_tracker.reset(); }

void BufferSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }

BufferViewSubState::BufferViewSubState(vvl::BufferView &obj, DescriptorHeap &heap)
    : vvl::BufferViewSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}

void BufferViewSubState::Destroy() { id_tracker.reset(); }

void BufferViewSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }

SamplerSubState::SamplerSubState(vvl::Sampler &obj, DescriptorHeap &heap)
    : vvl::SamplerSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}

void SamplerSubState::Destroy() { id_tracker.reset(); }

void SamplerSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }

AccelerationStructureNVSubState::AccelerationStructureNVSubState(vvl::AccelerationStructureNV &obj, DescriptorHeap &heap)
    : vvl::AccelerationStructureNVSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}

void AccelerationStructureNVSubState::Destroy() { id_tracker.reset(); }

void AccelerationStructureNVSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) {
    id_tracker.reset();
}

AccelerationStructureKHRSubState::AccelerationStructureKHRSubState(vvl::AccelerationStructureKHR &obj, DescriptorHeap &heap)
    : vvl::AccelerationStructureKHRSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}

void AccelerationStructureKHRSubState::Destroy() { id_tracker.reset(); }

void AccelerationStructureKHRSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) {
    id_tracker.reset();
}

TensorSubState::TensorSubState(vvl::Tensor &obj, DescriptorHeap &heap)
    : vvl::TensorSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}

void TensorSubState::Destroy() { id_tracker.reset(); }

void TensorSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }

TensorViewSubState::TensorViewSubState(vvl::TensorView &obj, DescriptorHeap &heap)
    : vvl::TensorViewSubState(obj), id_tracker(std::in_place, heap, obj.Handle()) {}

void TensorViewSubState::Destroy() { id_tracker.reset(); }

void TensorViewSubState::NotifyInvalidate(const vvl::StateObject::NodeList &invalid_nodes, bool unlink) { id_tracker.reset(); }

// Shader objects carry no descriptor heap id; plain pass-through sub-state.
ShaderObjectSubState::ShaderObjectSubState(vvl::ShaderObject &obj) : vvl::ShaderObjectSubState(obj) {}
| |
| PipelineSubState::PipelineSubState(Validator &gpuav, vvl::Pipeline &pipeline) : vvl::PipelineSubState(pipeline), gpuav_(gpuav) {} |
| |
// Lazily build (and cache in recreated_layout, guarded by recreated_layout_mutex) a pipeline
// layout that is the application's layout padded with dummy set layouts up to the GPU-AV
// instrumentation set bind index, with the instrumentation set layout appended last.
// Returns VK_NULL_HANDLE on internal error.
VkPipelineLayout PipelineSubState::GetPipelineLayoutUnion(const Location &loc, vvl::DescriptorMode mode) const {
    std::unique_lock<std::mutex> recreated_layout_lock(recreated_layout_mutex);
    if (recreated_layout != VK_NULL_HANDLE) {
        return recreated_layout;
    }

    const std::shared_ptr<const vvl::PipelineLayout> pipeline_layout_state = base.PipelineLayoutState();
    // NOTE(review): the assert and the if below check the same condition; the assert fires in debug
    // while the if is the release-path guard.
    assert(pipeline_layout_state->set_layouts.list.size() <= gpuav_.instrumentation_desc_set_bind_index_);
    if (pipeline_layout_state->set_layouts.list.size() > gpuav_.instrumentation_desc_set_bind_index_) {
        gpuav_.InternalError(LogObjectList(base.VkHandle()), loc,
                             "Trying to recreate a pipeline layout with no room for the instrumentation descriptor set.");
        return VK_NULL_HANDLE;
    }

    std::vector<VkDescriptorSetLayout> set_layout_handles;
    set_layout_handles.reserve(gpuav_.instrumentation_desc_set_bind_index_ + 1);
    // Indices of layouts we created here (and must destroy after the pipeline layout is built)
    std::vector<size_t> recreated_desc_set_layouts_indices;

    // Recreate each of the application's set layouts from its original create info
    for (size_t set_layout_i = 0; set_layout_i < pipeline_layout_state->set_layouts.list.size(); ++set_layout_i) {
        const auto &set_layout = pipeline_layout_state->set_layouts.list[set_layout_i];
        if (!set_layout) {
            set_layout_handles.emplace_back(VK_NULL_HANDLE);
        } else {
            VkDescriptorSetLayout recreated_desc_set_layout = VK_NULL_HANDLE;

            const VkResult result = DispatchCreateDescriptorSetLayout(gpuav_.device, set_layout->GetCreateInfo().ptr(), nullptr,
                                                                      &recreated_desc_set_layout);
            (void)result;
            assert(result == VK_SUCCESS);

            set_layout_handles.emplace_back(recreated_desc_set_layout);
            recreated_desc_set_layouts_indices.emplace_back(set_layout_i);
        }
    }

    // Pad with dummy layouts up to the instrumentation bind index, then append the instrumentation layout
    for (size_t i = set_layout_handles.size(); i < gpuav_.instrumentation_desc_set_bind_index_; ++i) {
        set_layout_handles.emplace_back(gpuav_.dummy_desc_layout_[mode]);
    }
    set_layout_handles.emplace_back(gpuav_.GetInstrumentationDescriptorSetLayout(mode));

    VkPipelineLayoutCreateInfo pipeline_layout_ci = vku::InitStructHelper();
    pipeline_layout_ci.flags = pipeline_layout_state->create_flags;
    pipeline_layout_ci.setLayoutCount = uint32_t(set_layout_handles.size());
    pipeline_layout_ci.pSetLayouts = set_layout_handles.data();
    if (pipeline_layout_state->push_constant_ranges_layout) {
        pipeline_layout_ci.pushConstantRangeCount = uint32_t(pipeline_layout_state->push_constant_ranges_layout->size());
        pipeline_layout_ci.pPushConstantRanges = pipeline_layout_state->push_constant_ranges_layout->data();
    }

    const VkResult result = DispatchCreatePipelineLayout(gpuav_.device, &pipeline_layout_ci, nullptr, &recreated_layout);
    (void)result;
    assert(result == VK_SUCCESS);

    // Temporary set layouts are no longer needed once the pipeline layout holds them
    for (size_t i : recreated_desc_set_layouts_indices) {
        DispatchDestroyDescriptorSetLayout(gpuav_.device, set_layout_handles[i], nullptr);
    }

    return recreated_layout;
}
| |
| void PipelineSubState::Destroy() { |
| std::unique_lock<std::mutex> recreated_layout_lock(recreated_layout_mutex); |
| if (recreated_layout != VK_NULL_HANDLE) { |
| DispatchDestroyPipelineLayout(gpuav_.device, recreated_layout, nullptr); |
| recreated_layout = VK_NULL_HANDLE; |
| } |
| } |
| |
| } // namespace gpuav |