blob: cf3587f7404cc4c0ab7a1f97dd9fc14c8f9ed12a [file] [log] [blame]
/* Copyright (c) 2018-2026 The Khronos Group Inc.
* Copyright (c) 2018-2026 Valve Corporation
* Copyright (c) 2018-2026 LunarG, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vulkan/vulkan_core.h>
#include "gpuav/core/gpuav.h"
#include "gpuav/core/gpuav_validation_pipeline.h"
#include "gpuav/validation_cmd/gpuav_validation_cmd_common.h"
#include "gpuav/resources/gpuav_vulkan_objects.h"
#include "gpuav/resources/gpuav_state_trackers.h"
#include "gpuav/resources/gpuav_shader_resources.h"
#include "gpuav/shaders/gpuav_error_header.h"
#include "gpuav/shaders/validation_cmd/push_data.h"
#include "gpuav/shaders/validation_cmd/build_acceleration_structures.h"
#include "generated/gpuav_offline_spirv.h"
#include "error_message/error_strings.h"
#include "containers/limits.h"
#include "utils/math_utils.h"
#include "utils/ray_tracing_utils.h"
#include "profiling/profiling.h"
namespace gpuav {
namespace valcmd {
struct TraceRaysValidationShader {
static size_t GetSpirvSize() { return validation_cmd_trace_rays_comp_size * sizeof(uint32_t); }
static const uint32_t* GetSpirv() { return validation_cmd_trace_rays_comp; }
glsl::TraceRaysPushData push_constants{};
static std::vector<VkDescriptorSetLayoutBinding> GetDescriptorSetLayoutBindings() { return {}; }
std::vector<VkWriteDescriptorSet> GetDescriptorWrites() const { return {}; }
};
void TraceRaysIndirect(Validator& gpuav, const Location& loc, CommandBufferSubState& cb_state, const LastBound& last_bound,
VkDeviceAddress indirect_data_address) {
if (!gpuav.gpuav_settings.validate_indirect_trace_rays_buffers) {
return;
}
valpipe::RestorablePipelineState restorable_state(cb_state, VK_PIPELINE_BIND_POINT_COMPUTE);
ValidationCommandsGpuavState& val_cmd_gpuav_state =
gpuav.shared_resources_cache.GetOrCreate<ValidationCommandsGpuavState>(gpuav, loc);
valpipe::ComputePipeline<TraceRaysValidationShader>& validation_pipeline =
gpuav.shared_resources_cache.GetOrCreate<valpipe::ComputePipeline<TraceRaysValidationShader>>(
gpuav, loc, val_cmd_gpuav_state.error_logging_desc_set_layout_);
if (!validation_pipeline.valid) {
gpuav.InternalError(cb_state.VkHandle(), loc, "Failed to create TraceRaysValidationShader.");
return;
}
// Setup shader resources
// ---
{
const uint64_t ray_query_dimension_max_width =
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupCount[0]) *
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupSize[0]);
const uint64_t ray_query_dimension_max_height =
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupCount[1]) *
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupSize[1]);
const uint64_t ray_query_dimension_max_depth =
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupCount[2]) *
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupSize[2]);
TraceRaysValidationShader shader_resources;
shader_resources.push_constants.indirect_data = indirect_data_address;
shader_resources.push_constants.trace_rays_width_limit =
static_cast<uint32_t>(std::min<uint64_t>(ray_query_dimension_max_width, vvl::kU32Max));
shader_resources.push_constants.trace_rays_height_limit =
static_cast<uint32_t>(std::min<uint64_t>(ray_query_dimension_max_height, vvl::kU32Max));
shader_resources.push_constants.trace_rays_depth_limit =
static_cast<uint32_t>(std::min<uint64_t>(ray_query_dimension_max_depth, vvl::kU32Max));
shader_resources.push_constants.max_ray_dispatch_invocation_count =
gpuav.phys_dev_ext_props.ray_tracing_props_khr.maxRayDispatchInvocationCount;
if (!BindShaderResources(validation_pipeline, gpuav, cb_state, cb_state.compute_index, cb_state.GetErrorLoggerIndex(),
shader_resources)) {
gpuav.InternalError(cb_state.VkHandle(), loc, "Failed to GetManagedDescriptorSet in BindShaderResources");
return;
}
}
// Setup validation pipeline
// ---
{
DispatchCmdBindPipeline(cb_state.VkHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, validation_pipeline.pipeline);
DispatchCmdDispatch(cb_state.VkHandle(), 1, 1, 1);
}
CommandBufferSubState::ErrorLoggerFunc error_logger = [&gpuav](const uint32_t* error_record,
const Location& loc_with_debug_region,
const LogObjectList& objlist) {
bool skip = false;
using namespace glsl;
if (GetErrorGroup(error_record) != kErrorGroup_GpuPreTraceRays) {
return skip;
}
const uint32_t error_sub_code = GetSubError(error_record);
switch (error_sub_code) {
case kErrorSubCode_PreTraceRays_LimitWidth: {
const uint32_t width = error_record[kValCmd_ErrorPayloadDword_0];
skip |= gpuav.LogError("VUID-VkTraceRaysIndirectCommandKHR-width-03638", objlist, loc_with_debug_region,
"Indirect trace rays of VkTraceRaysIndirectCommandKHR::width of %" PRIu32
" would exceed VkPhysicalDeviceLimits::maxComputeWorkGroupCount[0] * "
"VkPhysicalDeviceLimits::maxComputeWorkGroupSize[0] limit of %" PRIu64 ".",
width,
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupCount[0]) *
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupSize[0]));
break;
}
case kErrorSubCode_PreTraceRays_LimitHeight: {
const uint32_t height = error_record[kValCmd_ErrorPayloadDword_0];
skip |= gpuav.LogError("VUID-VkTraceRaysIndirectCommandKHR-height-03639", objlist, loc_with_debug_region,
"Indirect trace rays of VkTraceRaysIndirectCommandKHR::height of %" PRIu32
" would exceed VkPhysicalDeviceLimits::maxComputeWorkGroupCount[1] * "
"VkPhysicalDeviceLimits::maxComputeWorkGroupSize[1] limit of %" PRIu64 ".",
height,
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupCount[1]) *
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupSize[1]));
break;
}
case kErrorSubCode_PreTraceRays_LimitDepth: {
const uint32_t depth = error_record[kValCmd_ErrorPayloadDword_0];
skip |= gpuav.LogError("VUID-VkTraceRaysIndirectCommandKHR-depth-03640", objlist, loc_with_debug_region,
"Indirect trace rays of VkTraceRaysIndirectCommandKHR::height of %" PRIu32
" would exceed VkPhysicalDeviceLimits::maxComputeWorkGroupCount[2] * "
"VkPhysicalDeviceLimits::maxComputeWorkGroupSize[2] limit of %" PRIu64 ".",
depth,
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupCount[2]) *
static_cast<uint64_t>(gpuav.phys_dev_props.limits.maxComputeWorkGroupSize[2]));
break;
}
case kErrorSubCode_PreTraceRays_LimitVolume: {
const VkExtent3D trace_rays_extent = {error_record[kValCmd_ErrorPayloadDword_0],
error_record[kValCmd_ErrorPayloadDword_1],
error_record[kValCmd_ErrorPayloadDword_2]};
const uint64_t rays_volume = trace_rays_extent.width * trace_rays_extent.height * trace_rays_extent.depth;
skip |= gpuav.LogError(
"VUID-VkTraceRaysIndirectCommandKHR-width-03641", objlist, loc_with_debug_region,
"Indirect trace rays of volume %" PRIu64
" (%s) would exceed VkPhysicalDeviceRayTracingPipelinePropertiesKHR::maxRayDispatchInvocationCount "
"limit of %" PRIu32 ".",
rays_volume, string_VkExtent3D(trace_rays_extent).c_str(),
gpuav.phys_dev_ext_props.ray_tracing_props_khr.maxRayDispatchInvocationCount);
break;
}
default:
break;
}
return skip;
};
cb_state.AddCommandErrorLogger(loc, &last_bound, std::move(error_logger));
}
struct BuildAccelerationStructuresValidationShader {
static size_t GetSpirvSize() { return validation_cmd_tlas_comp_size * sizeof(uint32_t); }
static const uint32_t* GetSpirv() { return validation_cmd_tlas_comp; }
glsl::TLASValidationShaderPushData push_constants{};
static std::vector<VkDescriptorSetLayoutBinding> GetDescriptorSetLayoutBindings() { return {}; }
std::vector<VkWriteDescriptorSet> GetDescriptorWrites() const { return {}; }
};
// Single-triangle bottom level acceleration structure owned by GPU-AV.
//
// The constructor allocates the vertex, transform, scratch and storage buffers, creates the acceleration
// structure and retrieves its device address (`blas_address`). The actual build is not recorded here:
// it is appended to `cb_state.on_pre_cb_submission_functions` and recorded in the per-submission
// command buffer handed to that callback.
//
// If any step fails, construction stops early: `blas_address` stays 0 and no build is scheduled.
// The scheduled callback captures `this`, so the object must stay alive, at the same address, until
// the callback has run.
class DummyBLAS {
  public:
    DummyBLAS(Validator& gpuav, CommandBufferSubState& cb_state)
        : device(gpuav.device), vertex_buffer(gpuav), transform_buffer(gpuav), scratch_buffer(gpuav), blas_buffer(gpuav) {
        // Alignment applied to the transform data device address
        constexpr VkDeviceAddress transform_data_alignment = 16;

        // Vertex buffer: 3 vertices of 3 floats each
        {
            VkBufferCreateInfo vertex_buffer_ci = vku::InitStructHelper();
            vertex_buffer_ci.size = 3 * 3 * sizeof(float);
            vertex_buffer_ci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
                                     VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                     VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
            VmaAllocationCreateInfo alloc_ci = {};
            alloc_ci.usage = VMA_MEMORY_USAGE_AUTO;
            alloc_ci.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
            const VkResult result = vertex_buffer.Create(&vertex_buffer_ci, &alloc_ci);
            if (result != VK_SUCCESS) {
                gpuav.InternalVmaError(LogObjectList(), result, "Failed to create dummy BLAS's vertex buffer.");
                return;
            }

            constexpr std::array vertices = {// Vertex 0
                                             10.0f, 10.0f, 0.0f,
                                             // Vertex 1
                                             -10.0f, 10.0f, 0.0f,
                                             // Vertex 2
                                             0.0f, -10.0f, 0.0f};
            auto vertex_buffer_ptr = static_cast<float*>(vertex_buffer.GetMappedPtr());
            std::copy(vertices.begin(), vertices.end(), vertex_buffer_ptr);
        }

        // Transform buffer: one identity matrix, over-allocated so that its address can be aligned
        VkDeviceAddress transform_data_address = 0;
        {
            VkBufferCreateInfo transform_buffer_ci = vku::InitStructHelper();
            transform_buffer_ci.size = sizeof(VkTransformMatrixKHR) + transform_data_alignment;
            transform_buffer_ci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
                                        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                        VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
            VmaAllocationCreateInfo alloc_ci = {};
            alloc_ci.usage = VMA_MEMORY_USAGE_AUTO;
            alloc_ci.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
            const VkResult result = transform_buffer.Create(&transform_buffer_ci, &alloc_ci);
            if (result != VK_SUCCESS) {
                gpuav.InternalVmaError(LogObjectList(), result, "Failed to create dummy BLAS's transform buffer.");
                return;
            }

            // clang-format off
            VkTransformMatrixKHR transform_matrix = {{
                { 1.0f, 0.0f, 0.0f, 0.0f },
                { 0.0f, 1.0f, 0.0f, 0.0f },
                { 0.0f, 0.0f, 1.0f, 0.0f },
            }};
            // clang-format on

            // The build reads the matrix at the aligned device address, so the host write must land at the
            // same offset from the buffer start. Writing at offset 0 is only correct when the buffer
            // address happens to be aligned already.
            const VkDeviceAddress transform_buffer_address = transform_buffer.Address();
            transform_data_address = Align<VkDeviceAddress>(transform_buffer_address, transform_data_alignment);
            const VkDeviceAddress transform_data_offset = transform_data_address - transform_buffer_address;
            auto transform_data_ptr = static_cast<uint8_t*>(transform_buffer.GetMappedPtr()) + transform_data_offset;
            std::memcpy(transform_data_ptr, &transform_matrix, sizeof(transform_matrix));
        }

        // Geometry: one non-indexed triangle
        triangle = vku::InitStructHelper();
        triangle.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
        triangle.geometry.triangles = vku::InitStructHelper();
        triangle.geometry.triangles.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT;
        triangle.geometry.triangles.vertexData.deviceAddress = vertex_buffer.Address();
        triangle.geometry.triangles.vertexStride = 3 * sizeof(float);
        triangle.geometry.triangles.maxVertex = 2;
        triangle.geometry.triangles.indexType = VK_INDEX_TYPE_NONE_KHR;
        triangle.geometry.triangles.indexData.deviceAddress = 0;
        triangle.geometry.triangles.transformData.deviceAddress = transform_data_address;

        as_build_geom_info = vku::InitStructHelper();
        as_build_geom_info.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
        as_build_geom_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
        as_build_geom_info.srcAccelerationStructure = VK_NULL_HANDLE;
        as_build_geom_info.dstAccelerationStructure = VK_NULL_HANDLE;
        as_build_geom_info.geometryCount = 1;
        as_build_geom_info.pGeometries = &triangle;
        as_build_geom_info.scratchData.deviceAddress = 0;

        // Upper bound used for the size query (2). The build recorded below uses a primitive count of 1.
        const uint32_t max_prim_count = triangle.geometry.triangles.maxVertex;
        VkAccelerationStructureBuildSizesInfoKHR build_sizes_info = vku::InitStructHelper();
        DispatchGetAccelerationStructureBuildSizesKHR(gpuav.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
                                                      &as_build_geom_info, &max_prim_count, &build_sizes_info);

        // Scratch buffer, over-allocated so that its address can be aligned
        {
            VkBufferCreateInfo scratch_buffer_ci = vku::InitStructHelper();
            scratch_buffer_ci.size = build_sizes_info.buildScratchSize +
                                     gpuav.phys_dev_ext_props.acc_structure_props.minAccelerationStructureScratchOffsetAlignment;
            scratch_buffer_ci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
                                      VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                      VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
            VmaAllocationCreateInfo alloc_ci = {};
            alloc_ci.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
            if (gpuav.IsAllDeviceLocalMappable()) {
                alloc_ci.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
            }
            const VkResult result = scratch_buffer.Create(&scratch_buffer_ci, &alloc_ci);
            if (result != VK_SUCCESS) {
                gpuav.InternalVmaError(LogObjectList(), result, "Failed to create dummy BLAS's scratch buffer.");
                return;
            }
        }

        // Buffer backing the acceleration structure
        {
            VkBufferCreateInfo blas_buffer_ci = vku::InitStructHelper();
            blas_buffer_ci.size = build_sizes_info.accelerationStructureSize;
            blas_buffer_ci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR |
                                   VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                                   VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            VmaAllocationCreateInfo alloc_ci = {};
            alloc_ci.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
            if (gpuav.IsAllDeviceLocalMappable()) {
                alloc_ci.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
            }
            const VkResult result = blas_buffer.Create(&blas_buffer_ci, &alloc_ci);
            if (result != VK_SUCCESS) {
                gpuav.InternalVmaError(LogObjectList(), result, "Failed to create dummy BLAS buffer.");
                return;
            }
        }

        as_build_geom_info.scratchData.deviceAddress = Align<VkDeviceAddress>(
            scratch_buffer.Address(), gpuav.phys_dev_ext_props.acc_structure_props.minAccelerationStructureScratchOffsetAlignment);

        VkAccelerationStructureCreateInfoKHR as_ci = vku::InitStructHelper();
        as_ci.buffer = blas_buffer.VkHandle();
        as_ci.offset = 0;
        as_ci.size = blas_buffer.Size();
        as_ci.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
        const VkResult create_as_result = DispatchCreateAccelerationStructureKHR(gpuav.device, &as_ci, nullptr, &blas_handle);
        if (create_as_result != VK_SUCCESS) {
            // Without a valid handle there is nothing to query an address from, nor to build into
            blas_handle = VK_NULL_HANDLE;
            gpuav.InternalVmaError(LogObjectList(), create_as_result, "Failed to create dummy BLAS acceleration structure.");
            return;
        }
        as_build_geom_info.dstAccelerationStructure = blas_handle;

        VkAccelerationStructureDeviceAddressInfoKHR addr_info = vku::InitStructHelper();
        addr_info.accelerationStructure = blas_handle;
        blas_address = DispatchGetAccelerationStructureDeviceAddressKHR(gpuav.device, &addr_info);

        // Deferred build: recorded in the per-submission command buffer, followed by a barrier making
        // the acceleration structure write available to later acceleration structure reads.
        cb_state.on_pre_cb_submission_functions.emplace_back(
            [this](Validator& gpuav, CommandBufferSubState& cb, VkCommandBuffer per_submission_cb) {
                VkAccelerationStructureBuildRangeInfoKHR triangle_build_range{};
                triangle_build_range.primitiveCount = 1;
                triangle_build_range.primitiveOffset = 0;
                triangle_build_range.firstVertex = 0;
                triangle_build_range.transformOffset = 0;
                std::array build_range_infos = {&triangle_build_range};
                DispatchCmdBuildAccelerationStructuresKHR(per_submission_cb, 1, &as_build_geom_info, build_range_infos.data());

                VkBufferMemoryBarrier barrier_blas_build = vku::InitStructHelper();
                barrier_blas_build.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
                barrier_blas_build.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
                barrier_blas_build.buffer = blas_buffer.VkHandle();
                barrier_blas_build.offset = 0;
                barrier_blas_build.size = blas_buffer.Size();

                DispatchCmdPipelineBarrier(per_submission_cb, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
                                           VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 0, nullptr, 1,
                                           &barrier_blas_build, 0, nullptr);
            });
    }

    // Destroys the acceleration structure (if it was created), then the buffers.
    ~DummyBLAS() {
        if (blas_handle) {
            DispatchDestroyAccelerationStructureKHR(device, blas_handle, nullptr);
        }
        vertex_buffer.Destroy();
        transform_buffer.Destroy();
        scratch_buffer.Destroy();
        blas_buffer.Destroy();
    }

  public:
    // Device address of the dummy BLAS, 0 if construction failed
    VkDeviceAddress blas_address = 0;

  private:
    VkDevice device = VK_NULL_HANDLE;
    VkAccelerationStructureKHR blas_handle = VK_NULL_HANDLE;
    vko::Buffer vertex_buffer;
    vko::Buffer transform_buffer;
    vko::Buffer scratch_buffer;
    vko::Buffer blas_buffer;
    // Both structs are read by the deferred build callback (as_build_geom_info.pGeometries points to
    // `triangle`), hence they are members and not constructor locals.
    VkAccelerationStructureGeometryKHR triangle{};
    VkAccelerationStructureBuildGeometryInfoKHR as_build_geom_info{};
};
// Records validation dispatches for the top level acceleration structure (TLAS) builds of a
// vkCmdBuildAccelerationStructuresKHR call, and registers the error logger decoding what the
// validation shader reports.
//
// gpuav               Validator owning settings, tracked acceleration structures and shared resources.
// loc                 Location of the validated command.
// cb_state            Command buffer in which barriers and validation dispatches are recorded.
// last_bound          Forwarded to CommandBufferSubState::AddCommandErrorLogger.
// info_count          Number of elements in `infos` and `build_ranges_infos`.
// infos               Build infos of the command.
// build_ranges_infos  For each info, its array of build ranges (one per geometry).
//
// Does nothing when `validate_acceleration_structures_builds` is disabled, or when no instance geometry
// is found in a TLAS build info.
void TLAS(Validator& gpuav, const Location& loc, CommandBufferSubState& cb_state, const LastBound& last_bound, uint32_t info_count,
          const VkAccelerationStructureBuildGeometryInfoKHR* infos,
          const VkAccelerationStructureBuildRangeInfoKHR* const* build_ranges_infos) {
    VVL_ZoneScoped;

    if (!gpuav.gpuav_settings.validate_acceleration_structures_builds) {
        return;
    }

    // One array of instances referenced by a TLAS build
    struct BlasArray {
        VkDeviceAddress array_start_addr = 0;
        uint32_t size = 0;
        uint32_t is_array_of_pointers = 0;
        uint32_t info_i = 0;
        uint32_t geom_i = 0;
    };

    // One bottom level acceleration structure built by this same command
    struct BlasBuiltInCmd {
        std::shared_ptr<vvl::AccelerationStructureKHR> blas = {};
        size_t p_info_i = 0;
    };

    std::vector<BlasArray> blas_arrays;
    std::vector<BlasBuiltInCmd> blas_built_in_cmd_array;

    for (const auto [info_i, info] : vvl::enumerate(infos, info_count)) {
        if (info.type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR) {
            for (uint32_t geom_i = 0; geom_i < info.geometryCount; ++geom_i) {
                const VkAccelerationStructureGeometryKHR& geom = rt::GetGeometry(info, geom_i);
                if (geom.geometryType == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
                    const VkAccelerationStructureBuildRangeInfoKHR& build_range = build_ranges_infos[info_i][geom_i];
                    BlasArray blas_array;
                    blas_array.size = build_range.primitiveCount;
                    blas_array.array_start_addr = geom.geometry.instances.data.deviceAddress + build_range.primitiveOffset;
                    blas_array.is_array_of_pointers = uint32_t(geom.geometry.instances.arrayOfPointers);
                    blas_array.info_i = info_i;
                    blas_array.geom_i = geom_i;
                    blas_arrays.emplace_back(blas_array);
                }
            }
        } else if (info.type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR) {
            auto blas = gpuav.Get<vvl::AccelerationStructureKHR>(info.dstAccelerationStructure);
            if (blas) {
                BlasBuiltInCmd blas_build_in_cmd{};
                blas_build_in_cmd.blas = std::move(blas);
                blas_build_in_cmd.p_info_i = info_i;
                // Move: no need to copy the shared_ptr a second time
                blas_built_in_cmd_array.emplace_back(std::move(blas_build_in_cmd));
            }
        }
    }

    // No TLAS built in command, so no validation to perform
    if (blas_arrays.empty()) {
        return;
    }

    // NOTE(review): presumably saves the compute state and restores it when going out of scope - confirm
    valpipe::RestorablePipelineState restorable_state(cb_state, VK_PIPELINE_BIND_POINT_COMPUTE);

    ValidationCommandsGpuavState& val_cmd_gpuav_state =
        gpuav.shared_resources_cache.GetOrCreate<ValidationCommandsGpuavState>(gpuav, loc);
    valpipe::ComputePipeline<BuildAccelerationStructuresValidationShader>& validation_pipeline =
        gpuav.shared_resources_cache.GetOrCreate<valpipe::ComputePipeline<BuildAccelerationStructuresValidationShader>>(
            gpuav, loc, val_cmd_gpuav_state.error_logging_desc_set_layout_);
    if (!validation_pipeline.valid) {
        return;
    }

    // Device local buffer holding the pointers to the tracked acceleration structure arrays.
    // Its address is known at record time; its content is filled by the callback below.
    vko::BufferRange as_arrays_ptr_buffer =
        cb_state.gpu_resources_manager.GetDeviceLocalBufferRange(sizeof(glsl::AccelerationStructureArraysPtr));

    // Snapshots the tracked acceleration structures (addresses, metadata, buffer address ranges) into GPU
    // visible buffers, and records in `per_submission_cb` a copy of the pointers to those buffers into
    // `as_arrays_ptr_buffer`.
    cb_state.on_pre_cb_submission_functions.emplace_back([as_arrays_ptr_buffer](Validator& gpuav, CommandBufferSubState& cb,
                                                                                VkCommandBuffer per_submission_cb) {
        VVL_ZoneScopedN("validate_as_builds_pre_submit");

        // Held for the whole snapshot, so the array cannot change while it is read
        ReadLockGuard lock(gpuav.device_state->as_with_addresses.array_mutex);
        const auto& tracked_as_array = gpuav.device_state->as_with_addresses.array;

        // valid AS addresses buffer
        // Layout: two uint32_t (the first one is the element count, the second one is left unwritten),
        // followed by one uint64_t address per acceleration structure
        vko::BufferRange as_addresses_buffer = cb.gpu_resources_manager.GetHostCoherentBufferRange(
            2 * sizeof(uint32_t) + tracked_as_array.size() * sizeof(uint64_t));
        auto accel_struct_addresses_buffer_u32_ptr = (uint32_t*)as_addresses_buffer.offset_mapped_ptr;
        *accel_struct_addresses_buffer_u32_ptr = (uint32_t)tracked_as_array.size();
        auto as_addresses_ptr = (uint64_t*)(accel_struct_addresses_buffer_u32_ptr + 2);

        // valid AS metadata buffer
        vko::BufferRange as_metadatas_buffer =
            cb.gpu_resources_manager.GetHostCachedBufferRange(tracked_as_array.size() * sizeof(uint32_t));
        auto as_metadatas_ptr = (uint32_t*)(as_metadatas_buffer.offset_mapped_ptr);

        // valid AS buffer address ranges buffer: one [begin, end] pair of uint64_t per acceleration structure
        vko::BufferRange as_buffer_addr_ranges_buffer =
            cb.gpu_resources_manager.GetHostCoherentBufferRange(tracked_as_array.size() * (2 * sizeof(uint64_t)));
        auto as_buffer_addr_ranges_ptr = (uint64_t*)(as_buffer_addr_ranges_buffer.offset_mapped_ptr);

        uint32_t written_count = 0;
        for (const vvl::AccelerationStructureKHR* as : tracked_as_array) {
            as_addresses_ptr[written_count] = as->acceleration_structure_address;

            // Both flags are "still valid" flags: true means the buffer exists and was not destroyed,
            // respectively that such a buffer is bound to memory
            const bool is_buffer_alive = as->buffer_state && !as->buffer_state->Destroyed();
            const bool is_memory_bound = is_buffer_alive && as->buffer_state->IsMemoryBound();
            uint32_t metadata = 0;
            metadata |= SET_BUILD_AS_METADATA_BUFFER_STATUS(is_buffer_alive);
            metadata |= SET_BUILD_AS_METADATA_AS_TYPE(as->GetType());
            metadata |= SET_BUILD_AS_METADATA_BUFFER_MEMORY_STATUS(is_memory_bound);
            as_metadatas_ptr[written_count] = metadata;

            const vvl::range<VkDeviceAddress> as_buffer_addr_range = as->device_address_range;
            as_buffer_addr_ranges_ptr[2 * written_count] = as_buffer_addr_range.begin;
            as_buffer_addr_ranges_ptr[2 * written_count + 1] = as_buffer_addr_range.end;

            ++written_count;
        }

        // Fill a GPU buffer with a pointer to the AS metadata
        vko::BufferRange submit_time_ptr_to_accel_structs_metadata_buffer =
            cb.gpu_resources_manager.GetHostCoherentBufferRange(sizeof(glsl::AccelerationStructureArraysPtr));
        auto submit_time_ptr_to_accel_structs_metadata_buffer_ptr =
            (glsl::AccelerationStructureArraysPtr*)submit_time_ptr_to_accel_structs_metadata_buffer.offset_mapped_ptr;
        submit_time_ptr_to_accel_structs_metadata_buffer_ptr->addresses_ptr = as_addresses_buffer.offset_address;
        submit_time_ptr_to_accel_structs_metadata_buffer_ptr->metadata_ptr = as_metadatas_buffer.offset_address;
        submit_time_ptr_to_accel_structs_metadata_buffer_ptr->buffer_ranges_ptr = as_buffer_addr_ranges_buffer.offset_address;

        vko::CmdSynchronizedCopyBufferRange(per_submission_cb, as_arrays_ptr_buffer,
                                            submit_time_ptr_to_accel_structs_metadata_buffer);
    });

    // Setup Validation pipeline
    // ---
    {
        DummyBLAS& dummy_blas = gpuav.shared_resources_cache.GetOrCreate<DummyBLAS>(gpuav, cb_state);

        // Fill a buffer with BLAS built in this cmd: one [begin, end] address pair per BLAS
        vko::BufferRange blas_built_in_cmd_buffer;
        if (!blas_built_in_cmd_array.empty()) {
            blas_built_in_cmd_buffer =
                cb_state.gpu_resources_manager.GetHostCachedBufferRange(blas_built_in_cmd_array.size() * (2 * sizeof(uint64_t)));
            auto blas_built_in_cmd_buffer_ptr = (uint64_t*)(blas_built_in_cmd_buffer.offset_mapped_ptr);
            for (const auto [i, blas_built_in_cmd] : vvl::enumerate(blas_built_in_cmd_array)) {
                const vvl::range<VkDeviceAddress> blas_built_in_cmd_buffer_addr_range =
                    blas_built_in_cmd.blas->device_address_range;
                blas_built_in_cmd_buffer_ptr[2 * i] = blas_built_in_cmd_buffer_addr_range.begin;
                blas_built_in_cmd_buffer_ptr[2 * i + 1] = blas_built_in_cmd_buffer_addr_range.end;
            }
        }

        BuildAccelerationStructuresValidationShader shader_resources;
        shader_resources.push_constants.ptr_to_ptr_to_accel_structs_arrays = as_arrays_ptr_buffer.offset_address;
        shader_resources.push_constants.valid_dummy_blas_addr = dummy_blas.blas_address;
        shader_resources.push_constants.blas_built_in_cmd_array_ptr = blas_built_in_cmd_buffer.offset_address;
        shader_resources.push_constants.blas_built_in_cmd_array_size = (uint32_t)blas_built_in_cmd_array.size();

        DispatchCmdBindPipeline(cb_state.VkHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, validation_pipeline.pipeline);

        // Validation dispatch, one for each TLAS build
        // ---
        for (size_t blas_array_i = 0; blas_array_i < blas_arrays.size(); ++blas_array_i) {
            const BlasArray& blas_array = blas_arrays[blas_array_i];

            const auto blas_array_buffers = gpuav.GetBuffersByAddress(blas_array.array_start_addr);
            if (blas_array_buffers.empty()) {
                assert(false);
            } else {
                // Order the validation shader accesses after previous accesses to the instance buffer
                VkBufferMemoryBarrier barrier_write_after_read = vku::InitStructHelper();
                barrier_write_after_read.srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
                barrier_write_after_read.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
                barrier_write_after_read.buffer = blas_array_buffers[0]->VkHandle();
                barrier_write_after_read.offset = 0;
                barrier_write_after_read.size = VK_WHOLE_SIZE;

                DispatchCmdPipelineBarrier(cb_state.VkHandle(), VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
                                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1, &barrier_write_after_read, 0,
                                           nullptr);
            }

            // First pass: validate the acceleration structure references of each instance
            shader_resources.push_constants.validation_mode = glsl::kBuildASValidationMode_invalid_AS;
            const uint32_t is_array_of_pointers = blas_array.is_array_of_pointers;
            if (is_array_of_pointers == 0) {
                shader_resources.push_constants.blas_array_start_addr = blas_array.array_start_addr;
                shader_resources.push_constants.blas_ptr_array_start_addr = 0;
            } else {
                shader_resources.push_constants.blas_ptr_array_start_addr = blas_array.array_start_addr;
                shader_resources.push_constants.blas_array_start_addr = 0;
            }
            shader_resources.push_constants.blas_array_size = blas_array.size;
            shader_resources.push_constants.is_array_of_pointers = is_array_of_pointers;
            shader_resources.push_constants.blas_array_i = (uint32_t)blas_array_i;

            // The error logging descriptor set only needs to be bound once for all the dispatches
            const bool bind_error_logging_desc_set = blas_array_i == 0;
            if (!BindShaderResources(validation_pipeline, gpuav, cb_state, cb_state.compute_index, cb_state.GetErrorLoggerIndex(),
                                     shader_resources, bind_error_logging_desc_set)) {
                assert(false);
                return;
            }

            // Work group counts are rounded up so that every element gets covered
            constexpr uint32_t wg_size_x = 8;
            constexpr uint32_t wg_size_y = 8;
            const uint32_t as_instances_count = blas_array.size;
            const uint32_t wg_count_x = as_instances_count / wg_size_x + uint32_t(as_instances_count % wg_size_x > 0);
            DispatchCmdDispatch(cb_state.VkHandle(), wg_count_x, 1, 1);

            // Second pass: instances (x) against the BLAS built in this command (y)
            shader_resources.push_constants.validation_mode = glsl::kBuildASValidationMode_memory_overlaps;
            BindShaderPushConstants(validation_pipeline, gpuav, cb_state, shader_resources);
            const uint32_t wg_count_y =
                (uint32_t)blas_built_in_cmd_array.size() / wg_size_y + uint32_t(blas_built_in_cmd_array.size() % wg_size_y > 0);
            DispatchCmdDispatch(cb_state.VkHandle(), wg_count_x, wg_count_y, 1);

            if (!blas_array_buffers.empty()) {
                // Order the acceleration structure build after the validation shader writes.
                // NOTE(review): the Vulkan specification synchronizes build input buffers with
                // VK_ACCESS_SHADER_READ_BIT - confirm VK_ACCESS_INDIRECT_COMMAND_READ_BIT is intended.
                VkBufferMemoryBarrier barrier_read_after_write = vku::InitStructHelper();
                barrier_read_after_write.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
                barrier_read_after_write.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
                barrier_read_after_write.buffer = blas_array_buffers[0]->VkHandle();
                barrier_read_after_write.offset = 0;
                barrier_read_after_write.size = VK_WHOLE_SIZE;

                DispatchCmdPipelineBarrier(cb_state.VkHandle(), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                           VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 0, nullptr, 1,
                                           &barrier_read_after_write, 0, nullptr);
            }
        }
    }

    // Decodes error records produced by the validation shader. Records from other error groups are ignored.
    // Payload: dwords 0-1 = faulty acceleration structure reference, dword 2 = instance index,
    // dword 3 = index in `blas_arrays`, dword 4 (memory overlap only) = index in `blas_built_in_cmd_array`.
    CommandBufferSubState::ErrorLoggerFunc error_logger = [&gpuav, blas_arrays = std::move(blas_arrays),
                                                           blas_built_in_cmd_array = std::move(blas_built_in_cmd_array)](
                                                              const uint32_t* error_record, const Location& loc_with_debug_region,
                                                              const LogObjectList& objlist) {
        bool skip = false;
        using namespace glsl;

        if (GetErrorGroup(error_record) != kErrorGroup_GpuPreBuildAccelerationStructures) {
            return skip;
        }

        const uint64_t blas_in_tlas_addr = glsl::GetUint64(error_record + kValCmd_ErrorPayloadDword_0);
        const uint32_t as_instance_i = error_record[kValCmd_ErrorPayloadDword_2];
        const uint32_t blas_array_i = error_record[kValCmd_ErrorPayloadDword_3];

        // The index comes from GPU memory: never use it unchecked to index a host container
        if (blas_array_i >= blas_arrays.size()) {
            assert(false);
            return skip;
        }

        // Gather error info
        // ---
        const char* vvl_bug_msg =
            "this is most likely a validation layer bug. Please file an issue at "
            "https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues ";

        const auto as_found_it =
            std::find_if(gpuav.device_state->as_with_addresses.array.begin(), gpuav.device_state->as_with_addresses.array.end(),
                         [blas_in_tlas_addr](vvl::AccelerationStructureKHR* as) {
                             return as->acceleration_structure_address == blas_in_tlas_addr;
                         });
        // Null when the reference could not be mapped back to a tracked acceleration structure
        vvl::AccelerationStructureKHR* found_as =
            (as_found_it != gpuav.device_state->as_with_addresses.array.end()) ? *as_found_it : nullptr;

        // Formats the buffer backing an acceleration structure, tolerating a missing buffer state
        const auto format_as_buffer = [&gpuav](const vvl::AccelerationStructureKHR& as) -> std::string {
            return as.buffer_state ? gpuav.FormatHandle(as.buffer_state->VkHandle()) : std::string("unknown buffer");
        };

        std::stringstream ss_as;
        std::stringstream ss_as_buffer;
        if (found_as) {
            ss_as << "Acceleration structure corresponding to reference: " << gpuav.FormatHandle(found_as->VkHandle());
            if (found_as->buffer_state) {
                ss_as_buffer << "(" << gpuav.FormatHandle(found_as->buffer_state->VkHandle()) << ") ";
            }
        } else {
            ss_as << "Could not map acceleration structure reference to its corresponding handle, " << vvl_bug_msg;
        }
        const std::string ss_as_str = ss_as.str();
        const std::string ss_buffer_str = ss_as_buffer.str();

        const BlasArray& blas_array = blas_arrays[blas_array_i];
        std::ostringstream invalid_blas_loc;
        invalid_blas_loc << "pInfos[" << blas_array.info_i << "].pGeometries[" << blas_array.geom_i
                         << "].geometry.instances<VkAccelerationStructureInstance" << (blas_array.is_array_of_pointers ? " *" : "")
                         << ">[" << as_instance_i << ']' << (blas_array.is_array_of_pointers ? "->" : ".")
                         << "accelerationStructureReference (0x" << std::hex << blas_in_tlas_addr << ")";
        const std::string invalid_blas_loc_str = invalid_blas_loc.str();

        // Log error
        // ---
        const uint32_t error_sub_code = GetSubError(error_record);
        switch (error_sub_code) {
            case kErrorSubCode_PreBuildAccelerationStructures_BlasAddrAlignment: {
                skip |= gpuav.LogError("VUID-vkCmdBuildAccelerationStructuresKHR-pInfos-03717", objlist, loc_with_debug_region,
                                       "%s is not aligned to 16 bytes.", invalid_blas_loc_str.c_str());
                break;
            }
            case kErrorSubCode_PreBuildAccelerationStructures_InvalidAS: {
                skip |= gpuav.LogError("VUID-vkCmdBuildAccelerationStructuresKHR-pInfos-12281", objlist, loc_with_debug_region,
                                       "%s is an invalid acceleration structure reference.", invalid_blas_loc_str.c_str());
                break;
            }
            case kErrorSubCode_PreBuildAccelerationStructures_DestroyedASBuffer: {
                skip |= gpuav.LogError("VUID-vkCmdBuildAccelerationStructuresKHR-pInfos-12281", objlist, loc_with_debug_region,
                                       "%s is an invalid acceleration structure reference - underlying buffer %swas already "
                                       "destroyed when build command started execution. %s.",
                                       invalid_blas_loc_str.c_str(), ss_buffer_str.c_str(), ss_as_str.c_str());
                break;
            }
            case kErrorSubCode_PreBuildAccelerationStructures_InvalidASType: {
                std::stringstream ss_as_type;
                if (found_as) {
                    ss_as_type << ", but has type " << string_VkAccelerationStructureTypeKHR(found_as->GetType()) << ". ";
                }
                const std::string ss_as_type_str = ss_as_type.str();
                skip |= gpuav.LogError("VUID-vkCmdBuildAccelerationStructuresKHR-pInfos-12281", objlist, loc_with_debug_region,
                                       "%s is not a bottom level acceleration structure%s%s.", invalid_blas_loc_str.c_str(),
                                       ss_as_type_str.c_str(), ss_as_str.c_str());
                break;
            }
            case kErrorSubCode_PreBuildAccelerationStructures_DestroyedASMemory: {
                // ss_buffer_str already ends with a space when not empty, hence "%swas"
                skip |= gpuav.LogError("VUID-vkCmdBuildAccelerationStructuresKHR-pInfos-03709", objlist, loc_with_debug_region,
                                       "%s is an invalid acceleration structure reference - underlying buffer %swas not bound to "
                                       "memory anymore when build command started execution. Memory was probably destroyed. %s.",
                                       invalid_blas_loc_str.c_str(), ss_buffer_str.c_str(), ss_as_str.c_str());
                break;
            }
            case kErrorSubCode_PreBuildAccelerationStructures_BlasMemoryOverlap: {
                const uint32_t blas_built_in_cmd_i = error_record[kValCmd_ErrorPayloadDword_4];
                // Same as above: index read from GPU memory, check it before use
                const bool is_built_blas_known = blas_built_in_cmd_i < blas_built_in_cmd_array.size();
                assert(is_built_blas_known);

                std::stringstream error_ss;
                if (is_built_blas_known && found_as) {
                    const BlasBuiltInCmd& blas_built_in_cmd = blas_built_in_cmd_array[blas_built_in_cmd_i];
                    const vvl::range<VkDeviceAddress> blas_in_tlas_buffer_addr_range = found_as->device_address_range;
                    const vvl::range<VkDeviceAddress> blas_built_in_cmd_buffer_addr_range =
                        blas_built_in_cmd.blas->device_address_range;
                    const vvl::range<VkDeviceAddress> overlap =
                        blas_in_tlas_buffer_addr_range & blas_built_in_cmd_buffer_addr_range;
                    assert(overlap.non_empty());

                    const VkAccelerationStructureKHR blas_built_in_cmd_handle = blas_built_in_cmd.blas->VkHandle();
                    const VkAccelerationStructureKHR blas_in_tlas_handle = found_as->VkHandle();
                    if (blas_built_in_cmd_handle != blas_in_tlas_handle) {
                        // Two distinct acceleration structures whose backing memory overlaps
                        error_ss << "pInfos[" << blas_built_in_cmd.p_info_i << "].dstAccelerationStructure ("
                                 << gpuav.FormatHandle(blas_built_in_cmd_handle) << "), backed by buffer ("
                                 << format_as_buffer(*blas_built_in_cmd.blas) << "), overlaps on buffer address range "
                                 << vvl::string_range_hex(overlap) << " with buffer (" << format_as_buffer(*found_as)
                                 << ") of BLAS (" << gpuav.FormatHandle(blas_in_tlas_handle) << "), referenced in "
                                 << invalid_blas_loc_str;
                    } else {
                        // The very same acceleration structure is both built and referenced
                        error_ss << "pInfos[" << blas_built_in_cmd.p_info_i << "].dstAccelerationStructure ("
                                 << gpuav.FormatHandle(blas_built_in_cmd_handle)
                                 << ") is also referenced in a TLAS built in the same command, through " << invalid_blas_loc_str;
                    }
                } else {
                    error_ss << "Could not retrieve error information, " << vvl_bug_msg;
                }

                const std::string error_str = error_ss.str();
                skip |= gpuav.LogError("VUID-vkCmdBuildAccelerationStructuresKHR-dstAccelerationStructure-03706", objlist,
                                       loc_with_debug_region, "%s.", error_str.c_str());
                break;
            }
            default:
                break;
        }

        return skip;
    };

    cb_state.AddCommandErrorLogger(loc, &last_bound, std::move(error_logger));
}
struct BLASValidationShader {
static size_t GetSpirvSize() { return validation_cmd_blas_comp_size * sizeof(uint32_t); }
static const uint32_t* GetSpirv() { return validation_cmd_blas_comp; }
glsl::BLASValidationShaderPushData push_constants{};
static std::vector<VkDescriptorSetLayoutBinding> GetDescriptorSetLayoutBindings() { return {}; }
std::vector<VkWriteDescriptorSet> GetDescriptorWrites() const { return {}; }
};
// Records, in `cb_state`, compute dispatches validating on the GPU the inputs of an acceleration structure build command,
// and registers the error logger turning the resulting GPU error records into validation messages.
//
// Does nothing if the `validate_acceleration_structures_builds` setting is off, or if the validation pipeline is invalid.
//
// For each geometry of each element of `infos`, up to three validation dispatches are recorded:
//  - triangles geometry with an index type other than VK_INDEX_TYPE_NONE_KHR: index validation
//  - AABBs geometry: AABB validation
//  - triangles geometry with a non null transformData address: transform matrix validation
// Geometries matching none of those cases are skipped.
//
// gpuav:                 validator, source of settings, cached pipeline, buffer lookup and error logging
// loc:                   location forwarded to resources creation and to the command error logger
// cb_state:              command buffer in which validation commands are recorded
// last_bound:            forwarded to the command error logger
// info_count:            element count of `infos` and `pp_build_ranges_infos`
// infos:                 build geometry infos of the build command
// pp_build_ranges_infos: build range infos, indexed as [info index][geometry index]
void BLAS(Validator& gpuav, const Location& loc, CommandBufferSubState& cb_state, const LastBound& last_bound, uint32_t info_count,
          const VkAccelerationStructureBuildGeometryInfoKHR* infos,
          const VkAccelerationStructureBuildRangeInfoKHR* const* pp_build_ranges_infos) {
    VVL_ZoneScoped;

    if (!gpuav.gpuav_settings.validate_acceleration_structures_builds) {
        return;
    }

    valpipe::RestorablePipelineState restorable_state(cb_state, VK_PIPELINE_BIND_POINT_COMPUTE);

    ValidationCommandsGpuavState& val_cmd_gpuav_state =
        gpuav.shared_resources_cache.GetOrCreate<ValidationCommandsGpuavState>(gpuav, loc);
    valpipe::ComputePipeline<BLASValidationShader>& validation_pipeline =
        gpuav.shared_resources_cache.GetOrCreate<valpipe::ComputePipeline<BLASValidationShader>>(
            gpuav, loc, val_cmd_gpuav_state.error_logging_desc_set_layout_);
    if (!validation_pipeline.valid) {
        return;
    }

    // Snapshot of the build parameters of one validated geometry.
    // The validation shader is given the index of this snapshot in `error_infos`,
    // and the error logger uses the index found in the error record to retrieve it.
    struct ErrorInfo {
        uint32_t info_i{};
        uint32_t geom_i{};
        VkGeometryTypeKHR geom_type{};
        VkAccelerationStructureGeometryDataKHR geom{};
        VkAccelerationStructureBuildRangeInfoKHR build_range_info{};
    };
    std::vector<ErrorInfo> error_infos;

    // Setup Validation pipeline
    // ---
    {
        constexpr uint32_t shader_wg_size_x = 64;
        bool error_logging_desc_set_already_bound = false;

        // Number of workgroups of shader_wg_size_x invocations needed to cover item_count items (rounded up)
        const auto get_wg_count = [](uint32_t item_count) -> uint32_t {
            return item_count / shader_wg_size_x + uint32_t((item_count % shader_wg_size_x) > 0);
        };

        // Binds shader resources, dispatches wg_count_x workgroups, and then, if a buffer is found at validated_address,
        // inserts a barrier on it between the validation compute shader and the acceleration structure build stage.
        // Returns false if resources could not be bound, in which case nothing is dispatched.
        const auto bind_and_dispatch = [&](BLASValidationShader& resources, uint32_t wg_count_x,
                                           VkDeviceAddress validated_address) -> bool {
            // The error logging descriptor set is only requested on the first successful bind
            if (!BindShaderResources(validation_pipeline, gpuav, cb_state, cb_state.compute_index,
                                     cb_state.GetErrorLoggerIndex(), resources, !error_logging_desc_set_already_bound)) {
                assert(false);
                return false;
            }
            error_logging_desc_set_already_bound = true;

            DispatchCmdDispatch(cb_state.VkHandle(), wg_count_x, 1, 1);

            if (const auto buffers = gpuav.GetBuffersByAddress(validated_address); !buffers.empty()) {
                // Only the first buffer found at that address gets a barrier, covering its whole range
                VkBufferMemoryBarrier barrier_read_after_write = vku::InitStructHelper();
                barrier_read_after_write.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
                barrier_read_after_write.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
                barrier_read_after_write.buffer = buffers[0]->VkHandle();
                barrier_read_after_write.offset = 0;
                barrier_read_after_write.size = VK_WHOLE_SIZE;
                DispatchCmdPipelineBarrier(cb_state.VkHandle(), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                           VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 0, nullptr, 1,
                                           &barrier_read_after_write, 0, nullptr);
            }
            return true;
        };

        for (uint32_t info_i = 0; info_i < info_count; ++info_i) {
            const VkAccelerationStructureBuildGeometryInfoKHR& info = infos[info_i];

            for (uint32_t geom_i = 0; geom_i < info.geometryCount; ++geom_i) {
                const VkAccelerationStructureGeometryKHR& geom_data = rt::GetGeometry(info, geom_i);

                const bool is_triangles = geom_data.geometryType == VK_GEOMETRY_TYPE_TRIANGLES_KHR;
                const bool setup_triangle_indices_validation =
                    is_triangles && geom_data.geometry.triangles.indexType != VK_INDEX_TYPE_NONE_KHR;
                const bool setup_aabbs_validation = geom_data.geometryType == VK_GEOMETRY_TYPE_AABBS_KHR;
                const bool setup_transform_validation =
                    is_triangles && geom_data.geometry.triangles.transformData.deviceAddress != 0;

                if (!setup_triangle_indices_validation && !setup_aabbs_validation && !setup_transform_validation) {
                    continue;
                }

                DispatchCmdBindPipeline(cb_state.VkHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, validation_pipeline.pipeline);

                const VkAccelerationStructureBuildRangeInfoKHR& build_range_info = pp_build_ranges_infos[info_i][geom_i];

                ErrorInfo& error_info = error_infos.emplace_back();
                error_info.info_i = info_i;
                error_info.geom_i = geom_i;
                error_info.geom_type = geom_data.geometryType;
                error_info.build_range_info = build_range_info;

                // Push constants common to all validation modes.
                // The same object is reused if a triangles geometry needs both index and transform validation.
                BLASValidationShader shader_resources;
                shader_resources.push_constants.first_vertex = build_range_info.firstVertex;
                shader_resources.push_constants.address_byte_offset = build_range_info.primitiveOffset;
                shader_resources.push_constants.primitive_count = build_range_info.primitiveCount;
                shader_resources.push_constants.error_info_i = uint32_t(error_infos.size() - 1);

                if (setup_triangle_indices_validation) {
                    error_info.geom.triangles = geom_data.geometry.triangles;

                    shader_resources.push_constants.validation_mode = glsl::kBLASValidationMode_triangles_indices;
                    shader_resources.push_constants.address = geom_data.geometry.triangles.indexData.deviceAddress;
                    shader_resources.push_constants.index_type = geom_data.geometry.triangles.indexType;
                    shader_resources.push_constants.max_vertex = geom_data.geometry.triangles.maxVertex;

                    // 3 indices per primitive.
                    // NOTE(review): this product is computed on 32 bits, so it wraps for primitive counts above
                    // UINT32_MAX / 3 - confirm the shader bounds its invocations the same way.
                    const uint32_t wg_count_x = get_wg_count(3 * build_range_info.primitiveCount);
                    if (!bind_and_dispatch(shader_resources, wg_count_x,
                                           geom_data.geometry.triangles.indexData.deviceAddress)) {
                        return;
                    }
                }

                if (setup_aabbs_validation) {
                    error_info.geom.aabbs = geom_data.geometry.aabbs;

                    shader_resources.push_constants.validation_mode = glsl::kBLASValidationMode_aabbs;
                    shader_resources.push_constants.address = geom_data.geometry.aabbs.data.deviceAddress;
                    shader_resources.push_constants.stride = geom_data.geometry.aabbs.stride;

                    const uint32_t wg_count_x = get_wg_count(build_range_info.primitiveCount);
                    if (!bind_and_dispatch(shader_resources, wg_count_x, geom_data.geometry.aabbs.data.deviceAddress)) {
                        return;
                    }
                }

                if (setup_transform_validation) {
                    error_info.geom.triangles = geom_data.geometry.triangles;

                    shader_resources.push_constants.validation_mode = glsl::kBLASValidationMode_transform_matrix;
                    // For this mode the byte offset is transformOffset, not primitiveOffset
                    shader_resources.push_constants.address_byte_offset = build_range_info.transformOffset;
                    shader_resources.push_constants.address = geom_data.geometry.triangles.transformData.deviceAddress;

                    if (!bind_and_dispatch(shader_resources, 1, geom_data.geometry.triangles.transformData.deviceAddress)) {
                        return;
                    }
                }
            }
        }
    }

    // Turns an error record written by the validation shader into a validation error message.
    // Records that do not belong to the pre build acceleration structures error group are ignored.
    CommandBufferSubState::ErrorLoggerFunc error_logger = [&gpuav, error_infos = std::move(error_infos)](
                                                              const uint32_t* error_record, const Location& loc_with_debug_region,
                                                              const LogObjectList& objlist) {
        bool skip = false;
        using namespace glsl;

        if (GetErrorGroup(error_record) != kErrorGroup_GpuPreBuildAccelerationStructures) {
            return skip;
        }

        const uint32_t error_info_i = error_record[kValCmd_ErrorPayloadDword_0];
        const uint32_t gid = error_record[kValCmd_ErrorPayloadDword_1];

        // The index comes from GPU written memory: do not index out of bounds on an unexpected value,
        // including in builds where assert is compiled out
        if (error_info_i >= error_infos.size()) {
            assert(false);
            return skip;
        }
        const ErrorInfo& error_info = error_infos[error_info_i];

        // Log error
        // ---
        const uint32_t error_sub_code = GetSubError(error_record);
        switch (error_sub_code) {
            case kErrorSubCode_PreBuildAccelerationStructures_MaxFetchedIndex: {
                const uint32_t index = error_record[kValCmd_ErrorPayloadDword_2];
                const uint32_t index_type_byte_size = IndexTypeSize(error_info.geom.triangles.indexType);
                // Widen gid before multiplying, so that the byte offset of the faulty index cannot wrap on 32 bits
                const VkDeviceAddress faulty_index_address = error_info.geom.triangles.indexData.deviceAddress +
                                                             error_info.build_range_info.primitiveOffset +
                                                             VkDeviceAddress(gid) * index_type_byte_size;

                skip |= gpuav.LogError(
                    "VUID-VkAccelerationStructureBuildRangeInfoKHR-maxVertex-10774", objlist, loc_with_debug_region,
                    "Index out of bounds.\n"
                    "Index (%" PRIu32 ") + firstVertex (%" PRIu32 ") = %" PRIu32
                    " but VkAccelerationStructureGeometryTrianglesDataKHR::maxVertex is only %" PRIu32
                    ".\n"
                    "Index buffer starts at VkDeviceAddress indexData (0x%" PRIx64 ") + primitiveOffset (%" PRIu32 ") = 0x%" PRIx64
                    "\n"
                    "Given index type of %s, and that starting address, that's IndexBuffer[%" PRIu32
                    "] (VkDeviceAddress: 0x%" PRIx64
                    ")\n"
                    "Corresponding BLAS build command info:\n"
                    "VkAccelerationStructureBuildGeometryInfoKHR[%" PRIu32 "]::VkAccelerationStructureGeometryKHR[%" PRIu32
                    "]::VkAccelerationStructureGeometryTrianglesDataKHR was:\n"
                    "  vertexFormat: %s\n"
                    "  vertexData: 0x%" PRIx64
                    "\n"
                    "  vertexStride: %" PRIu64
                    "\n"
                    "  maxVertex: %" PRIu32
                    "\n"
                    "  indexType: %s\n"
                    "  indexData: 0x%" PRIx64
                    "\n"
                    "  transformData: 0x%" PRIx64
                    "\n\n"
                    "VkAccelerationStructureBuildRangeInfoKHR[%" PRIu32 "][%" PRIu32
                    "] was:\n"
                    "  primitiveCount: %" PRIu32
                    "\n"
                    "  primitiveOffset: %" PRIu32
                    "\n"
                    "  firstVertex: %" PRIu32
                    "\n"
                    "  transformOffset: %" PRIu32 "\n",
                    // Error description
                    index, error_info.build_range_info.firstVertex, index + error_info.build_range_info.firstVertex,
                    error_info.geom.triangles.maxVertex, error_info.geom.triangles.indexData.deviceAddress,
                    error_info.build_range_info.primitiveOffset,
                    error_info.geom.triangles.indexData.deviceAddress + error_info.build_range_info.primitiveOffset,
                    string_VkIndexType(error_info.geom.triangles.indexType), gid, faulty_index_address,
                    // VkAccelerationStructureGeometryTrianglesDataKHR
                    error_info.info_i, error_info.geom_i, string_VkFormat(error_info.geom.triangles.vertexFormat),
                    error_info.geom.triangles.vertexData.deviceAddress, error_info.geom.triangles.vertexStride,
                    error_info.geom.triangles.maxVertex, string_VkIndexType(error_info.geom.triangles.indexType),
                    error_info.geom.triangles.indexData.deviceAddress, error_info.geom.triangles.transformData.deviceAddress,
                    // VkAccelerationStructureBuildRangeInfoKHR
                    error_info.info_i, error_info.geom_i, error_info.build_range_info.primitiveCount,
                    error_info.build_range_info.primitiveOffset, error_info.build_range_info.firstVertex,
                    error_info.build_range_info.transformOffset);
                break;
            }
            case kErrorSubCode_PreBuildAccelerationStructures_MinMaxAabb_X:
            case kErrorSubCode_PreBuildAccelerationStructures_MinMaxAabb_Y:
            case kErrorSubCode_PreBuildAccelerationStructures_MinMaxAabb_Z: {
                // Payload dwords 2 and 3 are read back as the float values reported as min and max
                const float min = *reinterpret_cast<const float*>(error_record + kValCmd_ErrorPayloadDword_2);
                const float max = *reinterpret_cast<const float*>(error_record + kValCmd_ErrorPayloadDword_3);

                // Select reported fields and VUID based on the faulty axis
                vvl::Field min_field{};
                vvl::Field max_field{};
                const char* vuid = "";
                switch (error_sub_code) {
                    case kErrorSubCode_PreBuildAccelerationStructures_MinMaxAabb_X:
                        min_field = vvl::Field::minX;
                        max_field = vvl::Field::maxX;
                        vuid = "VUID-VkAabbPositionsKHR-minX-03546";
                        break;
                    case kErrorSubCode_PreBuildAccelerationStructures_MinMaxAabb_Y:
                        min_field = vvl::Field::minY;
                        max_field = vvl::Field::maxY;
                        vuid = "VUID-VkAabbPositionsKHR-minY-03547";
                        break;
                    case kErrorSubCode_PreBuildAccelerationStructures_MinMaxAabb_Z:
                        min_field = vvl::Field::minZ;
                        max_field = vvl::Field::maxZ;
                        vuid = "VUID-VkAabbPositionsKHR-minZ-03548";
                        break;
                    default:
                        break;
                }

                skip |= gpuav.LogError(
                    vuid, objlist, loc_with_debug_region,
                    "Ill formed AABB at primitive index %" PRIu32
                    ".\n"
                    "%s (%f) > %s (%f)\n"
                    "AABB was found at VkDeviceAddress aabbs.data (0x%" PRIx64 ") + primitiveOffset (%" PRIu32
                    ") + primitive index (%" PRIu32 ") * stride (%" PRIu64 ") = 0x%" PRIx64
                    "\n"
                    "Corresponding BLAS build command info:\n"
                    "VkAccelerationStructureBuildGeometryInfoKHR[%" PRIu32 "]::VkAccelerationStructureGeometryKHR[%" PRIu32
                    "]::VkAccelerationStructureGeometryAabbsDataKHR was:\n"
                    "  data.deviceAddress: 0x%" PRIx64
                    "\n"
                    "  stride: %" PRIu64
                    "\n\n"
                    "VkAccelerationStructureBuildRangeInfoKHR[%" PRIu32 "][%" PRIu32
                    "] was:\n"
                    "  primitiveCount: %" PRIu32
                    "\n"
                    "  primitiveOffset: %" PRIu32
                    "\n"
                    "  firstVertex: %" PRIu32
                    "\n"
                    "  transformOffset: %" PRIu32 "\n",
                    // Error description
                    gid, vvl::String(min_field), min, vvl::String(max_field), max, error_info.geom.aabbs.data.deviceAddress,
                    error_info.build_range_info.primitiveOffset, gid, error_info.geom.aabbs.stride,
                    error_info.geom.aabbs.data.deviceAddress + error_info.build_range_info.primitiveOffset +
                        gid * error_info.geom.aabbs.stride,
                    // VkAccelerationStructureGeometryAabbsDataKHR
                    error_info.info_i, error_info.geom_i, error_info.geom.aabbs.data.deviceAddress, error_info.geom.aabbs.stride,
                    // VkAccelerationStructureBuildRangeInfoKHR
                    error_info.info_i, error_info.geom_i, error_info.build_range_info.primitiveCount,
                    error_info.build_range_info.primitiveOffset, error_info.build_range_info.firstVertex,
                    error_info.build_range_info.transformOffset);
                break;
            }
            case kErrorSubCode_PreBuildAccelerationStructures_Transform: {
                skip |= gpuav.LogError(
                    "VUID-VkTransformMatrixKHR-matrix-03799", objlist, loc_with_debug_region,
                    "Transform matrix's first three columns do not define an invertible 3x3 matrix.\n"
                    "Corresponding BLAS build command info:\n"
                    "VkAccelerationStructureBuildGeometryInfoKHR[%" PRIu32 "]::VkAccelerationStructureGeometryKHR[%" PRIu32
                    "]::VkAccelerationStructureGeometryTrianglesDataKHR was:\n"
                    "  vertexFormat: %s\n"
                    "  vertexData: 0x%" PRIx64
                    "\n"
                    "  vertexStride: %" PRIu64
                    "\n"
                    "  maxVertex: %" PRIu32
                    "\n"
                    "  indexType: %s\n"
                    "  indexData: 0x%" PRIx64
                    "\n"
                    "  transformData: 0x%" PRIx64
                    "\n\n"
                    "VkAccelerationStructureBuildRangeInfoKHR[%" PRIu32 "][%" PRIu32
                    "] was:\n"
                    "  primitiveCount: %" PRIu32
                    "\n"
                    "  primitiveOffset: %" PRIu32
                    "\n"
                    "  firstVertex: %" PRIu32
                    "\n"
                    "  transformOffset: %" PRIu32 "\n",
                    // VkAccelerationStructureGeometryTrianglesDataKHR
                    error_info.info_i, error_info.geom_i, string_VkFormat(error_info.geom.triangles.vertexFormat),
                    error_info.geom.triangles.vertexData.deviceAddress, error_info.geom.triangles.vertexStride,
                    error_info.geom.triangles.maxVertex, string_VkIndexType(error_info.geom.triangles.indexType),
                    error_info.geom.triangles.indexData.deviceAddress, error_info.geom.triangles.transformData.deviceAddress,
                    // VkAccelerationStructureBuildRangeInfoKHR
                    error_info.info_i, error_info.geom_i, error_info.build_range_info.primitiveCount,
                    error_info.build_range_info.primitiveOffset, error_info.build_range_info.firstVertex,
                    error_info.build_range_info.transformOffset);
                break;
            }
            default:
                break;
        }

        return skip;
    };

    cb_state.AddCommandErrorLogger(loc, &last_bound, std::move(error_logger));
}
} // namespace valcmd
} // namespace gpuav