blob: b143768ac402a8a58cff41c672edec074caaf7c7 [file]
/* Copyright (c) 2025-2026 The Khronos Group Inc.
* Copyright (c) 2025-2026 Valve Corporation
* Copyright (c) 2025-2026 LunarG, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "../framework/layer_validation_tests.h"
#include "../framework/pipeline_helper.h"
#include "../framework/descriptor_helper.h"
#include "cooperative_matrix_helper.h"
#include "shader_helper.h"
class PositiveGpuAVShaderSanitizer : public GpuAVGpuAVShaderSanitizer {};
void GpuAVGpuAVShaderSanitizer::InitCoopMatFp16() {
SetTargetApiVersion(VK_API_VERSION_1_3);
AddRequiredExtensions(VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME);
AddRequiredExtensions(VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME);
AddRequiredFeature(vkt::Feature::cooperativeMatrix);
AddRequiredFeature(vkt::Feature::vulkanMemoryModel);
AddRequiredFeature(vkt::Feature::shaderFloat16);
AddRequiredFeature(vkt::Feature::storageBuffer16BitAccess);
RETURN_IF_SKIP(InitGpuAvFramework());
RETURN_IF_SKIP(InitState());
CooperativeMatrixHelper helper(*this);
bool found = false;
for (const auto& prop : helper.coop_matrix_props) {
if (prop.scope == VK_SCOPE_SUBGROUP_KHR && prop.AType == VK_COMPONENT_TYPE_FLOAT16_KHR && prop.MSize == 16 &&
prop.KSize == 16) {
found = true;
break;
}
}
if (!found) {
GTEST_SKIP() << "fp16 16x16 A-type cooperative matrix property not found";
}
}
void GpuAVGpuAVShaderSanitizer::CoopMatAlignmentTest(const char* cs_source, const std::vector<uint32_t>& params,
bool expect_error) {
CreateComputePipelineHelper pipe(*this);
pipe.dsl_bindings_ = {{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr},
{1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr}};
pipe.cs_ = VkShaderObj(*m_device, cs_source, VK_SHADER_STAGE_COMPUTE_BIT, SPV_ENV_VULKAN_1_2);
pipe.CreateComputePipeline();
vkt::Buffer payload_buffer(*m_device, 4096, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, kHostVisibleMemProps);
const VkDeviceSize param_size = params.size() * sizeof(uint32_t);
vkt::Buffer param_buffer(*m_device, param_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, kHostVisibleMemProps);
auto* ptr = static_cast<uint32_t*>(param_buffer.Memory().Map());
memcpy(ptr, params.data(), param_size);
param_buffer.Memory().Unmap();
pipe.descriptor_set_.WriteDescriptorBufferInfo(0, payload_buffer, 0, VK_WHOLE_SIZE, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
pipe.descriptor_set_.WriteDescriptorBufferInfo(1, param_buffer, 0, VK_WHOLE_SIZE, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
pipe.descriptor_set_.UpdateDescriptorSets();
m_command_buffer.Begin();
vk::CmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipe);
vk::CmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipe.pipeline_layout_, 0, 1,
&pipe.descriptor_set_.set_, 0, nullptr);
vk::CmdDispatch(m_command_buffer, 1, 1, 1);
m_command_buffer.End();
if (expect_error) {
m_errorMonitor->SetAllowedFailureMsg("VUID-RuntimeSpirv-OpCooperativeMatrixLoadKHR-08986");
m_errorMonitor->SetDesiredError("VUID-RuntimeSpirv-OpCooperativeMatrixLoadKHR-08986");
}
m_default_queue->SubmitAndWait(m_command_buffer);
if (expect_error) {
m_errorMonitor->VerifyFound();
}
}
// Set a single SSBO to all zero
void GpuAVGpuAVShaderSanitizer::SimpleZeroComputeTest(const char* shader, int source_type, const char* expected_error,
uint32_t error_count) {
RETURN_IF_SKIP(InitGpuAvFramework());
RETURN_IF_SKIP(InitState());
CreateComputePipelineHelper pipe(*this);
pipe.dsl_bindings_[0] = {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr};
pipe.cs_ = VkShaderObj(*m_device, shader, VK_SHADER_STAGE_COMPUTE_BIT, SPV_ENV_VULKAN_1_1, (SpvSourceType)source_type);
pipe.CreateComputePipeline();
vkt::Buffer in_buffer(*m_device, 256, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, kHostVisibleMemProps);
void* in_ptr = in_buffer.Memory().Map();
memset(in_ptr, 0, 256);
pipe.descriptor_set_.WriteDescriptorBufferInfo(0, in_buffer, 0, VK_WHOLE_SIZE, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
pipe.descriptor_set_.UpdateDescriptorSets();
m_command_buffer.Begin();
vk::CmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipe);
vk::CmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipe.pipeline_layout_, 0, 1,
&pipe.descriptor_set_.set_, 0, nullptr);
vk::CmdDispatch(m_command_buffer, 1, 1, 1);
m_command_buffer.End();
if (expected_error) m_errorMonitor->SetDesiredError(expected_error, error_count);
m_default_queue->SubmitAndWait(m_command_buffer);
if (expected_error) m_errorMonitor->VerifyFound();
}
TEST_F(PositiveGpuAVShaderSanitizer, DivideByOne) {
const char* cs_source = R"glsl(
#version 450 core
layout(set=0, binding=0) buffer SSBO {
uint u_index;
int s_index;
uvec4 v_index;
float f_index;
uint u_result;
int s_result;
uvec4 v_result;
float f_result;
};
void main() {
u_result = 5 / (u_index + 1);
s_result = 5 / (s_index - 1);
v_result = uvec4(1) / (uvec4(1) + v_index);
u_result = u_result / 2;
f_result = 1 / f_index;
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, Mod) {
const char* cs_source = R"glsl(
#version 450 core
layout(set=0, binding=0) buffer SSBO {
int a[2];
uvec4 b[2];
ivec3 c[2];
};
void main() {
a[0] = 5 % (a[1] - 1);
b[0] = uvec4(1) % (uvec4(1) + b[0]);
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, SRem) {
const char* cs_source = R"asm(
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpDecorate %SSBO Block
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %SSBO 1 Offset 4
OpDecorate %_ Binding 0
OpDecorate %_ DescriptorSet 0
%void = OpTypeVoid
%4 = OpTypeFunction %void
%int = OpTypeInt 32 1
%SSBO = OpTypeStruct %int %int
%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO
%_ = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer
%int_1 = OpConstant %int 1
%int_0 = OpConstant %int 0
%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
%main = OpFunction %void None %4
%6 = OpLabel
%15 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_0
%16 = OpLoad %int %15
%17 = OpIAdd %int %16 %int_1
%18 = OpSRem %int %int_1 %17
%19 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_1
OpStore %19 %18
OpReturn
OpFunctionEnd
)asm";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_ASM);
}
TEST_F(PositiveGpuAVShaderSanitizer, ImageGather) {
SetTargetApiVersion(VK_API_VERSION_1_2);
RETURN_IF_SKIP(InitGpuAvFramework());
RETURN_IF_SKIP(InitState());
// if (condition != 0) {
// result = textureGather(tex, vec2(0), -1);
// }
const char* cs_source = R"asm(
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main" %_ %tex
OpExecutionMode %main LocalSize 1 1 1
OpDecorate %SSBO Block
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %SSBO 1 Offset 16
OpDecorate %_ Binding 1
OpDecorate %_ DescriptorSet 0
OpDecorate %tex Binding 0
OpDecorate %tex DescriptorSet 0
%void = OpTypeVoid
%4 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%SSBO = OpTypeStruct %uint %v4float
%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO
%_ = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%int_1 = OpConstant %int 1
%int_n1 = OpConstant %int -1
%24 = OpTypeImage %float 2D 0 0 0 1 Unknown
%25 = OpTypeSampledImage %24
%_ptr_UniformConstant_25 = OpTypePointer UniformConstant %25
%tex = OpVariable %_ptr_UniformConstant_25 UniformConstant
%v2float = OpTypeVector %float 2
%float_0 = OpConstant %float 0
%31 = OpConstantComposite %v2float %float_0 %float_0
%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
%main = OpFunction %void None %4
%6 = OpLabel
%16 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0
%17 = OpLoad %uint %16
%20 = OpINotEqual %bool %17 %uint_0
OpSelectionMerge %22 None
OpBranchConditional %20 %21 %22
%21 = OpLabel
%28 = OpLoad %25 %tex
%32 = OpImageGather %v4float %28 %31 %int_n1
%34 = OpAccessChain %_ptr_StorageBuffer_v4float %_ %int_1
OpStore %34 %32
OpBranch %22
%22 = OpLabel
OpReturn
OpFunctionEnd
)asm";
OneOffDescriptorSet descriptor_set(m_device,
{
{0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_ALL, nullptr},
{1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr},
});
const vkt::PipelineLayout pipeline_layout(*m_device, {&descriptor_set.layout_});
CreateComputePipelineHelper pipe(*this);
pipe.cp_ci_.layout = pipeline_layout;
pipe.cs_ = VkShaderObj(*m_device, cs_source, VK_SHADER_STAGE_COMPUTE_BIT, SPV_ENV_VULKAN_1_2, SPV_SOURCE_ASM);
pipe.CreateComputePipeline();
vkt::Buffer buffer(*m_device, 64, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, kHostVisibleMemProps);
void* in_ptr = buffer.Memory().Map();
memset(in_ptr, 0, 64);
vkt::Image image(*m_device, 16, 16, VK_FORMAT_B8G8R8A8_UNORM, VK_IMAGE_USAGE_SAMPLED_BIT);
image.SetLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vkt::ImageView image_view = image.CreateView();
vkt::Sampler sampler(*m_device, SafeSaneSamplerCreateInfo());
descriptor_set.WriteDescriptorImageInfo(0, image_view, sampler);
descriptor_set.WriteDescriptorBufferInfo(1, buffer, 0, VK_WHOLE_SIZE, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
descriptor_set.UpdateDescriptorSets();
m_command_buffer.Begin();
vk::CmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipe);
vk::CmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 1, &descriptor_set.set_, 0,
nullptr);
vk::CmdDispatch(m_command_buffer, 1, 1, 1);
m_command_buffer.End();
m_default_queue->SubmitAndWait(m_command_buffer);
}
TEST_F(PositiveGpuAVShaderSanitizer, MultiplePass) {
TEST_DESCRIPTION("Make sure multiple functions passes work");
RETURN_IF_SKIP(InitGpuAvFramework());
RETURN_IF_SKIP(InitState());
const char* cs_source = R"glsl(
#version 450 core
layout(set=0, binding=0) buffer SSBO {
int a;
vec4 b;
float c;
};
layout(set=0, binding=1) uniform sampler2D tex;
void main() {
a = 2 / a;
vec4 b = textureGather(tex, vec2(0), 0);
c = mod(4.0, c);
float d = pow(c + 50.0f, 1.0f);
float e = atan(c, 1.0f);
float f = min(d, e) + max(e, 1.0);
}
)glsl";
OneOffDescriptorSet descriptor_set(m_device,
{
{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr},
{1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_ALL, nullptr},
});
const vkt::PipelineLayout pipeline_layout(*m_device, {&descriptor_set.layout_});
CreateComputePipelineHelper pipe(*this);
pipe.cp_ci_.layout = pipeline_layout;
pipe.cs_ = VkShaderObj(*m_device, cs_source, VK_SHADER_STAGE_COMPUTE_BIT, SPV_ENV_VULKAN_1_1);
pipe.CreateComputePipeline();
}
TEST_F(PositiveGpuAVShaderSanitizer, Pow) {
const char* cs_source = R"glsl(
#version 460
layout(set=0, binding=0) buffer SSBO {
float x;
float y;
float result;
vec3 resultVec;
};
void main() {
result = pow(x + 1.0f, y + 1.0f);
vec3 a = vec3(x) + vec3(1.0f);
vec3 b = vec3(y) + vec3(1.0f);
resultVec = pow(a, b);
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, PowConstant) {
const char* cs_source = R"glsl(
#version 460
layout(set=0, binding=0) buffer SSBO {
float x;
float y;
float result;
vec3 resultVec;
};
void main() {
result = pow(1.0f, 1.0f);
resultVec = pow(vec3(1.0f), vec3(1.0f));
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, PowVectorMix) {
const char* cs_source = R"glsl(
#version 450 core
layout(set=0, binding=0) buffer SSBO {
vec3 x;
vec3 y;
vec3 result;
};
void main() {
x += vec3(1.0f, 0.0f, 1.0f);
y += vec3(0.0f, 1.0f, 0.0f);
result = pow(x, y);
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, PowFloat16) {
SetTargetApiVersion(VK_API_VERSION_1_1);
AddRequiredExtensions(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
AddRequiredFeature(vkt::Feature::shaderFloat16);
AddRequiredFeature(vkt::Feature::storageBuffer16BitAccess);
const char* cs_source = R"glsl(
#version 450 core
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
layout(set = 0, binding = 0) buffer UBO {
float16_t x;
float16_t y;
float result;
};
void main() {
result = float(pow(x, float16_t(1.0)));
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, Atan2) {
const char* cs_source = R"glsl(
#version 460
layout(set=0, binding=0) buffer SSBO {
float x;
float y;
float result;
vec3 resultVec;
};
void main() {
result = atan(x + 1.0f, y + 1.0f);
vec3 a = vec3(x) + vec3(1.0f);
vec3 b = vec3(y) + vec3(1.0f);
resultVec = atan(a, b);
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, Atan2Constant) {
const char* cs_source = R"glsl(
#version 460
layout(set=0, binding=0) buffer SSBO {
float x;
float y;
float result;
vec3 resultVec;
};
void main() {
result = atan(0.0f, 1.0f);
resultVec = atan(vec3(1.0f), vec3(0.0f));
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, Atan2VectorMix) {
const char* cs_source = R"glsl(
#version 450 core
layout(set=0, binding=0) buffer SSBO {
vec3 x;
vec3 y;
vec3 result;
};
void main() {
x += vec3(1.0f, 0.0f, 1.0f);
y += vec3(0.0f, 1.0f, 0.0f);
result = atan(x, y);
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, FMinMax) {
const char* cs_source = R"glsl(
#version 450 core
layout(set=0, binding=0) buffer SSBO {
vec3 x;
vec3 y;
vec3 result;
};
void main() {
result = min(x, y) + max(y.y, 0.0);
}
)glsl";
SimpleZeroComputeTest(cs_source, SPV_SOURCE_GLSL);
}
TEST_F(PositiveGpuAVShaderSanitizer, CoopMatAlignedStride) {
TEST_DESCRIPTION("OpCooperativeMatrixLoadKHR with a properly aligned stride (fp16, component_size=2)");
RETURN_IF_SKIP(InitCoopMatFp16());
const char* cs_source = R"glsl(
#version 450 core
#pragma use_vulkan_memory_model
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
layout(local_size_x = 64) in;
layout(set=0, binding=0) coherent buffer SSBO { float16_t payload[]; };
layout(set=0, binding=1) buffer ParamSSBO { uint stride_val; uint offset_val; };
void main() {
coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> matA;
coopMatLoad(matA, payload, offset_val, stride_val, gl_CooperativeMatrixLayoutRowMajor);
coopMatStore(matA, payload, offset_val, stride_val, gl_CooperativeMatrixLayoutRowMajor);
}
)glsl";
CoopMatAlignmentTest(cs_source, {8, 8}, false);
}
TEST_F(PositiveGpuAVShaderSanitizer, CoopMatAlignedPointerBDA) {
TEST_DESCRIPTION("OpCooperativeMatrixLoadKHR with a BDA pointer that is properly aligned");
SetTargetApiVersion(VK_API_VERSION_1_3);
AddRequiredFeature(vkt::Feature::bufferDeviceAddress);
AddRequiredFeature(vkt::Feature::shaderInt64);
RETURN_IF_SKIP(InitCoopMatFp16());
const char* cs_source = R"glsl(
#version 450 core
#pragma use_vulkan_memory_model
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable
layout(local_size_x = 64) in;
layout(buffer_reference, std430) buffer BufRef { float16_t data[]; };
layout(set=0, binding=0) buffer AddrSSBO { uint64_t addr; };
void main() {
BufRef buf = BufRef(addr);
coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> matA;
coopMatLoad(matA, buf.data, 0, 16, gl_CooperativeMatrixLayoutRowMajor);
}
)glsl";
CreateComputePipelineHelper pipe(*this);
pipe.dsl_bindings_ = {{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr}};
pipe.cs_ = VkShaderObj(*m_device, cs_source, VK_SHADER_STAGE_COMPUTE_BIT, SPV_ENV_VULKAN_1_2);
pipe.CreateComputePipeline();
vkt::Buffer payload_buffer(*m_device, 4096, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, vkt::device_address);
VkDeviceAddress aligned_addr = payload_buffer.Address();
vkt::Buffer addr_buffer(*m_device, sizeof(VkDeviceAddress), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, kHostVisibleMemProps);
auto* addr_ptr = static_cast<VkDeviceAddress*>(addr_buffer.Memory().Map());
*addr_ptr = aligned_addr;
addr_buffer.Memory().Unmap();
pipe.descriptor_set_.WriteDescriptorBufferInfo(0, addr_buffer, 0, VK_WHOLE_SIZE, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
pipe.descriptor_set_.UpdateDescriptorSets();
m_command_buffer.Begin();
vk::CmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipe);
vk::CmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipe.pipeline_layout_, 0, 1,
&pipe.descriptor_set_.set_, 0, nullptr);
vk::CmdDispatch(m_command_buffer, 1, 1, 1);
m_command_buffer.End();
m_default_queue->SubmitAndWait(m_command_buffer);
}