Skip quadBroadcast/quadSwap split tests when subgroup size < 8 (#4656)
When the implementation selects a subgroup size < 8 for the test's
workgroup, the split predicate `id < subgroupSize / 2` bisects the
only quad in the subgroup, leaving no fully active quad — which is
undefined behavior for quad operations. This is observed on WARP
(which selects its native D3D12 wave size: 4 on arm64 NEON, often 4
on x86 for small workgroups) and may occur on any implementation
that picks a small native subgroup size at runtime.
Two coordinated guards:
* In the shader, the quad call is wrapped in
`if subgroupSize >= 8u { ... }` so it never executes when the
split predicate would be unsafe.
* In the JS checker, the actual subgroupSize is read out of
metadata.subgroup_size[0] and the test is skipped with t.skip
when it is < 8, so the missing output doesn't get flagged as a
failure.
Querying GPUAdapterInfo.subgroupMinSize would not be sufficient: the
size the implementation actually selects depends on the shader (its
workgroup size, register pressure, etc.), not just the adapter's
minimum supported size. Reading subgroupSize from inside the test
shader itself is the only reliable signal.
diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts
index 4d55661..8556a3d 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts
@@ -326,6 +326,13 @@
const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
const testcase = kPredicateCases[t.params.predicate];
+ // Quad operations require a fully active quad. If the implementation
+ // selects a subgroup size < 8 for this workgroup, the split predicate
+ // (`id < subgroupSize / 2`) bisects the only quad, leaving no fully
+ // active quad (undefined behavior). The shader reads subgroupSize and
+ // skips the quad call when it would be unsafe; the checker observes
+ // the actual selected size in metadata and skips the test entirely so
+ // it doesn't get flagged as a regression.
const wgsl = `
enable subgroups;
@@ -363,9 +370,13 @@
metadata.id[lid] = id;
metadata.subgroup_size[lid] = subgroupSize;
- if ${testcase.cond} {
- let b = quadBroadcast(lid, ${t.params.id});
- output.results[lid] = b;
+ // Only run the quad op when a (subgroupSize / 2) split predicate is
+ // guaranteed to keep every quad fully active. See checker for skip.
+ if subgroupSize >= 8u {
+ if ${testcase.cond} {
+ let b = quadBroadcast(lid, ${t.params.id});
+ output.results[lid] = b;
+ }
}
}`;
@@ -377,6 +388,15 @@
uintsPerOutput,
new Uint32Array([0]), // unused
(metadata: Uint32Array, output: Uint32Array) => {
+ const bound = Math.floor(output.length / 2);
+ // metadata layout: [id, ..., subgroup_size, ...]. The first entry
+ // of the second half is the subgroupSize of invocation 0.
+ if (metadata[bound] < 8) {
+ t.skip(
+ `Implementation selected subgroup size ${metadata[bound]}; a split ` +
+ `predicate would leave no fully active quad (undefined behavior).`
+ );
+ }
return checkBroadcastCompute(metadata, output, t.params.id, testcase.filter);
}
);
diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts
index 08f1b75..93983d1 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts
@@ -345,6 +345,13 @@
const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
const testcase = kPredicateCases[t.params.predicate];
+ // Quad operations require a fully active quad. If the implementation
+ // selects a subgroup size < 8 for this workgroup, the split predicate
+ // (`id < subgroupSize / 2`) bisects the only quad, leaving no fully
+ // active quad (undefined behavior). The shader reads subgroupSize and
+ // skips the quad call when it would be unsafe; the checker observes
+ // the actual selected size in metadata and skips the test entirely so
+ // it doesn't get flagged as a regression.
const wgsl = `
enable subgroups;
@@ -382,9 +389,13 @@
metadata.id[lid] = id;
metadata.subgroup_size[lid] = subgroupSize;
- if ${testcase.cond} {
- let b = ${t.params.op}(lid);
- output.results[lid] = b;
+ // Only run the quad op when a (subgroupSize / 2) split predicate is
+ // guaranteed to keep every quad fully active. See checker for skip.
+ if subgroupSize >= 8u {
+ if ${testcase.cond} {
+ let b = ${t.params.op}(lid);
+ output.results[lid] = b;
+ }
}
}`;
@@ -396,6 +407,15 @@
uintsPerOutput,
new Uint32Array([0]), // unused
(metadata: Uint32Array, output: Uint32Array) => {
+ const bound = Math.floor(output.length / 2);
+ // metadata layout: [id, ..., subgroup_size, ...]. The first entry
+ // of the second half is the subgroupSize of invocation 0.
+ if (metadata[bound] < 8) {
+ t.skip(
+ `Implementation selected subgroup size ${metadata[bound]}; a split ` +
+ `predicate would leave no fully active quad (undefined behavior).`
+ );
+ }
return checkSwapCompute(metadata, output, t.params.op, testcase.filter);
}
);