17
17
#include " Vulkan/VkDebug.hpp"
18
18
#include " Vulkan/VkPipelineLayout.hpp"
19
19
20
+ namespace
21
+ {
22
+ enum { X, Y, Z }; // Indices of the x, y and z components in three-element arrays such as localSize, numWorkgroups and workgroupID.
23
+ } // anonymous namespace
24
+
20
25
namespace sw
21
26
{
22
27
ComputeProgram::ComputeProgram (SpirvShader const *shader, vk::PipelineLayout const *pipelineLayout)
@@ -54,95 +59,94 @@ namespace sw
54
59
const int subgroupSize = SIMD::Width;
55
60
56
61
// Total number of invocations required to execute this workgroup.
57
- int numInvocations = localSize[0 ] * localSize[1 ] * localSize[2 ];
62
+ int numInvocations = localSize[X ] * localSize[Y ] * localSize[Z ];
58
63
59
64
Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET (Data, numWorkgroups));
60
65
Int4 workgroupID = *Pointer<Int4>(data + OFFSET (Data, workgroupID));
61
- Int4 workgroupSize = Int4 (localSize[0 ], localSize[1 ], localSize[2 ], 0 );
66
+ Int4 workgroupSize = Int4 (localSize[X ], localSize[Y ], localSize[Z ], 0 );
62
67
Int numSubgroups = (numInvocations + subgroupSize - 1 ) / subgroupSize;
63
68
64
- setInputBuiltin (spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)
69
+ setInputBuiltin (spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)
65
70
{
66
71
for (uint32_t component = 0 ; component < builtin.SizeInComponents ; component++)
67
72
{
68
73
value[builtin.FirstComponent + component] =
69
- As<Float4>( Int4 (Extract (numWorkgroups, component)));
74
+ As<SIMD::Float>( SIMD::Int (Extract (numWorkgroups, component)));
70
75
}
71
76
});
72
77
73
- setInputBuiltin (spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)
78
+ setInputBuiltin (spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)
74
79
{
75
80
for (uint32_t component = 0 ; component < builtin.SizeInComponents ; component++)
76
81
{
77
82
value[builtin.FirstComponent + component] =
78
- As<Float4>( Int4 (Extract (workgroupSize, component)));
83
+ As<SIMD::Float>( SIMD::Int (Extract (workgroupSize, component)));
79
84
}
80
85
});
81
86
82
- setInputBuiltin (spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)
87
+ setInputBuiltin (spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)
83
88
{
84
89
ASSERT (builtin.SizeInComponents == 1 );
85
- value[builtin.FirstComponent ] = As<Float4>( Int4 (numSubgroups));
90
+ value[builtin.FirstComponent ] = As<SIMD::Float>( SIMD::Int (numSubgroups));
86
91
});
87
92
88
- setInputBuiltin (spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)
93
+ setInputBuiltin (spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)
89
94
{
90
95
ASSERT (builtin.SizeInComponents == 1 );
91
- value[builtin.FirstComponent ] = As<Float4>( Int4 (subgroupSize));
96
+ value[builtin.FirstComponent ] = As<SIMD::Float>( SIMD::Int (subgroupSize));
92
97
});
93
98
94
- setInputBuiltin (spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)
99
+ setInputBuiltin (spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)
95
100
{
96
101
ASSERT (builtin.SizeInComponents == 1 );
97
- value[builtin.FirstComponent ] = As<Float4>( Int4 (0 , 1 , 2 , 3 ));
102
+ value[builtin.FirstComponent ] = As<SIMD::Float>( SIMD::Int (0 , 1 , 2 , 3 ));
98
103
});
99
104
100
- enum { XXXX, YYYY, ZZZZ };
101
-
102
105
For (Int subgroupIndex = 0 , subgroupIndex < numSubgroups, subgroupIndex++)
103
106
{
104
- Int4 localInvocationIndex = Int4 (subgroupIndex * 4 ) + Int4 (0 , 1 , 2 , 3 );
107
+ // TODO: Replace SIMD::Int(0, 1, 2, 3) with SIMD-width equivalent
108
+ auto localInvocationIndex = SIMD::Int (subgroupIndex * SIMD::Width) + SIMD::Int (0 , 1 , 2 , 3 );
105
109
106
110
// Disable lanes where (localInvocationIndex >= numInvocations)
107
- routine.activeLaneMask = CmpLT (localInvocationIndex, Int4 (numInvocations));
111
+ routine.activeLaneMask = CmpLT (localInvocationIndex, SIMD::Int (numInvocations));
108
112
109
- Int4 localInvocationID[3 ];
113
+ SIMD::Int localInvocationID[3 ];
110
114
{
111
- Int4 idx = localInvocationIndex;
112
- localInvocationID[ZZZZ ] = idx / Int4 (localSize[0 ] * localSize[1 ]);
113
- idx -= localInvocationID[ZZZZ ] * Int4 (localSize[0 ] * localSize[1 ]); // modulo
114
- localInvocationID[YYYY ] = idx / Int4 (localSize[0 ]);
115
- idx -= localInvocationID[YYYY ] * Int4 (localSize[0 ]); // modulo
116
- localInvocationID[XXXX ] = idx;
115
+ SIMD::Int idx = localInvocationIndex;
116
+ localInvocationID[Z ] = idx / SIMD::Int (localSize[X ] * localSize[Y ]);
117
+ idx -= localInvocationID[Z ] * SIMD::Int (localSize[X ] * localSize[Y ]); // modulo
118
+ localInvocationID[Y ] = idx / SIMD::Int (localSize[X ]);
119
+ idx -= localInvocationID[Y ] * SIMD::Int (localSize[X ]); // modulo
120
+ localInvocationID[X ] = idx;
117
121
}
118
122
119
- setInputBuiltin (spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)
123
+ setInputBuiltin (spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)
120
124
{
121
125
ASSERT (builtin.SizeInComponents == 1 );
122
- value[builtin.FirstComponent ] = As<Float4 >(localInvocationIndex);
126
+ value[builtin.FirstComponent ] = As<SIMD::Float >(localInvocationIndex);
123
127
});
124
128
125
- setInputBuiltin (spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)
129
+ setInputBuiltin (spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)
126
130
{
127
131
ASSERT (builtin.SizeInComponents == 1 );
128
- value[builtin.FirstComponent ] = As<Float4>( Int4 (subgroupIndex));
132
+ value[builtin.FirstComponent ] = As<SIMD::Float>( SIMD::Int (subgroupIndex));
129
133
});
130
134
131
- setInputBuiltin (spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)
135
+ setInputBuiltin (spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)
132
136
{
133
137
for (uint32_t component = 0 ; component < builtin.SizeInComponents ; component++)
134
138
{
135
- value[builtin.FirstComponent + component] = As<Float4 >(localInvocationID[component]);
139
+ value[builtin.FirstComponent + component] = As<SIMD::Float >(localInvocationID[component]);
136
140
}
137
141
});
138
142
139
- setInputBuiltin (spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)
143
+ setInputBuiltin (spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)
140
144
{
141
- Int4 localBase = workgroupID * workgroupSize;
145
+ auto localBase = workgroupID * workgroupSize;
142
146
for (uint32_t component = 0 ; component < builtin.SizeInComponents ; component++)
143
147
{
144
- Int4 globalInvocationID = Int4 (Extract (localBase, component)) + localInvocationID[component];
145
- value[builtin.FirstComponent + component] = As<Float4 >(globalInvocationID);
148
+ auto globalInvocationID = SIMD::Int (Extract (localBase, component)) + localInvocationID[component];
149
+ value[builtin.FirstComponent + component] = As<SIMD::Float >(globalInvocationID);
146
150
}
147
151
});
148
152
@@ -151,7 +155,7 @@ namespace sw
151
155
}
152
156
}
153
157
154
- void ComputeProgram::setInputBuiltin (spv::BuiltIn id, std::function<void (const SpirvShader::BuiltinMapping& builtin, Array<Float4 >& value)> cb)
158
+ void ComputeProgram::setInputBuiltin (spv::BuiltIn id, std::function<void (const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float >& value)> cb)
155
159
{
156
160
auto it = shader->inputBuiltins .find (id);
157
161
if (it != shader->inputBuiltins .end ())
@@ -170,21 +174,21 @@ namespace sw
170
174
171
175
Data data;
172
176
data.descriptorSets = descriptorSets;
173
- data.numWorkgroups [0 ] = groupCountX;
174
- data.numWorkgroups [1 ] = groupCountY;
175
- data.numWorkgroups [2 ] = groupCountZ;
177
+ data.numWorkgroups [X ] = groupCountX;
178
+ data.numWorkgroups [Y ] = groupCountY;
179
+ data.numWorkgroups [Z ] = groupCountZ;
176
180
data.numWorkgroups [3 ] = 0 ;
177
181
178
182
// TODO(bclayton): Split work across threads.
179
183
for (uint32_t groupZ = 0 ; groupZ < groupCountZ; groupZ++)
180
184
{
181
- data.workgroupID [2 ] = groupZ;
185
+ data.workgroupID [Z ] = groupZ;
182
186
for (uint32_t groupY = 0 ; groupY < groupCountY; groupY++)
183
187
{
184
- data.workgroupID [1 ] = groupY;
188
+ data.workgroupID [Y ] = groupY;
185
189
for (uint32_t groupX = 0 ; groupX < groupCountX; groupX++)
186
190
{
187
- data.workgroupID [0 ] = groupX;
191
+ data.workgroupID [X ] = groupX;
188
192
runWorkgroup (&data);
189
193
}
190
194
}
0 commit comments