Skip to content

Commit 4f12626

Browse files
refactor: pass extra walker params
Signed-off-by: Bartosz Dunajski <[email protected]>
1 parent b155875 commit 4f12626

29 files changed

+134
-43
lines changed

level_zero/api/core/ze_module_api_entrypoints.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ ze_result_t zeKernelSuggestGroupSize(
103103
ze_result_t zeKernelSuggestMaxCooperativeGroupCount(
104104
ze_kernel_handle_t hKernel,
105105
uint32_t *totalGroupCount) {
106-
return L0::Kernel::fromHandle(hKernel)->suggestMaxCooperativeGroupCount(totalGroupCount, NEO::EngineGroupType::compute, false);
106+
*totalGroupCount = L0::Kernel::fromHandle(hKernel)->suggestMaxCooperativeGroupCount(NEO::EngineGroupType::compute, false, false);
107+
return ZE_RESULT_SUCCESS;
107108
}
108109

109110
ze_result_t zeKernelSetArgumentValue(

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2758,12 +2758,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
27582758
template <GFXCORE_FAMILY gfxCoreFamily>
27592759
ze_result_t CommandListCoreFamily<gfxCoreFamily>::programSyncBuffer(Kernel &kernel, NEO::Device &device,
27602760
const ze_group_count_t &threadGroupDimensions) {
2761-
uint32_t maximalNumberOfWorkgroupsAllowed;
2762-
auto ret = kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, this->engineGroupType,
2763-
device.isEngineInstanced());
2764-
UNRECOVERABLE_IF(ret != ZE_RESULT_SUCCESS);
2765-
size_t requestedNumberOfWorkgroups = (threadGroupDimensions.groupCountX * threadGroupDimensions.groupCountY *
2766-
threadGroupDimensions.groupCountZ);
2761+
uint32_t maximalNumberOfWorkgroupsAllowed = kernel.suggestMaxCooperativeGroupCount(this->engineGroupType, device.isEngineInstanced(), false);
2762+
2763+
size_t requestedNumberOfWorkgroups = (threadGroupDimensions.groupCountX * threadGroupDimensions.groupCountY * threadGroupDimensions.groupCountZ);
27672764
if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) {
27682765
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
27692766
}

level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
191191
std::list<void *> additionalCommands;
192192

193193
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect);
194+
195+
auto maxWgCountPerTile = kernel->suggestMaxCooperativeGroupCount(this->engineGroupType, device->getNEODevice()->isEngineInstanced(), true);
196+
194197
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
195198
0, // eventAddress
196199
static_cast<uint64_t>(Event::STATE_SIGNALED), // postSyncImmValue
@@ -212,6 +215,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
212215
launchParams.additionalSizeParam, // additionalSizeParam
213216
0, // partitionCount
214217
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
218+
maxWgCountPerTile, // maxWgCountPerTile
215219
NEO::ThreadArbitrationPolicy::NotPresent, // defaultPipelinedThreadArbitrationPolicy
216220
launchParams.isIndirect, // isIndirect
217221
launchParams.isPredicate, // isPredicate

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
345345
}
346346
}
347347

348+
auto maxWgCountPerTile = kernel->suggestMaxCooperativeGroupCount(this->engineGroupType, device->getNEODevice()->isEngineInstanced(), true);
349+
348350
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
349351
eventAddress, // eventAddress
350352
static_cast<uint64_t>(Event::STATE_SIGNALED), // postSyncImmValue
@@ -366,6 +368,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
366368
launchParams.additionalSizeParam, // additionalSizeParam
367369
this->partitionCount, // partitionCount
368370
launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace
371+
maxWgCountPerTile, // maxWgCountPerTile
369372
this->defaultPipelinedThreadArbitrationPolicy, // defaultPipelinedThreadArbitrationPolicy
370373
launchParams.isIndirect, // isIndirect
371374
launchParams.isPredicate, // isPredicate

level_zero/core/source/kernel/kernel.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,7 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
144144
virtual void patchGlobalOffset() = 0;
145145
virtual void patchRegionParams(const CmdListKernelLaunchParams &launchParams) = 0;
146146

147-
virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
148-
bool isEngineInstanced) = 0;
147+
virtual uint32_t suggestMaxCooperativeGroupCount(NEO::EngineGroupType engineGroupType, bool isEngineInstanced, bool forceSingleTileQuery) = 0;
149148
virtual ze_result_t setCacheConfig(ze_cache_config_flags_t flags) = 0;
150149

151150
virtual ze_result_t getProfileInfo(zet_profile_properties_t *pProfileProperties) = 0;

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -477,8 +477,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
477477
return ZE_RESULT_SUCCESS;
478478
}
479479

480-
ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
481-
bool isEngineInstanced) {
480+
uint32_t KernelImp::suggestMaxCooperativeGroupCount(NEO::EngineGroupType engineGroupType, bool isEngineInstanced, bool forceSingleTileQuery) {
482481
UNRECOVERABLE_IF(0 == groupSize[0]);
483482
UNRECOVERABLE_IF(0 == groupSize[1]);
484483
UNRECOVERABLE_IF(0 == groupSize[2]);
@@ -496,20 +495,18 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
496495
bool platformImplicitScaling = helper.platformSupportsImplicitScaling(rootDeviceEnvironment);
497496
auto deviceBitfield = module->getDevice()->getNEODevice()->getDeviceBitfield();
498497

499-
if (NEO::ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, platformImplicitScaling)) {
498+
if (!forceSingleTileQuery && NEO::ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, platformImplicitScaling)) {
500499
numSubDevicesForExecution = static_cast<uint32_t>(deviceBitfield.count());
501500
}
502501

503-
*totalGroupCount = NEO::KernelHelper::getMaxWorkGroupCount(rootDeviceEnvironment,
504-
descriptor,
505-
numSubDevicesForExecution,
506-
usedSlmSize,
507-
workDim,
508-
localWorkSize,
509-
engineGroupType,
510-
isEngineInstanced);
511-
512-
return ZE_RESULT_SUCCESS;
502+
return NEO::KernelHelper::getMaxWorkGroupCount(rootDeviceEnvironment,
503+
descriptor,
504+
numSubDevicesForExecution,
505+
usedSlmSize,
506+
workDim,
507+
localWorkSize,
508+
engineGroupType,
509+
isEngineInstanced);
513510
}
514511

515512
ze_result_t KernelImp::setIndirectAccess(ze_kernel_indirect_access_flags_t flags) {

level_zero/core/source/kernel/kernel_imp.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ struct KernelImp : Kernel {
6969

7070
ze_result_t getKernelName(size_t *pSize, char *pName) override;
7171

72-
ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
73-
bool isEngineInstanced) override;
72+
uint32_t suggestMaxCooperativeGroupCount(NEO::EngineGroupType engineGroupType, bool isEngineInstanced, bool forceSingleTileQuery) override;
7473

7574
const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); }
7675
uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; }

level_zero/core/test/unit_tests/fixtures/kernel_max_cooperative_groups_count_fixture.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2023 Intel Corporation
2+
* Copyright (C) 2023-2024 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -44,8 +44,7 @@ uint32_t KernelImpSuggestMaxCooperativeGroupCountFixture::getMaxWorkGroupCount()
4444
kernel.groupSize[0] = lws[0];
4545
kernel.groupSize[1] = lws[1];
4646
kernel.groupSize[2] = lws[2];
47-
uint32_t totalGroupCount = 0;
48-
kernel.KernelImp::suggestMaxCooperativeGroupCount(&totalGroupCount, NEO::EngineGroupType::cooperativeCompute, true);
47+
uint32_t totalGroupCount = kernel.KernelImp::suggestMaxCooperativeGroupCount(NEO::EngineGroupType::cooperativeCompute, true, false);
4948
return totalGroupCount;
5049
}
5150
} // namespace ult

level_zero/core/test/unit_tests/gen9/test_cmdlist_create_gen9.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
1919
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
2020
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
21+
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
2122

2223
#include <vector>
2324

@@ -44,6 +45,7 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
4445
DeviceFixture::tearDown();
4546
}
4647

48+
std::vector<std::unique_ptr<L0::ult::Module>> mockModules;
4749
std::vector<void *> isaBuffers;
4850
ze_group_count_t dispatchKernelArguments;
4951
void *buffer = nullptr;
@@ -85,6 +87,13 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
8587
kernel.perThreadDataSize = perThreadDataSize;
8688

8789
kernel.kernelImmData = &kernelData;
90+
91+
kernel.groupSize[0] = 1;
92+
kernel.groupSize[1] = 1;
93+
kernel.groupSize[2] = 1;
94+
95+
mockModules.emplace_back(std::make_unique<L0::ult::Module>(device, nullptr, ModuleType::builtin));
96+
kernel.module = mockModules.back().get();
8897
}
8998
void cleanupKernel(WhiteBox<::L0::KernelImmutableData> &kernelData) {
9099
kernelData.isaGraphicsAllocation.reset(nullptr);

level_zero/core/test/unit_tests/mocks/mock_kernel.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ Mock<::L0::KernelImp>::Mock() : BaseClass() {
4242
immutableData.kernelDescriptor = &descriptor;
4343
immutableData.kernelInfo = &info;
4444
crossThreadData.reset(new uint8_t[100]);
45+
46+
groupSize[0] = 1;
47+
groupSize[1] = 1;
48+
groupSize[2] = 1;
4549
}
4650
Mock<::L0::KernelImp>::~Mock() {
4751
delete immutableData.isaGraphicsAllocation.release();

0 commit comments

Comments
 (0)