diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 77856a2a4d9a1..988a3857e3070 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -342,10 +342,16 @@ event_impl::get_info() { if (MState == HES_Discarded) return info::event_command_status::ext_oneapi_unknown; - if (!MHostEvent && MEvent) { - return get_event_info( - this->getHandleRef(), this->getPlugin()); + if (!MHostEvent) { + // Command is enqueued and PiEvent is ready + if (MEvent) + return get_event_info( + this->getHandleRef(), this->getPlugin()); + // Command is blocked and not enqueued, PiEvent is not assigned yet + else if (MCommand) + return sycl::info::event_command_status::submitted; } + return MHostEvent && MState.load() != HES_Complete ? sycl::info::event_command_status::submitted : info::event_command_status::complete; diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index bc343ea662413..256f2cea18372 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -212,6 +212,11 @@ class event_impl { } bool needsCleanupAfterWait() { return MNeedsCleanupAfterWait; } + /// Returns worker queue for command. + /// + /// @return a reference to MWorkerQueue. + QueueImplPtr &getWorkerQueue() { return MWorkerQueue; }; + /// Checks if an event is in a fully intialized state. Default-constructed /// events will return true only after having initialized its native event, /// while other events will assume that they are fully initialized at @@ -243,6 +248,8 @@ class event_impl { std::weak_ptr MQueue; const bool MIsProfilingEnabled = false; + QueueImplPtr MWorkerQueue; + /// Dependency events prepared for waiting by backend. std::vector MPreparedDepsEvents; std::vector MPreparedHostDepsEvents; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index ec98c3ac3d8b6..3e3ec40bc2fa9 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -120,9 +120,7 @@ class queue_impl { } if (!MHostQueue) { const QueueOrder QOrder = - MPropList.has_property() - ? QueueOrder::Ordered - : QueueOrder::OOO; + MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO; MQueues.push_back(createQueue(QOrder)); } } @@ -202,6 +200,8 @@ class queue_impl { /// \return true if this queue has discard_events support. bool has_discard_events_support() const { return MHasDiscardEventsSupport; } + bool isInOrder() const { return MIsInorder; } + /// Queries SYCL queue for information. /// /// The return type depends on information being queried. diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 800fa40490015..445eb1fa79879 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -203,17 +203,34 @@ static std::string commandToName(Command::CommandType Type) { } #endif -static std::vector -getPiEvents(const std::vector &EventImpls) { +std::vector +Command::getPiEvents(const std::vector &EventImpls) const { std::vector RetPiEvents; for (auto &EventImpl : EventImpls) { - if (EventImpl->getHandleRef() != nullptr) - RetPiEvents.push_back(EventImpl->getHandleRef()); + if (EventImpl->getHandleRef() == nullptr) + continue; + + // Do not add redundant event dependencies for in-order queues. + // At this stage dependency is definitely pi task and need to check if + // current one is a host task. In this case we should not skip pi event due + // to different sync mechanisms for different task types on in-order queue. + const QueueImplPtr &WorkerQueue = getWorkerQueue(); + if (EventImpl->getWorkerQueue() == WorkerQueue && + WorkerQueue->isInOrder() && !isHostTask()) + continue; + + RetPiEvents.push_back(EventImpl->getHandleRef()); } return RetPiEvents; } +bool Command::isHostTask() const { + return (MType == CommandType::RUN_CG) /* host task has this type also */ && + ((static_cast(this))->getCG().getType() == + CG::CGTYPE::CodeplayHostTask); +} + static void flushCrossQueueDeps(const std::vector &EventImpls, const QueueImplPtr &Queue) { for (auto &EventImpl : EventImpls) { @@ -240,7 +257,8 @@ class DispatchHostTask { // sophisticated waiting mechanism to allow to utilize this thread for any // other available job and resume once all required events are ready. for (auto &PluginWithEvents : RequiredEventsPerPlugin) { - std::vector RawEvents = getPiEvents(PluginWithEvents.second); + std::vector RawEvents = + MThisCmd->getPiEvents(PluginWithEvents.second); try { PluginWithEvents.first->call(RawEvents.size(), RawEvents.data()); @@ -393,10 +411,12 @@ void Command::waitForEvents(QueueImplPtr Queue, Command::Command(CommandType Type, QueueImplPtr Queue) : MQueue(std::move(Queue)), MEvent(std::make_shared(MQueue)), + MWorkerQueue(MEvent->getWorkerQueue()), MPreparedDepsEvents(MEvent->getPreparedDepsEvents()), MPreparedHostDepsEvents(MEvent->getPreparedHostDepsEvents()), MType(Type) { MSubmittedQueue = MQueue; + MWorkerQueue = MQueue; MEvent->setCommand(this); MEvent->setContextImpl(MQueue->getContextImplPtr()); MEvent->setStateIncomplete(); @@ -600,12 +620,6 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, Command *ConnectionCmd = nullptr; - // Do not add redundant event dependencies for in-order queues. - if (Dep.MDepCommand && Dep.MDepCommand->getWorkerQueue() == WorkerQueue && - WorkerQueue->has_property() && - getType() != CommandType::HOST_TASK) - return nullptr; - ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task if (DepEventContext != WorkerContext && !WorkerContext->is_host()) { @@ -621,14 +635,14 @@ const ContextImplPtr &Command::getWorkerContext() const { return MQueue->getContextImplPtr(); } -const QueueImplPtr &Command::getWorkerQueue() const { return MQueue; } +const QueueImplPtr &Command::getWorkerQueue() const { + assert(MWorkerQueue && "MWorkerQueue must not be nullptr"); + return MWorkerQueue; +} bool Command::producesPiEvent() const { return true; } -bool Command::supportsPostEnqueueCleanup() const { - // Isolated commands are cleaned up separately - return !MUsers.empty() || !MDeps.empty(); -} +bool Command::supportsPostEnqueueCleanup() const { return true; } Command *Command::addDep(DepDesc NewDep, std::vector &ToCleanUp) { Command *ConnectionCmd = nullptr; @@ -1298,6 +1312,9 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq, if (!MSrcQueue->is_host()) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } + + MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue; + emitInstrumentationDataProxy(); } @@ -1335,10 +1352,6 @@ const ContextImplPtr &MemCpyCommand::getWorkerContext() const { return getWorkerQueue()->getContextImplPtr(); } -const QueueImplPtr &MemCpyCommand::getWorkerQueue() const { - return MQueue->is_host() ? MSrcQueue : MQueue; -} - bool MemCpyCommand::producesPiEvent() const { // TODO remove this workaround once the batching issue is addressed in Level // Zero plugin. @@ -1481,6 +1494,8 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq, MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } + MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue; + emitInstrumentationDataProxy(); } @@ -1518,10 +1533,6 @@ const ContextImplPtr &MemCpyCommandHost::getWorkerContext() const { return getWorkerQueue()->getContextImplPtr(); } -const QueueImplPtr &MemCpyCommandHost::getWorkerQueue() const { - return MQueue->is_host() ? MSrcQueue : MQueue; -} - pi_int32 MemCpyCommandHost::enqueueImp() { const QueueImplPtr &Queue = getWorkerQueue(); waitForPreparedHostEvents(); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index bcbff35252704..d95f0e307456a 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -199,7 +199,7 @@ class Command { /// Get the queue this command will be submitted to. Could differ from MQueue /// for memory copy commands. - virtual const QueueImplPtr &getWorkerQueue() const; + const QueueImplPtr &getWorkerQueue() const; /// Returns true iff the command produces a PI event on non-host devices. virtual bool producesPiEvent() const; @@ -207,11 +207,20 @@ class Command { /// Returns true iff this command can be freed by post enqueue cleanup. virtual bool supportsPostEnqueueCleanup() const; + /// Collect PI events from EventImpls and filter out some of them in case of + /// in order queue + std::vector + getPiEvents(const std::vector &EventImpls) const; + + bool isHostTask() const; + protected: QueueImplPtr MQueue; QueueImplPtr MSubmittedQueue; EventImplPtr MEvent; + QueueImplPtr &MWorkerQueue; + /// Dependency events prepared for waiting by backend. /// See processDepEvent for details. std::vector &MPreparedDepsEvents; @@ -252,6 +261,10 @@ class Command { return MPreparedHostDepsEvents; } + const std::vector &getPreparedDepsEvents() const { + return MPreparedDepsEvents; + } + /// Contains list of dependencies(edges) std::vector MDeps; /// Contains list of commands that depend on the command. @@ -492,7 +505,6 @@ class MemCpyCommand : public Command { const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; const ContextImplPtr &getWorkerContext() const final; - const QueueImplPtr &getWorkerQueue() const final; bool producesPiEvent() const final; private: @@ -517,7 +529,6 @@ class MemCpyCommandHost : public Command { const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; const ContextImplPtr &getWorkerContext() const final; - const QueueImplPtr &getWorkerQueue() const final; private: pi_int32 enqueueImp() final; diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index 0b7865b5346fb..900fa713d58ce 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -58,23 +58,20 @@ bool Scheduler::GraphProcessor::enqueueCommand( return false; } - // Recursively enqueue all the dependencies first and - // exit immediately if any of the commands cannot be enqueued. - for (DepDesc &Dep : Cmd->MDeps) { - if (!enqueueCommand(Dep.MDepCommand, EnqueueResult, ToCleanUp, Blocking)) - return false; + // Recursively enqueue all the implicit + explicit backend level dependencies + // first and exit immediately if any of the commands cannot be enqueued. + for (const EventImplPtr &Event : Cmd->getPreparedDepsEvents()) { + if (Command *DepCmd = static_cast(Event->getCommand())) + if (!enqueueCommand(DepCmd, EnqueueResult, ToCleanUp, Blocking)) + return false; } - // Asynchronous host operations (amongst dependencies of an arbitrary command) - // are not supported (see Command::processDepEvent method). This impacts - // operation of host-task feature a lot with hangs and long-runs. Hence we - // have this workaround here. - // This workaround is safe as long as the only asynchronous host operation we - // have is a host task. - // This may iterate over some of dependencies in Cmd->MDeps. Though, the - // enqueue operation is idempotent and the second call will result in no-op. - // TODO remove the workaround when proper fix for host-task dispatching is - // implemented. + // Recursively enqueue all the implicit + explicit host dependencies and + // exit immediately if any of the commands cannot be enqueued. + // Host task execution is asynchronous. In current implementation enqueue for + // this command will wait till host task completion by waitInternal call on + // MHostDepsEvents. TO FIX: implement enqueue of blocked commands on host task + // completion stage and eliminate this event waiting in enqueue. for (const EventImplPtr &Event : Cmd->getPreparedHostDepsEvents()) { if (Command *DepCmd = static_cast(Event->getCommand())) if (!enqueueCommand(DepCmd, EnqueueResult, ToCleanUp, Blocking)) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index c76ada8a70b97..b43b53aa72dcf 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -157,12 +157,6 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, CleanUp(); std::rethrow_exception(std::current_exception()); } - - // If there are no memory dependencies decouple and free the command. - // Though, dismiss ownership of native kernel command group as it's - // resources may be in use by backend and synchronization point here is - // at native kernel execution finish. - CleanUp(); } } cleanupCommands(ToCleanUp); diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index 774ade8129200..8211d8ab0ef04 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -22,4 +22,5 @@ add_sycl_unittest(SchedulerTests OBJECT LeafLimitDiffContexts.cpp InOrderQueueSyncCheck.cpp RunOnHostIntelCG.cpp + EnqueueWithDependsOnDeps.cpp ) diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp new file mode 100644 index 0000000000000..d06df85f4d07a --- /dev/null +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -0,0 +1,396 @@ +//==------------ EnqueueWithDependsOnDeps.cpp --- Scheduler unit tests------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SchedulerTest.hpp" +#include "SchedulerTestUtils.hpp" +#include + +#include +#include +#include +#include + +#include + +using namespace sycl; +using EventImplPtr = std::shared_ptr; + +namespace DependsOnTest { +class MockHandlerCustom : public MockHandler { +public: + MockHandlerCustom(std::shared_ptr Queue, + bool IsHost) + : MockHandler(Queue, IsHost) {} + + std::unique_ptr finalize() { + std::unique_ptr CommandGroup; + switch (getType()) { + case sycl::detail::CG::Kernel: { + CommandGroup.reset(new sycl::detail::CGExecKernel( + getNDRDesc(), std::move(getHostKernel()), getKernel(), + std::move(MImpl->MKernelBundle), getArgsStorage(), getAccStorage(), + getSharedPtrStorage(), getRequirements(), getEvents(), getArgs(), + getKernelName(), getOSModuleHandle(), getStreamStorage(), + MImpl->MAuxiliaryResources, getCGType(), getCodeLoc())); + break; + } + case sycl::detail::CG::CodeplayHostTask: { + CommandGroup.reset(new detail::CGHostTask( + std::move(getHostTask()), getQueue(), getQueue()->getContextImplPtr(), + getArgs(), getArgsStorage(), getAccStorage(), getSharedPtrStorage(), + getRequirements(), getEvents(), getCGType(), getCodeLoc())); + break; + } + default: + throw sycl::runtime_error("Unhandled type of command group", + PI_ERROR_INVALID_OPERATION); + } + + return CommandGroup; + } +}; +} // namespace DependsOnTest +detail::Command *AddTaskCG(bool IsHost, MockScheduler &MS, + detail::QueueImplPtr DevQueue, + const std::vector &Events) { + std::vector ToEnqueue; + + // Emulating processing of command group function + DependsOnTest::MockHandlerCustom MockCGH(DevQueue, false); + + for (auto EventImpl : Events) + MockCGH.depends_on(detail::createSyclObjFromImpl(EventImpl)); + + if (IsHost) + MockCGH.host_task([] {}); + else { + kernel_bundle KernelBundle = + sycl::get_kernel_bundle( + DevQueue->get_context()); + auto ExecBundle = sycl::build(KernelBundle); + MockCGH.use_kernel_bundle(ExecBundle); + MockCGH.single_task>([] {}); + } + + std::unique_ptr CmdGroup = MockCGH.finalize(); + + detail::Command *NewCmd = + MS.addCG(std::move(CmdGroup), + IsHost ? MS.getDefaultHostQueue() : DevQueue, ToEnqueue); + EXPECT_EQ(ToEnqueue.size(), 0u); + return NewCmd; +} + +bool CheckTestExecutionRequirements(const platform &plt) { + if (plt.is_host()) { + std::cout << "Not run due to host-only environment\n"; + return false; + } + // This test only contains device image for SPIR-V capable devices. + if (plt.get_backend() != sycl::backend::opencl && + plt.get_backend() != sycl::backend::ext_oneapi_level_zero) { + std::cout << "Only OpenCL and Level Zero are supported for this test\n"; + return false; + } + return true; +} + +inline constexpr auto DisablePostEnqueueCleanupName = + "SYCL_DISABLE_POST_ENQUEUE_CLEANUP"; + +TEST_F(SchedulerTest, EnqueueNoMemObjTwoHostTasks) { + // Checks enqueue of two dependent host tasks + + unittest::ScopedEnvVar DisabledCleanup{ + DisablePostEnqueueCleanupName, "1", + detail::SYCLConfig::reset}; + + default_selector Selector; + platform Plt{Selector}; + if (!CheckTestExecutionRequirements(Plt)) + return; + + queue QueueDev(context(Plt), Selector); + MockScheduler MS; + + detail::QueueImplPtr QueueDevImpl = detail::getSyclObjImpl(QueueDev); + detail::QueueImplPtr QueueHostImpl = MS.getDefaultHostQueue(); + + std::vector Events; + + detail::Command *Cmd1 = AddTaskCG(true, MS, QueueDevImpl, Events); + EventImplPtr Cmd1Event = Cmd1->getEvent(); + + // Simulate depends_on() call + Events.push_back(Cmd1Event); + detail::Command *Cmd2 = AddTaskCG(true, MS, QueueDevImpl, Events); + EventImplPtr Cmd2Event = Cmd2->getEvent(); + + detail::EnqueueResultT Result; + EXPECT_TRUE(MS.enqueueCommand(Cmd2, Result, detail::BlockingT::NON_BLOCKING)); + + // Preconditions for post enqueue checks + EXPECT_TRUE(Cmd1->isSuccessfullyEnqueued()); + EXPECT_TRUE(Cmd2->isSuccessfullyEnqueued()); + + Cmd2Event->wait(Cmd2Event); + EXPECT_EQ(Cmd1Event->get_info(), + info::event_command_status::complete); + EXPECT_EQ(Cmd2Event->get_info(), + info::event_command_status::complete); +} + +TEST_F(SchedulerTest, EnqueueNoMemObjKernelDepHost) { + // Checks enqueue of kernel depending on host task + unittest::ScopedEnvVar DisabledCleanup{ + DisablePostEnqueueCleanupName, "1", + detail::SYCLConfig::reset}; + + default_selector Selector; + platform Plt{Selector}; + if (!CheckTestExecutionRequirements(Plt)) + return; + + unittest::PiMock Mock{Plt}; + setupDefaultMockAPIs(Mock); + + queue QueueDev(context(Plt), Selector); + MockScheduler MS; + + detail::QueueImplPtr QueueDevImpl = detail::getSyclObjImpl(QueueDev); + + std::vector Events; + + detail::Command *Cmd1 = AddTaskCG(true, MS, QueueDevImpl, Events); + EventImplPtr Cmd1Event = Cmd1->getEvent(); + + // Simulate depends_on() call + Events.push_back(Cmd1Event); + detail::Command *Cmd2 = AddTaskCG(false, MS, QueueDevImpl, Events); + EventImplPtr Cmd2Event = Cmd2->getEvent(); + + detail::EnqueueResultT Result; + EXPECT_TRUE(MS.enqueueCommand(Cmd2, Result, detail::BlockingT::NON_BLOCKING)); + + // Preconditions for post enqueue checks + EXPECT_TRUE(Cmd1->isSuccessfullyEnqueued()); + EXPECT_TRUE(Cmd2->isSuccessfullyEnqueued()); + + Cmd2Event->wait(Cmd2Event); +} + +TEST_F(SchedulerTest, EnqueueNoMemObjHostDepKernel) { + // Checks enqueue of host task depending on kernel + unittest::ScopedEnvVar DisabledCleanup{ + DisablePostEnqueueCleanupName, "1", + detail::SYCLConfig::reset}; + + default_selector Selector; + platform Plt{Selector}; + if (!CheckTestExecutionRequirements(Plt)) + return; + + unittest::PiMock Mock{Plt}; + setupDefaultMockAPIs(Mock); + + queue QueueDev(context(Plt), Selector); + MockScheduler MS; + + detail::QueueImplPtr QueueDevImpl = detail::getSyclObjImpl(QueueDev); + + std::vector Events; + + detail::Command *Cmd1 = AddTaskCG(false, MS, QueueDevImpl, Events); + EventImplPtr Cmd1Event = Cmd1->getEvent(); + + // Simulate depends_on() call + Events.push_back(Cmd1Event); + detail::Command *Cmd2 = AddTaskCG(true, MS, QueueDevImpl, Events); + EventImplPtr Cmd2Event = Cmd2->getEvent(); + + detail::EnqueueResultT Result; + EXPECT_TRUE(MS.enqueueCommand(Cmd2, Result, detail::BlockingT::NON_BLOCKING)); + + // Preconditions for post enqueue checks + EXPECT_TRUE(Cmd1->isSuccessfullyEnqueued()); + EXPECT_TRUE(Cmd2->isSuccessfullyEnqueued()); + Cmd2Event->wait(Cmd2Event); +} + +TEST_F(SchedulerTest, EnqueueNoMemObjDoubleKernelDepHostBlocked) { + // Checks blocking command tranfer for dependent kernels and enqueue of root + // kernel on host task completion + unittest::ScopedEnvVar DisabledCleanup{ + DisablePostEnqueueCleanupName, "1", + detail::SYCLConfig::reset}; + + default_selector Selector; + platform Plt{Selector}; + if (!CheckTestExecutionRequirements(Plt)) + return; + + unittest::PiMock Mock{Plt}; + setupDefaultMockAPIs(Mock); + + queue QueueDev(context(Plt), Selector); + MockScheduler MS; + + detail::QueueImplPtr QueueDevImpl = detail::getSyclObjImpl(QueueDev); + + std::vector Events; + + detail::Command *Cmd1 = AddTaskCG(true, MS, QueueDevImpl, Events); + EventImplPtr Cmd1Event = Cmd1->getEvent(); + Cmd1->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; + + // Depends on host task + Events.push_back(Cmd1Event); + detail::Command *Cmd2 = AddTaskCG(false, MS, QueueDevImpl, Events); + EventImplPtr Cmd2Event = Cmd2->getEvent(); + + // Depends on kernel depending on host task + Events.clear(); + Events.push_back(Cmd2Event); + detail::Command *Cmd3 = AddTaskCG(false, MS, QueueDevImpl, Events); + EventImplPtr Cmd3Event = Cmd2->getEvent(); + + detail::EnqueueResultT Result; + EXPECT_FALSE( + MS.enqueueCommand(Cmd2, Result, detail::BlockingT::NON_BLOCKING)); + EXPECT_EQ(Result.MResult, detail::EnqueueResultT::SyclEnqueueBlocked); + EXPECT_EQ(Result.MCmd, static_cast(Cmd1)); + EXPECT_FALSE( + MS.enqueueCommand(Cmd3, Result, detail::BlockingT::NON_BLOCKING)); + EXPECT_EQ(Result.MResult, detail::EnqueueResultT::SyclEnqueueBlocked); + EXPECT_EQ(Result.MCmd, static_cast(Cmd1)); + + // Preconditions for post enqueue checks + EXPECT_FALSE(Cmd1->isSuccessfullyEnqueued()); + EXPECT_FALSE(Cmd2->isSuccessfullyEnqueued()); + EXPECT_FALSE(Cmd3->isSuccessfullyEnqueued()); + + Cmd1->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; + + EXPECT_TRUE(MS.enqueueCommand(Cmd3, Result, detail::BlockingT::NON_BLOCKING)); + + EXPECT_TRUE(Cmd1->isSuccessfullyEnqueued()); + EXPECT_TRUE(Cmd2->isSuccessfullyEnqueued()); + EXPECT_TRUE(Cmd3->isSuccessfullyEnqueued()); + + Cmd3Event->wait(Cmd2Event); +} + +std::vector> PassedNumEvents; +inline pi_result redefinedEventsWaitCustom(pi_uint32 num_events, + const pi_event *event_list) { + PassedNumEvents.push_back(std::make_pair(num_events, event_list)); + return PI_SUCCESS; +} + +std::vector> PassedNumEventsToLaunch; +inline pi_result redefinedEnqueueKernelLaunchCustom( + pi_queue, pi_kernel, pi_uint32, const size_t *, const size_t *, + const size_t *, pi_uint32 num_events, const pi_event *event_list, + pi_event *event) { + PassedNumEventsToLaunch.push_back(std::make_pair(num_events, event_list)); + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +void EventsWaitVerification(queue &QueueDev) { + MockScheduler MS; + + detail::QueueImplPtr QueueDevImpl = detail::getSyclObjImpl(QueueDev); + + std::vector Events; + + detail::Command *Cmd1 = AddTaskCG(true, MS, QueueDevImpl, Events); + EventImplPtr Cmd1Event = Cmd1->getEvent(); + + // Depends on host task + Events.push_back(Cmd1Event); + detail::Command *Cmd2 = AddTaskCG(false, MS, QueueDevImpl, Events); + EventImplPtr Cmd2Event = Cmd2->getEvent(); + + // Depends on kernel depending on host task + Events.clear(); + Events.push_back(Cmd2Event); + detail::Command *Cmd3 = AddTaskCG(false, MS, QueueDevImpl, Events); + EventImplPtr Cmd3Event = Cmd2->getEvent(); + + detail::EnqueueResultT Result; + EXPECT_TRUE(MS.enqueueCommand(Cmd3, Result, detail::BlockingT::NON_BLOCKING)); + Cmd3Event->wait(Cmd3Event); + + // One piEventsWait call: + // kernel2 waits for kernel 1 by sending event list to enqueue launch call + // (depending on queue property). Cmd3Event.wait() waits for kernel2 via + // piEventsWait. + ASSERT_EQ(PassedNumEvents.size(), 1u); + auto [EventCount, EventArr] = PassedNumEvents[0]; + ASSERT_EQ(EventCount, 1u); + EXPECT_EQ(*EventArr, Cmd3Event->getHandleRef()); +} + +TEST_F(SchedulerTest, InOrderEnqueueNoMemObjDoubleKernelDepHost) { + // Checks blocking command tranfer for dependent kernels and enqueue of root + // kernel on host task completion + unittest::ScopedEnvVar DisabledCleanup{ + DisablePostEnqueueCleanupName, "1", + detail::SYCLConfig::reset}; + + default_selector Selector; + platform Plt{Selector}; + if (!CheckTestExecutionRequirements(Plt)) + return; + + unittest::PiMock Mock{Plt}; + setupDefaultMockAPIs(Mock); + Mock.redefine(redefinedEventsWaitCustom); + Mock.redefine( + redefinedEnqueueKernelLaunchCustom); + + { + queue QueueDev(context(Plt), Selector); + PassedNumEvents.clear(); + PassedNumEventsToLaunch.clear(); + EventsWaitVerification(QueueDev); + // 1st -> kernel after host, no pi events + // 2nd -> kernel after kernel, 1 pi event + ASSERT_EQ(PassedNumEventsToLaunch.size(), 2u); + { + auto [EventCount, EventArr] = PassedNumEventsToLaunch[0]; + EXPECT_EQ(EventCount, 0u); + EXPECT_EQ(EventArr, nullptr); + } + { + auto [EventCount, EventArr] = PassedNumEventsToLaunch[1]; + EXPECT_EQ(EventCount, 1u); + } + } + + { + queue QueueDev(context(Plt), Selector, property::queue::in_order()); + PassedNumEvents.clear(); + PassedNumEventsToLaunch.clear(); + EventsWaitVerification(QueueDev); + // 1st -> kernel after host, no pi events + // 2nd -> kernel after kernel and in order queue, 0 pi event + ASSERT_EQ(PassedNumEventsToLaunch.size(), 2u); + { + auto [EventCount, EventArr] = PassedNumEventsToLaunch[0]; + EXPECT_EQ(EventCount, 0u); + EXPECT_EQ(EventArr, nullptr); + } + { + auto [EventCount, EventArr] = PassedNumEventsToLaunch[1]; + EXPECT_EQ(EventCount, 0u); + EXPECT_EQ(EventArr, nullptr); + } + } +} \ No newline at end of file diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index dbe604944da43..078f767e664ff 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -61,11 +61,7 @@ TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { CGH.use_kernel_bundle(ExecBundle); CGH.single_task>([] {}); }); - InOrderQueue - .submit([&](sycl::handler &CGH) { - CGH.use_kernel_bundle(ExecBundle); - CGH.host_task([=] {}); - }) + InOrderQueue.submit([&](sycl::handler &CGH) { CGH.host_task([=] {}); }) .wait(); EXPECT_TRUE(GEventsWaitCounter == 1); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index be4543ec65cb0..02511f92eca69 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -209,3 +209,65 @@ sycl::detail::Requirement getMockRequirement(const MemObjT &MemObj) { /*Dims*/ 0, /*ElementSize*/ 0}; } + +class MockHandler : public sycl::handler { +public: + MockHandler(std::shared_ptr Queue, bool IsHost) + : sycl::handler(Queue, IsHost) {} + // Methods + using sycl::handler::getType; + using sycl::handler::MImpl; + + sycl::detail::NDRDescT &getNDRDesc() { return MNDRDesc; } + sycl::detail::code_location &getCodeLoc() { return MCodeLoc; } + sycl::detail::CG::CGTYPE &getCGType() { return MCGType; } + std::vector> &getStreamStorage() { + return MStreamStorage; + } + std::unique_ptr &getHostKernel() { + return MHostKernel; + } + std::vector> &getArgsStorage() { return MArgsStorage; } + std::vector &getAccStorage() { + return MAccStorage; + } + std::vector> &getSharedPtrStorage() { + return MSharedPtrStorage; + } + std::vector &getRequirements() { + return MRequirements; + } + std::vector &getEvents() { return MEvents; } + std::vector &getArgs() { return MArgs; } + std::string &getKernelName() { return MKernelName; } + sycl::detail::OSModuleHandle &getOSModuleHandle() { return MOSModuleHandle; } + std::shared_ptr &getKernel() { return MKernel; } + std::unique_ptr &getHostTask() { return MHostTask; } + std::shared_ptr &getQueue() { return MQueue; } + + void setType(sycl::detail::CG::CGTYPE Type) { + static_cast(this)->MCGType = Type; + } + + template + void setHostKernel(KernelType Kernel) { + static_cast(this)->MHostKernel.reset( + new sycl::detail::HostKernel(Kernel)); + } + + template void setNDRangeDesc(sycl::nd_range Range) { + static_cast(this)->MNDRDesc.set(std::move(Range)); + } + + void addStream(const sycl::detail::StreamImplPtr &Stream) { + sycl::handler::addStream(Stream); + } + + std::unique_ptr finalize() { + throw sycl::runtime_error("Unhandled type of command group", + PI_ERROR_INVALID_OPERATION); + + return nullptr; + } +}; \ No newline at end of file diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 0f7b95fcb586e..85d764b4b283a 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -19,45 +19,22 @@ using namespace sycl; inline constexpr auto DisablePostEnqueueCleanupName = "SYCL_DISABLE_POST_ENQUEUE_CLEANUP"; -class MockHandler : public sycl::handler { +class MockHandlerStreamInit : public MockHandler { public: - MockHandler(std::shared_ptr Queue, bool IsHost) - : sycl::handler(Queue, IsHost) {} - - void setType(detail::CG::CGTYPE Type) { - static_cast(this)->MCGType = Type; - } - - template - void setHostKernel(KernelType Kernel) { - static_cast(this)->MHostKernel.reset( - new sycl::detail::HostKernel(Kernel)); - } - - template void setNDRangeDesc(sycl::nd_range Range) { - static_cast(this)->MNDRDesc.set(std::move(Range)); - } - - void addStream(const detail::StreamImplPtr &Stream) { - sycl::handler::addStream(Stream); - } - + MockHandlerStreamInit(std::shared_ptr Queue, bool IsHost) + : MockHandler(Queue, IsHost) {} std::unique_ptr finalize() { - auto CGH = static_cast(this); std::unique_ptr CommandGroup; - switch (CGH->MCGType) { + switch (getType()) { case detail::CG::Kernel: case detail::CG::RunOnHostIntel: { CommandGroup.reset(new detail::CGExecKernel( - std::move(CGH->MNDRDesc), std::move(CGH->MHostKernel), - std::move(CGH->MKernel), std::move(MImpl->MKernelBundle), - std::move(CGH->MArgsStorage), std::move(CGH->MAccStorage), - std::move(CGH->MSharedPtrStorage), std::move(CGH->MRequirements), - std::move(CGH->MEvents), std::move(CGH->MArgs), - std::move(CGH->MKernelName), std::move(CGH->MOSModuleHandle), - std::move(CGH->MStreamStorage), std::move(MImpl->MAuxiliaryResources), - CGH->MCGType, CGH->MCodeLoc)); + getNDRDesc(), std::move(getHostKernel()), getKernel(), + std::move(MImpl->MKernelBundle), + getArgsStorage(), getAccStorage(), getSharedPtrStorage(), + getRequirements(), getEvents(), getArgs(), getKernelName(), + getOSModuleHandle(), getStreamStorage(), std::move(MImpl->MAuxiliaryResources), + getCGType(), getCodeLoc())); break; } default: @@ -107,7 +84,7 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { detail::QueueImplPtr HQueueImpl = detail::getSyclObjImpl(HQueue); // Emulating processing of command group function - MockHandler MockCGH(HQueueImpl, true); + MockHandlerStreamInit MockCGH(HQueueImpl, true); MockCGH.setType(detail::CG::Kernel); auto EmptyKernel = [](sycl::nd_item<1>) {}; diff --git a/sycl/unittests/scheduler/WaitAfterCleanup.cpp b/sycl/unittests/scheduler/WaitAfterCleanup.cpp index 1ddf2b3299053..751086fc96eec 100644 --- a/sycl/unittests/scheduler/WaitAfterCleanup.cpp +++ b/sycl/unittests/scheduler/WaitAfterCleanup.cpp @@ -8,10 +8,24 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" +#include using namespace sycl; +TEST_F(SchedulerTest, PostEnqueueCleanupForCommandDefault) { + auto Cmd = new MockCommand(detail::getSyclObjImpl(MQueue)); + auto Event = Cmd->getEvent(); + ASSERT_FALSE(Event == nullptr) << "Command must have an event\n"; + + detail::Scheduler::getInstance().waitForEvent(Event); + EXPECT_EQ(Event->getCommand(), nullptr) << "Command should be cleaned up\n"; +} + TEST_F(SchedulerTest, WaitAfterCleanup) { + unittest::ScopedEnvVar DisabledCleanup{ + "SYCL_DISABLE_POST_ENQUEUE_CLEANUP", "1", + detail::SYCLConfig::reset}; + auto Cmd = new MockCommand(detail::getSyclObjImpl(MQueue)); auto Event = Cmd->getEvent(); ASSERT_FALSE(Event == nullptr) << "Command must have an event\n";