@@ -203,17 +203,34 @@ static std::string commandToName(Command::CommandType Type) {
203
203
}
204
204
#endif
205
205
206
- static std::vector<RT::PiEvent>
207
- getPiEvents (const std::vector<EventImplPtr> &EventImpls) {
206
+ std::vector<RT::PiEvent>
207
+ Command:: getPiEvents (const std::vector<EventImplPtr> &EventImpls) const {
208
208
std::vector<RT::PiEvent> RetPiEvents;
209
209
for (auto &EventImpl : EventImpls) {
210
- if (EventImpl->getHandleRef () != nullptr )
211
- RetPiEvents.push_back (EventImpl->getHandleRef ());
210
+ if (EventImpl->getHandleRef () == nullptr )
211
+ continue ;
212
+
213
+ // Do not add redundant event dependencies for in-order queues.
214
+ // At this stage dependency is definitely pi task and need to check if
215
+ // current one is a host task. In this case we should not skip pi event due
216
+ // to different sync mechanisms for different task types on in-order queue.
217
+ const QueueImplPtr &WorkerQueue = getWorkerQueue ();
218
+ if (EventImpl->getWorkerQueue () == WorkerQueue &&
219
+ WorkerQueue->isInOrder () && !isHostTask ())
220
+ continue ;
221
+
222
+ RetPiEvents.push_back (EventImpl->getHandleRef ());
212
223
}
213
224
214
225
return RetPiEvents;
215
226
}
216
227
228
+ bool Command::isHostTask () const {
229
+ return (MType == CommandType::RUN_CG) /* host task has this type also */ &&
230
+ ((static_cast <const ExecCGCommand *>(this ))->getCG ().getType () ==
231
+ CG::CGTYPE::CodeplayHostTask);
232
+ }
233
+
217
234
static void flushCrossQueueDeps (const std::vector<EventImplPtr> &EventImpls,
218
235
const QueueImplPtr &Queue) {
219
236
for (auto &EventImpl : EventImpls) {
@@ -240,7 +257,8 @@ class DispatchHostTask {
240
257
// sophisticated waiting mechanism to allow to utilize this thread for any
241
258
// other available job and resume once all required events are ready.
242
259
for (auto &PluginWithEvents : RequiredEventsPerPlugin) {
243
- std::vector<RT::PiEvent> RawEvents = getPiEvents (PluginWithEvents.second );
260
+ std::vector<RT::PiEvent> RawEvents =
261
+ MThisCmd->getPiEvents (PluginWithEvents.second );
244
262
try {
245
263
PluginWithEvents.first ->call <PiApiKind::piEventsWait>(RawEvents.size (),
246
264
RawEvents.data ());
@@ -393,10 +411,12 @@ void Command::waitForEvents(QueueImplPtr Queue,
393
411
Command::Command (CommandType Type, QueueImplPtr Queue)
394
412
: MQueue(std::move(Queue)),
395
413
MEvent (std::make_shared<detail::event_impl>(MQueue)),
414
+ MWorkerQueue(MEvent->getWorkerQueue ()),
396
415
MPreparedDepsEvents(MEvent->getPreparedDepsEvents ()),
397
416
MPreparedHostDepsEvents(MEvent->getPreparedHostDepsEvents ()),
398
417
MType(Type) {
399
418
MSubmittedQueue = MQueue;
419
+ MWorkerQueue = MQueue;
400
420
MEvent->setCommand (this );
401
421
MEvent->setContextImpl (MQueue->getContextImplPtr ());
402
422
MEvent->setStateIncomplete ();
@@ -600,12 +620,6 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep,
600
620
601
621
Command *ConnectionCmd = nullptr ;
602
622
603
- // Do not add redundant event dependencies for in-order queues.
604
- if (Dep.MDepCommand && Dep.MDepCommand ->getWorkerQueue () == WorkerQueue &&
605
- WorkerQueue->has_property <property::queue::in_order>() &&
606
- getType () != CommandType::HOST_TASK)
607
- return nullptr ;
608
-
609
623
ContextImplPtr DepEventContext = DepEvent->getContextImpl ();
610
624
// If contexts don't match we'll connect them using host task
611
625
if (DepEventContext != WorkerContext && !WorkerContext->is_host ()) {
@@ -621,14 +635,14 @@ const ContextImplPtr &Command::getWorkerContext() const {
621
635
return MQueue->getContextImplPtr ();
622
636
}
623
637
624
- const QueueImplPtr &Command::getWorkerQueue () const { return MQueue; }
638
+ const QueueImplPtr &Command::getWorkerQueue () const {
639
+ assert (MWorkerQueue && " MWorkerQueue must not be nullptr" );
640
+ return MWorkerQueue;
641
+ }
625
642
626
643
/// Reports whether this command produces a PI event.
///
/// \return always true for this definition.
bool Command::producesPiEvent() const {
  return true;
}
627
644
628
- bool Command::supportsPostEnqueueCleanup () const {
629
- // Isolated commands are cleaned up separately
630
- return !MUsers.empty () || !MDeps.empty ();
631
- }
645
+ bool Command::supportsPostEnqueueCleanup () const { return true ; }
632
646
633
647
Command *Command::addDep (DepDesc NewDep, std::vector<Command *> &ToCleanUp) {
634
648
Command *ConnectionCmd = nullptr ;
@@ -1298,6 +1312,9 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq,
1298
1312
if (!MSrcQueue->is_host ()) {
1299
1313
MEvent->setContextImpl (MSrcQueue->getContextImplPtr ());
1300
1314
}
1315
+
1316
+ MWorkerQueue = MQueue->is_host () ? MSrcQueue : MQueue;
1317
+
1301
1318
emitInstrumentationDataProxy ();
1302
1319
}
1303
1320
@@ -1335,10 +1352,6 @@ const ContextImplPtr &MemCpyCommand::getWorkerContext() const {
1335
1352
return getWorkerQueue ()->getContextImplPtr ();
1336
1353
}
1337
1354
1338
- const QueueImplPtr &MemCpyCommand::getWorkerQueue () const {
1339
- return MQueue->is_host () ? MSrcQueue : MQueue;
1340
- }
1341
-
1342
1355
bool MemCpyCommand::producesPiEvent () const {
1343
1356
// TODO remove this workaround once the batching issue is addressed in Level
1344
1357
// Zero plugin.
@@ -1481,6 +1494,8 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq,
1481
1494
MEvent->setContextImpl (MSrcQueue->getContextImplPtr ());
1482
1495
}
1483
1496
1497
+ MWorkerQueue = MQueue->is_host () ? MSrcQueue : MQueue;
1498
+
1484
1499
emitInstrumentationDataProxy ();
1485
1500
}
1486
1501
@@ -1518,10 +1533,6 @@ const ContextImplPtr &MemCpyCommandHost::getWorkerContext() const {
1518
1533
return getWorkerQueue ()->getContextImplPtr ();
1519
1534
}
1520
1535
1521
- const QueueImplPtr &MemCpyCommandHost::getWorkerQueue () const {
1522
- return MQueue->is_host () ? MSrcQueue : MQueue;
1523
- }
1524
-
1525
1536
pi_int32 MemCpyCommandHost::enqueueImp () {
1526
1537
const QueueImplPtr &Queue = getWorkerQueue ();
1527
1538
waitForPreparedHostEvents ();
0 commit comments