@@ -333,7 +333,8 @@ GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
       OMPX_InitialNumStreams("LIBOMPTARGET_NUM_INITIAL_STREAMS", 32),
       OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 32),
       DeviceId(DeviceId), GridValues(OMPGridValues),
-      PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock() {
+      PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(),
+      PinnedAllocs(*this) {
   if (OMP_NumTeams > 0)
     GridValues.GV_Max_Teams =
         std::min(GridValues.GV_Max_Teams, uint32_t(OMP_NumTeams));
@@ -581,23 +582,110 @@ GenericDeviceTy::getExecutionModeForKernel(StringRef Name,
   return ExecModeGlobal.getValue();
 }
 
-Error GenericDeviceTy::registerHostPinnedMemoryBuffer(const void *Buffer,
-                                                      size_t Size) {
-  std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
+Error PinnedAllocationMapTy::registerHostBuffer(void *HstPtr,
+                                                void *DevAccessiblePtr,
+                                                size_t Size) {
+  assert(HstPtr && "Invalid pointer");
+  assert(DevAccessiblePtr && "Invalid pointer");
 
-  auto Res = HostAllocations.insert({Buffer, Size});
+  std::lock_guard<std::shared_mutex> Lock(Mutex);
+
+  // No pinned allocation should intersect.
+  auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size});
   if (!Res.second)
-    return Plugin::error("Registering an already registered pinned buffer");
+    return Plugin::error("Cannot register locked buffer");
+
+  return Plugin::success();
+}
+
+Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) {
+  assert(HstPtr && "Invalid pointer");
+
+  std::lock_guard<std::shared_mutex> Lock(Mutex);
+
+  // Find the pinned allocation starting at the host pointer address.
+  auto It = Allocs.find({HstPtr});
+  if (It == Allocs.end())
+    return Plugin::error("Cannot find locked buffer");
+
+  const EntryTy &Entry = *It;
+
+  // There should be no other references to the pinned allocation.
+  if (Entry.References > 1)
+    return Plugin::error("The locked buffer is still being used");
+
+  // Remove the entry from the map.
+  Allocs.erase(It);
 
   return Plugin::success();
 }
 
-Error GenericDeviceTy::unregisterHostPinnedMemoryBuffer(const void *Buffer) {
-  std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
+Expected<void *> PinnedAllocationMapTy::lockHostBuffer(void *HstPtr,
+                                                       size_t Size) {
+  assert(HstPtr && "Invalid pointer");
+
+  std::lock_guard<std::shared_mutex> Lock(Mutex);
+
+  auto It = findIntersecting(HstPtr);
+
+  // No intersecting registered allocation found in the map. We must lock and
+  // register the memory buffer into the map.
+  if (It == Allocs.end()) {
+    // First, lock the host buffer and retrieve the device accessible pointer.
+    auto PinnedPtrOrErr = Device.dataLockImpl(HstPtr, Size);
+    if (!PinnedPtrOrErr)
+      return PinnedPtrOrErr.takeError();
+
+    // Then, insert the host buffer entry into the map.
+    auto Res = Allocs.insert({HstPtr, *PinnedPtrOrErr, Size});
+    if (!Res.second)
+      return Plugin::error("Cannot register locked buffer");
+
+    // Return the device accessible pointer.
+    return *PinnedPtrOrErr;
+  }
+
+  const EntryTy &Entry = *It;
+
+#ifdef OMPTARGET_DEBUG
+  // Do not allow partial overlapping among host pinned buffers.
+  if (advanceVoidPtr(HstPtr, Size) > advanceVoidPtr(Entry.HstPtr, Entry.Size))
+    return Plugin::error("Partial overlapping not allowed in locked memory");
+#endif
+
+  // Increase the number of references.
+  Entry.References++;
+
+  // Return the device accessible pointer after applying the correct offset.
+  return advanceVoidPtr(Entry.DevAccessiblePtr,
+                        getPtrDiff(HstPtr, Entry.HstPtr));
+}
+
+Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) {
+  assert(HstPtr && "Invalid pointer");
+
+  std::lock_guard<std::shared_mutex> Lock(Mutex);
 
-  size_t Erased = HostAllocations.erase(Buffer);
+  auto It = findIntersecting(HstPtr);
+  if (It == Allocs.end())
+    return Plugin::error("Cannot find locked buffer");
+
+  const EntryTy &Entry = *It;
+
+  // Decrease the number of references. No need to do anything if there are
+  // others using the allocation.
+  if (--Entry.References > 0)
+    return Plugin::success();
+
+  // This was the last user of the allocation. Unlock the original locked memory
+  // buffer, which is the host pointer stored in the entry.
+  if (auto Err = Device.dataUnlockImpl(Entry.HstPtr))
+    return Err;
+
+  // Remove the entry from the map.
+  size_t Erased = Allocs.erase(Entry);
   if (!Erased)
-    return Plugin::error("Cannot find a registered host pinned buffer");
+    return Plugin::error("Cannot find locked buffer");
 
   return Plugin::success();
 }
@@ -648,7 +736,7 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
 
   // Register allocated buffer as pinned memory if the type is host memory.
   if (Kind == TARGET_ALLOC_HOST)
-    if (auto Err = registerHostPinnedMemoryBuffer(Alloc, Size))
+    if (auto Err = PinnedAllocs.registerHostBuffer(Alloc, Alloc, Size))
      return Err;
 
   return Alloc;
@@ -670,7 +758,7 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
 
   // Unregister deallocated pinned memory buffer if the type is host memory.
   if (Kind == TARGET_ALLOC_HOST)
-    if (auto Err = unregisterHostPinnedMemoryBuffer(TgtPtr))
+    if (auto Err = PinnedAllocs.unregisterHostBuffer(TgtPtr))
      return Err;
 
   return Plugin::success();
@@ -998,6 +1086,36 @@ int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) {
   return OFFLOAD_SUCCESS;
 }
 
+int32_t __tgt_rtl_data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
+                            void **LockedPtr) {
+  auto LockedPtrOrErr = Plugin::get().getDevice(DeviceId).dataLock(Ptr, Size);
+  if (!LockedPtrOrErr) {
+    auto Err = LockedPtrOrErr.takeError();
+    REPORT("Failure to lock memory %p: %s\n", Ptr,
+           toString(std::move(Err)).data());
+    return OFFLOAD_FAIL;
+  }
+
+  if (!(*LockedPtrOrErr)) {
+    REPORT("Failure to lock memory %p: obtained a null locked pointer\n", Ptr);
+    return OFFLOAD_FAIL;
+  }
+  *LockedPtr = *LockedPtrOrErr;
+
+  return OFFLOAD_SUCCESS;
+}
+
+int32_t __tgt_rtl_data_unlock(int32_t DeviceId, void *Ptr) {
+  auto Err = Plugin::get().getDevice(DeviceId).dataUnlock(Ptr);
+  if (Err) {
+    REPORT("Failure to unlock memory %p: %s\n", Ptr,
+           toString(std::move(Err)).data());
+    return OFFLOAD_FAIL;
+  }
+
+  return OFFLOAD_SUCCESS;
+}
+
 int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
                               int64_t Size) {
   return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size,
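
Usage note (not part of the diff): the sketch below shows how a host-side caller might exercise the two new __tgt_rtl_data_lock/__tgt_rtl_data_unlock entry points added above. It is illustrative only; it assumes the plugin library is linked in, that the entry points can be declared extern "C" with the signatures shown in the patch, and that OFFLOAD_SUCCESS equals 0 as in libomptarget's headers. The buffer name HostBuf is hypothetical.

// Illustrative sketch only: pinning and unpinning a host buffer through the
// new plugin entry points. Assumes OFFLOAD_SUCCESS == 0 and a linked plugin.
#include <cstdint>
#include <cstdio>
#include <vector>

extern "C" {
int32_t __tgt_rtl_data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
                            void **LockedPtr);
int32_t __tgt_rtl_data_unlock(int32_t DeviceId, void *Ptr);
}

int main() {
  std::vector<char> HostBuf(1 << 20); // Hypothetical 1 MiB host buffer to pin.
  void *LockedPtr = nullptr;

  // Lock (pin) the buffer on device 0; on success LockedPtr receives the
  // device-accessible pointer returned by the plugin.
  if (__tgt_rtl_data_lock(/*DeviceId=*/0, HostBuf.data(),
                          static_cast<int64_t>(HostBuf.size()),
                          &LockedPtr) != 0) {
    std::fprintf(stderr, "data_lock failed\n");
    return 1;
  }

  // ... data transfers could now use the pinned buffer ...

  // Unlock (unpin) the buffer once nothing references it anymore.
  if (__tgt_rtl_data_unlock(/*DeviceId=*/0, HostBuf.data()) != 0) {
    std::fprintf(stderr, "data_unlock failed\n");
    return 1;
  }
  return 0;
}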