Skip to content

Support peer access DPC++ extension #2077

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
May 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
59d6646
Implement dpctl.SyclDevice peer access
ndgrigorian May 3, 2025
7d2d20f
Add backend validation for peer access
ndgrigorian May 4, 2025
fc50e3b
Directly use `get_backend()` from sycl_device
ndgrigorian May 5, 2025
e08c2c9
Tweak docstrings
ndgrigorian May 6, 2025
3af91a8
Simplify logic checking backends for peer access
ndgrigorian May 6, 2025
dfca713
link method for disabling peer access to enabling method
ndgrigorian May 6, 2025
bdfee1b
Fix copy paste errors in peer device interface header
ndgrigorian May 6, 2025
4b3e9f1
Clarify which device has invalid backend in peer_access methods
ndgrigorian May 6, 2025
a21d585
Use ostringstream to output backend names instead of integers
ndgrigorian May 6, 2025
b8c899e
Use ostringstream in kernel bundle interface
ndgrigorian May 6, 2025
bb1f1d9
Adds Python tests for peer_access methods
ndgrigorian May 7, 2025
7106845
parametrize test_peer_device_arg_validation
ndgrigorian May 7, 2025
8563cb0
Factor out peer access validation
ndgrigorian May 12, 2025
9e7031e
Factor common code out of C-API peer access functions
ndgrigorian May 12, 2025
da45cdb
Add C-API tests for peer access functions
ndgrigorian May 12, 2025
ba7222f
Add check that backend types are the same in _CallPeerAccess
ndgrigorian May 12, 2025
417889d
Inline _CallPeerAccess helper function
ndgrigorian May 13, 2025
59377b4
Inline _check_peer_access helper function
ndgrigorian May 13, 2025
09a1ef6
Clean up peer access functions in libsyclinterface
ndgrigorian May 14, 2025
eff82ba
Add helper for raising more specific errors in peer access methods
ndgrigorian May 14, 2025
2fe0c96
Add tests for dpctl peer access enum helper utilities
ndgrigorian May 14, 2025
c76b521
Add an additional libsyclinterface test for peer access to self
ndgrigorian May 14, 2025
629a4b4
Adds a Python test for peer access to self raising ValueError
ndgrigorian May 14, 2025
101366f
Fix typo in peer access tests
ndgrigorian May 14, 2025
178ce96
Change peer access test description
ndgrigorian May 14, 2025
321582f
Fix a typo
ndgrigorian May 14, 2025
56bd37f
Refactor separate can_access_peer methods into a single method
ndgrigorian May 14, 2025
6a34d74
Update docstring of `can_access_peer`
ndgrigorian May 14, 2025
7317df9
Further tweak `can_access_peer` docstring
ndgrigorian May 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/doc_sources/urls.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"oneapi_filter_selection": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_filter_selector.asciidoc",
"oneapi_default_context": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_default_context.asciidoc",
"oneapi_enqueue_barrier": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc",
"oneapi_peer_access": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc",
"sycl_aspects": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#table.device.aspect",
"sycl_context": "https://sycl.readthedocs.io/en/latest/iface/context.html",
"sycl_device": "https://sycl.readthedocs.io/en/latest/iface/device.html",
Expand Down
11 changes: 11 additions & 0 deletions dpctl/_backend.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ cdef extern from "syclinterface/dpctl_sycl_enum_types.h":
_L1_cache "L1_cache",
_next_partitionable "next_partitionable",

# Cython-side view of the C enum ``DPCTLPeerAccessType``: selects which kind
# of peer access a query refers to. Each member maps onto the C enumerator
# named in the quoted string.
ctypedef enum _peer_access "DPCTLPeerAccessType":
_access_supported "access_supported",
_atomics_supported "atomics_supported",

ctypedef enum _event_status_type "DPCTLSyclEventStatusType":
_UNKNOWN_STATUS "DPCTL_UNKNOWN_STATUS"
_SUBMITTED "DPCTL_SUBMITTED"
Expand Down Expand Up @@ -278,7 +282,14 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h":
cdef DPCTLDeviceVectorRef DPCTLDevice_GetComponentDevices(
const DPCTLSyclDeviceRef DRef
)
# Peer access C-API. In all three functions ``DRef`` is the device doing the
# accessing and ``PDRef`` is the peer device being accessed.
#
# Query whether the kind of peer access selected by ``PT`` is supported.
cdef bool DPCTLDevice_CanAccessPeer(const DPCTLSyclDeviceRef DRef,
const DPCTLSyclDeviceRef PDRef,
_peer_access PT)
# Enable access from ``DRef`` to ``PDRef``.
cdef void DPCTLDevice_EnablePeerAccess(const DPCTLSyclDeviceRef DRef,
const DPCTLSyclDeviceRef PDRef)

# Disable access from ``DRef`` to ``PDRef``.
cdef void DPCTLDevice_DisablePeerAccess(const DPCTLSyclDeviceRef DRef,
const DPCTLSyclDeviceRef PDRef)

cdef extern from "syclinterface/dpctl_sycl_device_manager.h":
cdef DPCTLDeviceVectorRef DPCTLDeviceVector_CreateFromArray(
Expand Down
211 changes: 209 additions & 2 deletions dpctl/_sycl_device.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@ from ._backend cimport ( # noqa: E211
DPCTLCString_Delete,
DPCTLDefaultSelector_Create,
DPCTLDevice_AreEq,
DPCTLDevice_CanAccessPeer,
DPCTLDevice_Copy,
DPCTLDevice_CreateFromSelector,
DPCTLDevice_CreateSubDevicesByAffinity,
DPCTLDevice_CreateSubDevicesByCounts,
DPCTLDevice_CreateSubDevicesEqually,
DPCTLDevice_Delete,
DPCTLDevice_DisablePeerAccess,
DPCTLDevice_EnablePeerAccess,
DPCTLDevice_GetBackend,
DPCTLDevice_GetComponentDevices,
DPCTLDevice_GetCompositeDevice,
Expand Down Expand Up @@ -103,6 +106,7 @@ from ._backend cimport ( # noqa: E211
_device_type,
_global_mem_cache_type,
_partition_affinity_domain_type,
_peer_access,
)

from .enum_types import backend_type, device_type, global_mem_cache_type
Expand Down Expand Up @@ -213,14 +217,73 @@ cdef void _init_helper(_SyclDevice device, DPCTLSyclDeviceRef DRef) except *:
raise RuntimeError("Descriptor 'max_work_item_sizes3d' not available")


cdef inline bint _check_peer_access(SyclDevice dev, SyclDevice peer) except *:
    """
    Screen a pair of devices before asking the runtime about peer access.

    The answer is ``True`` only when both devices report the same backend,
    that backend is one of CUDA, HIP or Level Zero, and ``dev`` and ``peer``
    do not compare equal. Screening up front avoids errors from the unified
    runtime or compiler implementation.
    """
    cdef list supported_backends = [
        _backend_type._CUDA,
        _backend_type._HIP,
        _backend_type._LEVEL_ZERO
    ]
    cdef _backend_type dev_backend = DPCTLDevice_GetBackend(dev._device_ref)
    cdef _backend_type peer_backend = DPCTLDevice_GetBackend(
        peer.get_device_ref()
    )

    if dev_backend != peer_backend:
        return False
    # The backends are equal here, so one membership test covers both.
    if dev_backend not in supported_backends:
        return False
    if dev != peer:
        return True
    return False


cdef inline void _raise_invalid_peer_access(
    SyclDevice dev,
    SyclDevice peer,
) except *:
    """
    Check peer access ahead of time and raise errors for invalid cases.

    Args:
        dev (:class:`dpctl.SyclDevice`):
            The device that would access memory on ``peer``.
        peer (:class:`dpctl.SyclDevice`):
            The device whose memory would be accessed.

    Raises:
        ValueError:
            If the two devices have different backends, if their backend
            is not one of CUDA, HIP or Level Zero, or if ``dev`` and
            ``peer`` compare equal.
    """
    cdef list _peer_access_backends = [
        _backend_type._CUDA,
        _backend_type._HIP,
        _backend_type._LEVEL_ZERO
    ]
    cdef _backend_type BTy1 = DPCTLDevice_GetBackend(dev._device_ref)
    cdef _backend_type BTy2 = DPCTLDevice_GetBackend(peer.get_device_ref())
    if BTy1 != BTy2:
        raise ValueError(
            f"Device with backend {_backend_type_to_filter_string_part(BTy1)} "
            "cannot peer access device with backend "
            f"{_backend_type_to_filter_string_part(BTy2)}"
        )
    # Past this point both devices share a single backend, so checking BTy1
    # alone is sufficient; a separate check of BTy2 could never fire.
    if BTy1 not in _peer_access_backends:
        raise ValueError(
            "Peer access not supported for backend "
            f"{_backend_type_to_filter_string_part(BTy1)}"
        )
    if dev == peer:
        raise ValueError(
            "Peer access cannot be enabled between a device and itself"
        )


@functools.lru_cache(maxsize=None)
def _cached_filter_string(d : SyclDevice):
"""
Internal utility to compute filter_string of input SyclDevice
and cached with `functools.cache`.

Args:
d (dpctl.SyclDevice):
d (:class:`dpctl.SyclDevice`):
A device for which to compute the filter string.
Returns:
out(str):
Expand Down Expand Up @@ -1792,6 +1855,150 @@ cdef class SyclDevice(_SyclDevice):
raise ValueError("Internal error: NULL device vector encountered")
return _get_devices(cDVRef)

def can_access_peer(self, peer, value="access_supported"):
    """ Queries whether this device (``self``) supports a given kind of
    peer access to USM device memory located on ``peer``.

    When access is supported, it can be switched on with
    :meth:`.enable_peer_access`.

    For details, see
    :oneapi_peer_access:`DPC++ peer access SYCL extension <>`.

    Args:
        peer (:class:`dpctl.SyclDevice`):
            The :class:`dpctl.SyclDevice` instance whose memory this
            device would access.
        value (str, optional):
            The kind of peer access to query.

            The supported values are

            - ``"access_supported"``
                ``True`` means this device is able to enable peer
                access to USM device memory on ``peer``.

            - ``"atomics_supported"``
                ``True`` means this device is able to concurrently
                access and atomically modify USM device memory on
                ``peer`` once access is enabled. Atomics must have
                ``memory_scope::system`` when modifying memory on a
                peer device.

                When ``False`` is returned, these operations result in
                undefined behavior.

            Default: ``"access_supported"``

    Returns:
        bool:
            ``True`` if the kind of peer access specified by ``value`` is
            supported between this device and ``peer``, otherwise
            ``False``. ``False`` is also returned without querying the
            runtime when the devices have different backends, when the
            backend is not CUDA, HIP or Level Zero, or when ``peer``
            compares equal to this device.

    Raises:
        TypeError:
            If ``value`` is not a ``str``, or if ``peer`` is not
            :class:`dpctl.SyclDevice`.
        ValueError:
            If ``value`` is neither ``"access_supported"`` nor
            ``"atomics_supported"``.
    """
    cdef SyclDevice peer_dev

    # ``value`` is validated before ``peer`` so that the order in which
    # errors surface stays stable when both arguments are invalid.
    if not isinstance(value, str):
        raise TypeError(
            f"Expected `value` to be of type str, got {type(value)}"
        )
    if value == "access_supported":
        access_type = _peer_access._access_supported
    elif value == "atomics_supported":
        access_type = _peer_access._atomics_supported
    else:
        raise ValueError(
            "`value` must be 'access_supported' or 'atomics_supported', "
            f"got {value}"
        )

    if not isinstance(peer, SyclDevice):
        raise TypeError(
            "peer device must be a `dpctl.SyclDevice`, got "
            f"{type(peer)}"
        )
    peer_dev = <SyclDevice>peer

    # Pairs that fail the up-front screening are reported as unsupported
    # without calling into the runtime.
    if not _check_peer_access(self, peer_dev):
        return False
    return DPCTLDevice_CanAccessPeer(
        self._device_ref,
        peer_dev.get_device_ref(),
        access_type
    )

def enable_peer_access(self, peer):
    """ Turns on access from this device (``self``) to USM device
    allocations that live on ``peer``.

    The inverse operation is :meth:`.disable_peer_access`.

    For details, see
    :oneapi_peer_access:`DPC++ peer access SYCL extension <>`.

    Args:
        peer (:class:`dpctl.SyclDevice`):
            The :class:`dpctl.SyclDevice` instance that this device
            should be allowed to access.

    Raises:
        TypeError:
            If ``peer`` is not :class:`dpctl.SyclDevice`.
        ValueError:
            If this device and ``peer`` have different backends, if
            their backend does not support peer access, or if ``peer``
            compares equal to this device.
    """
    cdef SyclDevice peer_dev

    if not isinstance(peer, SyclDevice):
        raise TypeError(
            "peer device must be a `dpctl.SyclDevice`, got "
            f"{type(peer)}"
        )
    peer_dev = <SyclDevice>peer
    # Reject invalid pairs with a descriptive ValueError before the
    # request reaches the runtime.
    _raise_invalid_peer_access(self, peer_dev)
    DPCTLDevice_EnablePeerAccess(
        self._device_ref, peer_dev.get_device_ref()
    )

def disable_peer_access(self, peer):
    """ Turns off access from this device (``self``) to ``peer``.

    The inverse operation is :meth:`.enable_peer_access`.

    For details, see
    :oneapi_peer_access:`DPC++ peer access SYCL extension <>`.

    Args:
        peer (:class:`dpctl.SyclDevice`):
            The :class:`dpctl.SyclDevice` instance that this device
            should no longer access.

    Raises:
        TypeError:
            If ``peer`` is not :class:`dpctl.SyclDevice`.
        ValueError:
            If this device and ``peer`` have different backends, if
            their backend does not support peer access, or if ``peer``
            compares equal to this device.
    """
    cdef SyclDevice peer_dev

    if not isinstance(peer, SyclDevice):
        raise TypeError(
            "peer device must be a `dpctl.SyclDevice`, got "
            f"{type(peer)}"
        )
    peer_dev = <SyclDevice>peer
    # Reject invalid pairs with a descriptive ValueError before the
    # request reaches the runtime.
    _raise_invalid_peer_access(self, peer_dev)
    DPCTLDevice_DisablePeerAccess(
        self._device_ref, peer_dev.get_device_ref()
    )

@property
def profiling_timer_resolution(self):
""" Profiling timer resolution.
Expand Down Expand Up @@ -1912,7 +2119,7 @@ cdef class SyclDevice(_SyclDevice):
same _device_ref as this SyclDevice.

Args:
other (dpctl.SyclDevice):
other (:class:`dpctl.SyclDevice`):
A :class:`dpctl.SyclDevice` instance to
compare against.

Expand Down
Loading
Loading