@@ -851,6 +851,33 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name,
   case PI_CONTEXT_INFO_REFERENCE_COUNT:
     return getInfo(param_value_size, param_value, param_value_size_ret,
                    context->get_reference_count());
+  case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
+    int major = 0;
+    cl::sycl::detail::pi::assertion(
+        cuDeviceGetAttribute(&major,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                             context->get_device()->get()) == CUDA_SUCCESS);
+    pi_memory_order_capabilities capabilities =
+        (major >= 6) ? PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
+                           PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL
+                     : PI_MEMORY_ORDER_RELAXED;
+    return getInfo(param_value_size, param_value, param_value_size_ret,
+                   capabilities);
+  }
+  case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
+    int major = 0;
+    cl::sycl::detail::pi::assertion(
+        cuDeviceGetAttribute(&major,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                             context->get_device()->get()) == CUDA_SUCCESS);
+    pi_memory_order_capabilities capabilities =
+        (major >= 5) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
+                           PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE |
+                           PI_MEMORY_SCOPE_SYSTEM
+                     : PI_MEMORY_SCOPE_DEVICE;
+    return getInfo(param_value_size, param_value, param_value_size_ret,
+                   capabilities);
+  }
   default:
     __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
   }
@@ -1112,11 +1139,31 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
                    atomic64);
   }
   case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
-    // NVPTX currently only support at most monotonic atomic load/store.
-    // Acquire and release is present in newer PTX, but is not yet supported
-    // in LLVM NVPTX.
+    int major = 0;
+    cl::sycl::detail::pi::assertion(
+        cuDeviceGetAttribute(&major,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                             device->get()) == CUDA_SUCCESS);
+    pi_memory_order_capabilities capabilities =
+        (major >= 6) ? PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
+                           PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL
+                     : PI_MEMORY_ORDER_RELAXED;
+    return getInfo(param_value_size, param_value, param_value_size_ret,
+                   capabilities);
+  }
+  case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
+    int major = 0;
+    cl::sycl::detail::pi::assertion(
+        cuDeviceGetAttribute(&major,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                             device->get()) == CUDA_SUCCESS);
+    pi_memory_order_capabilities capabilities =
+        (major >= 5) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
+                           PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE |
+                           PI_MEMORY_SCOPE_SYSTEM
+                     : PI_MEMORY_SCOPE_DEVICE;
     return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_MEMORY_ORDER_RELAXED);
+                   capabilities);
   }
   case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
     // NVIDIA devices only support one sub-group size (the warp size)
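
For context, a minimal usage sketch (not part of this patch) of how an application could observe these capabilities once they are reported by the CUDA backend, assuming the SYCL 2020 atomic_memory_order_capabilities / atomic_memory_scope_capabilities device info descriptors are available in the build and are serviced by the PI queries above. The selector and printing logic are illustrative only.

// Hypothetical usage sketch: query the atomic capability info descriptors,
// which the DPC++ runtime backs with the PI entry points modified above.
#include <CL/sycl.hpp>

#include <iostream>

int main() {
  cl::sycl::device dev{cl::sycl::gpu_selector{}};

  // Each query returns a vector of the supported enumerators.
  auto orders =
      dev.get_info<cl::sycl::info::device::atomic_memory_order_capabilities>();
  auto scopes =
      dev.get_info<cl::sycl::info::device::atomic_memory_scope_capabilities>();

  std::cout << "supported memory orders: " << orders.size() << "\n"
            << "supported memory scopes: " << scopes.size() << "\n";
  return 0;
}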