@@ -47,26 +47,37 @@ namespace {
47
47
// Experimental option for testing the performance of device-to-device copy
// operations on copy engines (versus the compute engine).
//
// UR_L0_USE_COPY_ENGINE_FOR_D2D_COPY takes precedence over the legacy
// SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY. Any non-zero integer
// enables the option; it is disabled when neither variable is set.
static const bool UseCopyEngineForD2DCopy = [] {
  const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE_FOR_D2D_COPY");
  const char *PiRet =
      std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY");
  const char *CopyEngineForD2DCopy = UrRet ? UrRet : PiRet;
  if (!CopyEngineForD2DCopy)
    return false;

  try {
    return std::stoi(CopyEngineForD2DCopy) != 0;
  } catch (...) {
    // std::stoi throws on a non-numeric or out-of-range value. This lambda
    // runs during static initialization, where an escaping exception would
    // terminate the process, so treat a malformed value as "not set".
    return false;
  }
}();
54
57
55
58
// Experimental option that allows the use of the copy engine, if available in
// the device, for copy operations submitted to an in-order queue.
//
// UR_L0_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE takes precedence over the legacy
// SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE. The option is enabled
// by default (as if set to 1); setting the selected variable to 0 disables it.
static const bool UseCopyEngineForInOrderQueue = [] {
  const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE");
  const char *PiRet =
      std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE");
  const char *CopyEngineForInOrderQueue = UrRet ? UrRet : PiRet;
  if (!CopyEngineForInOrderQueue)
    return true;

  try {
    return std::stoi(CopyEngineForInOrderQueue) != 0;
  } catch (...) {
    // std::stoi throws on a non-numeric or out-of-range value. This lambda
    // runs during static initialization, where an escaping exception would
    // terminate the process, so keep the default for a malformed value.
    return true;
  }
}();
64
71
65
72
// This is an experimental option that allows the use of multiple command lists
66
73
// when submitting barriers. It is enabled when no variable is set; a value
// that is not positive disables it.
67
74
static const bool UseMultipleCmdlistBarriers = [] {
68
- const char *UseMultipleCmdlistBarriersFlag =
75
+ const char *UrRet = std::getenv (" UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS" );
76
+ const char *PiRet =
69
77
std::getenv (" SYCL_PI_LEVEL_ZERO_USE_MULTIPLE_COMMANDLIST_BARRIERS" );
78
+ const char *UseMultipleCmdlistBarriersFlag =
79
+ UrRet ? UrRet : (PiRet ? PiRet : nullptr );
80
+
70
81
if (!UseMultipleCmdlistBarriersFlag)
71
82
return true ;
72
83
return std::stoi (UseMultipleCmdlistBarriersFlag) > 0 ;
@@ -75,8 +86,11 @@ static const bool UseMultipleCmdlistBarriers = [] {
75
86
// This is an experimental option that allows to disable caching of events in
76
87
// the context.
77
88
static const bool DisableEventsCaching = [] {
89
+ const char *UrRet = std::getenv (" UR_L0_DISABLE_EVENTS_CACHING" );
90
+ const char *PiRet = std::getenv (" SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING" );
78
91
const char *DisableEventsCachingFlag =
79
- std::getenv (" SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING" );
92
+ UrRet ? UrRet : (PiRet ? PiRet : nullptr );
93
+
80
94
if (!DisableEventsCachingFlag)
81
95
return false ;
82
96
return std::stoi (DisableEventsCachingFlag) != 0 ;
@@ -85,8 +99,11 @@ static const bool DisableEventsCaching = [] {
85
99
// This is an experimental option that allows reset and reuse of uncompleted
86
100
// events in the in-order queue with discard_events property.
87
101
static const bool ReuseDiscardedEvents = [] {
102
+ const char *UrRet = std::getenv (" UR_L0_REUSE_DISCARDED_EVENTS" );
103
+ const char *PiRet = std::getenv (" SYCL_PI_LEVEL_ZERO_REUSE_DISCARDED_EVENTS" );
88
104
const char *ReuseDiscardedEventsFlag =
89
- std::getenv (" SYCL_PI_LEVEL_ZERO_REUSE_DISCARDED_EVENTS" );
105
+ UrRet ? UrRet : (PiRet ? PiRet : nullptr );
106
+
90
107
if (!ReuseDiscardedEventsFlag)
91
108
return true ;
92
109
return std::stoi (ReuseDiscardedEventsFlag) > 0 ;
@@ -95,8 +112,11 @@ static const bool ReuseDiscardedEvents = [] {
95
112
// Due to a bug with 2D memory copy to and from non-USM pointers, this option is
96
113
// disabled by default.
97
114
static const bool UseMemcpy2DOperations = [] {
115
+ const char *UrRet = std::getenv (" UR_L0_USE_NATIVE_USM_MEMCPY2D" );
116
+ const char *PiRet = std::getenv (" SYCL_PI_LEVEL_ZERO_USE_NATIVE_USM_MEMCPY2D" );
98
117
const char *UseMemcpy2DOperationsFlag =
99
- std::getenv (" SYCL_PI_LEVEL_ZERO_USE_NATIVE_USM_MEMCPY2D" );
118
+ UrRet ? UrRet : (PiRet ? PiRet : nullptr );
119
+
100
120
if (!UseMemcpy2DOperationsFlag)
101
121
return false ;
102
122
return std::stoi (UseMemcpy2DOperationsFlag) > 0 ;
@@ -130,16 +150,21 @@ static inline pi_result mapError(ze_result_t Result) {
130
150
// paths be less likely affected.
131
151
//
132
152
static bool doEagerInit = [] {
  // UR_L0_EAGER_INIT takes precedence over the legacy SYCL_EAGER_INIT.
  const char *UrRet = std::getenv("UR_L0_EAGER_INIT");
  const char *PiRet = std::getenv("SYCL_EAGER_INIT");
  const char *EagerInit = UrRet ? UrRet : PiRet;

  // Off unless the selected variable parses to a non-zero integer.
  return EagerInit != nullptr && std::atoi(EagerInit) != 0;
}();
136
158
137
159
// Maximum number of events that can be present in an event ZePool is captured
138
160
// here. Setting it to 256 gave best possible performance for several
139
161
// benchmarks.
140
162
static const pi_uint32 MaxNumEventsPerPool = [] {
141
- const auto MaxNumEventsPerPoolEnv =
142
- std::getenv (" ZE_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL" );
163
+ const char *UrRet = std::getenv (" UR_L0_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL" );
164
+ const char *PiRet = std::getenv (" ZE_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL" );
165
+ const char *MaxNumEventsPerPoolEnv =
166
+ UrRet ? UrRet : (PiRet ? PiRet : nullptr );
167
+
143
168
pi_uint32 Result =
144
169
MaxNumEventsPerPoolEnv ? std::atoi (MaxNumEventsPerPoolEnv) : 256 ;
145
170
if (Result <= 0 )
@@ -177,16 +202,18 @@ template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle) {
177
202
178
203
} // anonymous namespace
179
204
180
// UR_L0_USE_COMPUTE_ENGINE (or the legacy
// SYCL_PI_LEVEL_ZERO_USE_COMPUTE_ENGINE) can be set to an integer (>=0) in
181
206
// which case all compute commands will be submitted to the command-queue
182
207
// with the given index in the compute command group. If it is instead set
183
208
// to negative then all available compute engines may be used.
184
209
//
185
210
// The default value is "0".
186
211
//
187
212
static const std::pair<int , int > getRangeOfAllowedComputeEngines () {
188
- static const char *EnvVar =
189
- std::getenv (" SYCL_PI_LEVEL_ZERO_USE_COMPUTE_ENGINE" );
213
+ const char *UrRet = std::getenv (" UR_L0_USE_COMPUTE_ENGINE" );
214
+ const char *PiRet = std::getenv (" SYCL_PI_LEVEL_ZERO_USE_COMPUTE_ENGINE" );
215
+ const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr );
216
+
190
217
// If the environment variable is not set only use "0" CCS for now.
191
218
// TODO: allow all CCSs when HW support is complete.
192
219
if (!EnvVar)
@@ -466,8 +493,13 @@ pi_result _pi_queue::addEventToQueueCache(pi_event Event) {
466
493
// If number of events in the immediate command list exceeds this threshold then
467
494
// cleanup process for those events is executed.
468
495
static const size_t ImmCmdListsEventCleanupThreshold = [] {
469
- const char *ImmCmdListsEventCleanupThresholdStr = std::getenv (
496
+ const char *UrRet =
497
+ std::getenv (" UR_L0_IMMEDIATE_COMMANDLISTS_EVENT_CLEANUP_THRESHOLD" );
498
+ const char *PiRet = std::getenv (
470
499
" SYCL_PI_LEVEL_ZERO_IMMEDIATE_COMMANDLISTS_EVENT_CLEANUP_THRESHOLD" );
500
+ const char *ImmCmdListsEventCleanupThresholdStr =
501
+ UrRet ? UrRet : (PiRet ? PiRet : nullptr );
502
+
471
503
static constexpr int Default = 1000 ;
472
504
if (!ImmCmdListsEventCleanupThresholdStr)
473
505
return Default;
@@ -484,8 +516,12 @@ static const size_t ImmCmdListsEventCleanupThreshold = [] {
484
516
// Get value of the threshold for number of active command lists allowed before
485
517
// we start heuristically cleaning them up.
486
518
static const size_t CmdListsCleanupThreshold = [] {
487
- const char *CmdListsCleanupThresholdStr =
519
+ const char *UrRet = std::getenv (" UR_L0_COMMANDLISTS_CLEANUP_THRESHOLD" );
520
+ const char *PiRet =
488
521
std::getenv (" SYCL_PI_LEVEL_ZERO_COMMANDLISTS_CLEANUP_THRESHOLD" );
522
+ const char *CmdListsCleanupThresholdStr =
523
+ UrRet ? UrRet : (PiRet ? PiRet : nullptr );
524
+
489
525
static constexpr int Default = 20 ;
490
526
if (!CmdListsCleanupThresholdStr)
491
527
return Default;
@@ -826,9 +862,17 @@ static const zeCommandListBatchConfig ZeCommandListBatchConfig(bool IsCopy) {
826
862
zeCommandListBatchConfig Config{}; // default initialize
827
863
828
864
// Default value of 0. This specifies to use dynamic batch size adjustment.
829
- const auto BatchSizeStr =
830
- (IsCopy) ? std::getenv (" SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE" )
831
- : std::getenv (" SYCL_PI_LEVEL_ZERO_BATCH_SIZE" );
865
+ const char *UrRet = nullptr ;
866
+ const char *PiRet = nullptr ;
867
+ if (IsCopy) {
868
+ UrRet = std::getenv (" UR_L0_COPY_BATCH_SIZE" );
869
+ PiRet = std::getenv (" SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE" );
870
+ } else {
871
+ UrRet = std::getenv (" UR_L0_BATCH_SIZE" );
872
+ PiRet = std::getenv (" SYCL_PI_LEVEL_ZERO_BATCH_SIZE" );
873
+ }
874
+ const char *BatchSizeStr = UrRet ? UrRet : (PiRet ? PiRet : nullptr );
875
+
832
876
if (BatchSizeStr) {
833
877
pi_int32 BatchSizeStrVal = std::atoi (BatchSizeStr);
834
878
// Level Zero may only support a limited number of commands per command
@@ -861,10 +905,9 @@ static const zeCommandListBatchConfig ZeCommandListBatchConfig(bool IsCopy) {
861
905
Val = std::stoi (BatchConfig.substr (Pos));
862
906
} catch (...) {
863
907
if (IsCopy)
864
- urPrint (
865
- " SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE: failed to parse value\n " );
908
+ urPrint (" UR_L0_COPY_BATCH_SIZE: failed to parse value\n " );
866
909
else
867
- urPrint (" SYCL_PI_LEVEL_ZERO_BATCH_SIZE : failed to parse value\n " );
910
+ urPrint (" UR_L0_BATCH_SIZE : failed to parse value\n " );
868
911
break ;
869
912
}
870
913
switch (Ord) {
@@ -887,21 +930,20 @@ static const zeCommandListBatchConfig ZeCommandListBatchConfig(bool IsCopy) {
887
930
die (" Unexpected batch config" );
888
931
}
889
932
if (IsCopy)
890
- urPrint (" SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE : dynamic batch param "
933
+ urPrint (" UR_L0_COPY_BATCH_SIZE : dynamic batch param "
891
934
" #%d: %d\n " ,
892
935
(int )Ord, (int )Val);
893
936
else
894
- urPrint (
895
- " SYCL_PI_LEVEL_ZERO_BATCH_SIZE: dynamic batch param #%d: %d\n " ,
896
- (int )Ord, (int )Val);
937
+ urPrint (" UR_L0_BATCH_SIZE: dynamic batch param #%d: %d\n " , (int )Ord,
938
+ (int )Val);
897
939
};
898
940
899
941
} else {
900
942
// Negative batch sizes are silently ignored.
901
943
if (IsCopy)
902
- urPrint (" SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE : ignored negative value\n " );
944
+ urPrint (" UR_L0_COPY_BATCH_SIZE : ignored negative value\n " );
903
945
else
904
- urPrint (" SYCL_PI_LEVEL_ZERO_BATCH_SIZE : ignored negative value\n " );
946
+ urPrint (" UR_L0_BATCH_SIZE : ignored negative value\n " );
905
947
}
906
948
}
907
949
return Config;
@@ -922,7 +964,10 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] {
922
964
// Temporarily check whether an immediate command list env var has been set.
// This affects the default behavior of the make_queue API.
//
// Only the presence of UR_L0_USE_IMMEDIATE_COMMANDLISTS or the legacy
// SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS is tested, not the value: an
// explicit "0" is still a user choice and therefore counts as "set".
static const bool ImmediateCommandlistEnvVarIsSet = [] {
  const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS");
  const char *PiRet =
      std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS");
  return UrRet != nullptr || PiRet != nullptr;
}();
927
972
928
973
_pi_queue::_pi_queue (std::vector<ze_command_queue_handle_t > &ComputeQueues,
@@ -1893,9 +1938,9 @@ pi_result _pi_queue::executeOpenCommandList(bool IsCopy) {
1893
1938
}
1894
1939
1895
1940
static const bool FilterEventWaitList = [] {
  // UR_L0_FILTER_EVENT_WAIT_LIST takes precedence over the legacy
  // SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST. Off when neither is set.
  const char *UrRet = std::getenv("UR_L0_FILTER_EVENT_WAIT_LIST");
  const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST");
  const char *Ret = UrRet ? UrRet : PiRet;
  if (!Ret)
    return false;

  try {
    // Any non-zero integer enables filtering.
    return std::stoi(Ret) != 0;
  } catch (...) {
    // std::stoi throws on a non-numeric or out-of-range value. This lambda
    // runs during static initialization, where an escaping exception would
    // terminate the process, so keep the default for a malformed value.
    return false;
  }
}();
1900
1945
1901
1946
pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList (
@@ -2398,7 +2443,7 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName,
2398
2443
return ReturnValue (pi_uint32{Context->RefCount .load ()});
2399
2444
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT:
2400
2445
// 2D USM memcpy is supported unless disabled through
2401
// UR_L0_USE_NATIVE_USM_MEMCPY2D (or the legacy
// SYCL_PI_LEVEL_ZERO_USE_NATIVE_USM_MEMCPY2D).
2402
2447
return ReturnValue (pi_bool{UseMemcpy2DOperations});
2403
2448
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT:
2404
2449
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT:
@@ -2901,8 +2946,12 @@ pi_result piQueueFinish(pi_queue Queue) {
2901
2946
// TODO: this currently exhibits some issues in the driver, so
2902
2947
// we control this with an env var. Remove this control when
2903
2948
// we settle one way or the other.
2904
- static bool HoldLock =
2905
- std::getenv (" SYCL_PI_LEVEL_ZERO_QUEUE_FINISH_HOLD_LOCK" ) != nullptr ;
2949
+ const char *UrRet = std::getenv (" UR_L0_QUEUE_FINISH_HOLD_LOCK" );
2950
+ const char *PiRet =
2951
+ std::getenv (" SYCL_PI_LEVEL_ZERO_QUEUE_FINISH_HOLD_LOCK" );
2952
+ const bool HoldLock =
2953
+ UrRet ? std::stoi (UrRet) : (PiRet ? std::stoi (PiRet) : 0 );
2954
+
2906
2955
if (!HoldLock) {
2907
2956
Lock.unlock ();
2908
2957
}
@@ -5793,7 +5842,7 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
5793
5842
// If we have a list of events to make the barrier from, then we can create a
5794
5843
// barrier on these and use the resulting event as our future barrier.
5795
5844
// We use the same approach if
5796
- // SYCL_PI_LEVEL_ZERO_USE_MULTIPLE_COMMANDLIST_BARRIERS is not set to a
5845
+ // UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS is not set to a
5797
5846
// positive value.
5798
5847
// We use the same approach if we have in-order queue because every command
5799
5848
// depends on previous one, so we don't need to insert barrier to multiple
@@ -6346,8 +6395,10 @@ pi_result piEnqueueMemBufferCopyRect(
6346
6395
// Default to using the compute engine for fill operations, but allow this to
// be overridden with an environment variable.
//
// UR_L0_USE_COPY_ENGINE_FOR_FILL takes precedence over the legacy
// SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_FILL. Any non-zero integer selects
// the copy engine.
static bool PreferCopyEngine = [] {
  const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE_FOR_FILL");
  const char *PiRet =
      std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_FILL");
  const char *Env = UrRet ? UrRet : PiRet;
  if (!Env)
    return false;

  try {
    return std::stoi(Env) != 0;
  } catch (...) {
    // std::stoi throws on a non-numeric or out-of-range value. This lambda
    // runs during static initialization, where an escaping exception would
    // terminate the process, so keep the default for a malformed value.
    return false;
  }
}();
6352
6403
6353
6404
// PI interfaces must have queue's and buffer's mutexes locked on entry.
@@ -7188,7 +7239,10 @@ enum class USMAllocationForceResidencyType {
7188
7239
7189
7240
// Returns the desired USM residency setting
7190
7241
static USMAllocationForceResidencyType USMAllocationForceResidency = [] {
7191
- const auto Str = std::getenv (" SYCL_PI_LEVEL_ZERO_USM_RESIDENT" );
7242
+ const char *UrRet = std::getenv (" UR_L0_USM_RESIDENT" );
7243
+ const char *PiRet = std::getenv (" SYCL_PI_LEVEL_ZERO_USM_RESIDENT" );
7244
+ const char *Str = UrRet ? UrRet : (PiRet ? PiRet : nullptr );
7245
+
7192
7246
if (!Str)
7193
7247
return USMAllocationForceResidencyType::P2PDevices;
7194
7248
switch (std::atoi (Str)) {
@@ -8599,8 +8653,12 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
8599
8653
// cross-tile traffic.
8600
8654
//
8601
8655
static const bool SingleRootDeviceBufferMigration = [] {
8602
- const char *EnvStr =
8656
+ const char *UrRet =
8657
+ std::getenv (" UR_L0_SINGLE_ROOT_DEVICE_BUFFER_MIGRATION" );
8658
+ const char *PiRet =
8603
8659
std::getenv (" SYCL_PI_LEVEL_ZERO_SINGLE_ROOT_DEVICE_BUFFER_MIGRATION" );
8660
+ const char *EnvStr = UrRet ? UrRet : (PiRet ? PiRet : nullptr );
8661
+
8604
8662
if (EnvStr)
8605
8663
return (std::stoi (EnvStr) != 0 );
8606
8664
// The default is to migrate normally, which may not always be the
0 commit comments