intel · martygrant · May 23, 2025 · May 21, 2025
@@ -49,7 +49,7 @@ def git_url(self) -> str:
         return "https://github.com/intel/compute-benchmarks.git"
 
     def git_hash(self) -> str:
-        return "49a8c6314875c57fee9b59aea16e721572e3021d"
+        return "3283b5edb8bf771c519625af741b5db7a37b0111"  # new pinned revision; presumably a commit of the git_url() repo — confirm
 
     def setup(self):
         if options.sycl is None:
@@ -196,11 +196,12 @@ def benchmarks(self) -> list[Benchmark]:
         # Add UR-specific benchmarks
         if options.ur is not None:
             benches += [
-                MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1, 1),
-                MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1, 1),
-                MemcpyExecute(self, 100, 4, 102400, 10, 1, 1, 0, 1),
-                MemcpyExecute(self, 100, 4, 102400, 10, 1, 1, 0, 0),
-                MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0, 1),
+                MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 1, 1, 1, 1, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 0, 1, 1, 1, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 1, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 0, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 1),
                 UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256, "Both"),
                 UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256 * 1024, "Both"),
                 UsmBatchMemoryAllocation(self, RUNTIMES.UR, "Device", 128, 256, "Both"),
@@ -211,6 +212,20 @@ def benchmarks(self) -> list[Benchmark]:
                     self, RUNTIMES.UR, "Device", 128, 128 * 1024, "Both"
                 ),
             ]
+        benches += [
+            MemcpyExecute(
+                self, RUNTIMES.SYCL_PREVIEW, 4096, 1, 1024, 40, 1, 1, 0, 1, 0
+            ),
+            MemcpyExecute(
+                self, RUNTIMES.SYCL_PREVIEW, 4096, 1, 1024, 40, 1, 1, 0, 1, 1
+            ),
+            MemcpyExecute(
+                self, RUNTIMES.SYCL_PREVIEW, 4096, 4, 1024, 10, 1, 1, 0, 1, 0
+            ),
+            MemcpyExecute(
+                self, RUNTIMES.SYCL_PREVIEW, 4096, 4, 1024, 10, 1, 1, 0, 1, 1
+            ),
+        ]
 
         return benches
 
@@ -533,6 +548,7 @@ class MemcpyExecute(ComputeBenchmark):
     def __init__(
         self,
         bench,
+        runtime: RUNTIMES,
         numOpsPerThread,
         numThreads,
         allocSize,
@@ -541,7 +557,9 @@ def __init__(
         dstUSM,
         useEvent,
         useCopyOffload,
+        useBarrier,
     ):
+        self.runtime = runtime
         self.numOpsPerThread = numOpsPerThread
         self.numThreads = numThreads
         self.allocSize = allocSize
@@ -550,7 +568,10 @@ def __init__(
         self.dstUSM = dstUSM
         self.useEvents = useEvent
         self.useCopyOffload = useCopyOffload
-        super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute")
+        self.useBarrier = useBarrier
+        super().__init__(
+            bench, f"multithread_benchmark_{self.runtime.value}", "MemcpyExecute"
+        )
 
     def extra_env_vars(self) -> dict:
         if not self.useCopyOffload:
@@ -560,9 +581,10 @@ def extra_env_vars(self) -> dict:
 
     def name(self):
         return (
-            f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}"
+            f"multithread_benchmark_{self.runtime.value} MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}"
             + (" without events" if not self.useEvents else "")
             + (" without copy offload" if not self.useCopyOffload else "")
+            + (" with barrier" if self.useBarrier else "")  # unlike the two suffixes above, this one is appended when the flag is set
         )
 
     def explicit_group(self):
@@ -571,21 +593,25 @@ def explicit_group(self):
             + str(self.numOpsPerThread)
             + " numThreads: "
             + str(self.numThreads)
+            + " allocSize: "
+            + str(self.allocSize)
         )
 
     def description(self) -> str:
         src_type = "device" if self.srcUSM == 1 else "host"
         dst_type = "device" if self.dstUSM == 1 else "host"
         events = "with" if self.useEvents else "without"
         copy_offload = "with" if self.useCopyOffload else "without"
+        with_barrier = "with" if self.useBarrier else "without"  # wording for the barrier clause of the sentence below
         return (
             f"Measures multithreaded memory copy performance with {self.numThreads} threads "
             f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes "
-            f"from {src_type} to {dst_type} memory {events} events {copy_offload} driver copy offload."
+            f"from {src_type} to {dst_type} memory {events} events {copy_offload} driver copy offload "
+            f"{with_barrier} barrier."  # last fragment: ends at the period, no trailing space
         )
 
     def get_tags(self):
-        return ["memory", "latency", "UR", "micro"]
+        return ["memory", "latency", runtime_to_tag_name(self.runtime), "micro"]  # runtime tag now derived from self.runtime instead of the fixed "UR"
 
     def bin_args(self) -> list[str]:
         return [
@@ -599,6 +625,7 @@ def bin_args(self) -> list[str]:
             f"--iterations={self.iterations}",
             f"--SrcUSM={self.srcUSM}",
             f"--DstUSM={self.dstUSM}",
+            f"--UseBarrier={self.useBarrier}",
         ]