@@ -34,7 +34,7 @@ func.func @main() {
%out_1 = memref.get_global @bufferRhsGlobal : memref<8x128xf32, #gpu.address_space<workgroup>>
// CHECK: %[[B:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>
- // CHECK: nvgpu.mbarrier.init %[[B]], %{{.*}} : <memorySpace = #gpu.address_space<workgroup>
+ // CHECK: nvgpu.mbarrier.init %[[B]][%{{.*}}], %{{.*}} : <memorySpace = #gpu.address_space<workgroup>
// CHECK: gpu.barrier
//
// CHECK: %[[c0:.*]] = arith.constant 0 : index
@@ -44,27 +44,27 @@ func.func @main() {
// CHECK: scf.if %[[CMP]] {
//
// CHECK: %[[c0_7:.*]] = arith.constant 0 : index
- // CHECK: nvgpu.tma.async.load %[[D1]][%[[c0_7]], %[[c0_7]]], %[[B]] to %[[G1]]
+ // CHECK: nvgpu.tma.async.load %[[D1]][%[[c0_7]], %[[c0_7]]], %[[B]][%{{.*}}] to %[[G1]]
// CHECK-SAME: : <tensor = memref<64x8xf32, #gpu.address_space<workgroup>>,
// CHECK-SAME: swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>
// CHECK-SAME: -> memref<64x8xf32, #gpu.address_space<workgroup>>
//
// CHECK: %[[c0_8:.*]] = arith.constant 0 : index
- // CHECK: nvgpu.tma.async.load %[[D2]][%[[c0_8]], %[[c0_8]]], %[[B]] to %[[G2]]
+ // CHECK: nvgpu.tma.async.load %[[D2]][%[[c0_8]], %[[c0_8]]], %[[B]][%{{.*}}] to %[[G2]]
// CHECK-SAME: : <tensor = memref<8x128xf32, #gpu.address_space<workgroup>>,
// CHECK-SAME: swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>
// CHECK-SAME: -> memref<8x128xf32, #gpu.address_space<workgroup>>
//
// CHECK: %[[c6144:.*]] = arith.constant 6144 : index
- // CHECK: nvgpu.mbarrier.arrive.expect_tx %[[B]], %[[c6144]] : <memorySpace = #gpu.address_space<workgroup>
+ // CHECK: nvgpu.mbarrier.arrive.expect_tx %[[B]][%{{.*}}], %[[c6144]] : <memorySpace = #gpu.address_space<workgroup>
// CHECK: } else {
// CHECK: %[[c0_7:.*]] = arith.constant 0 : index
- // CHECK: nvgpu.mbarrier.arrive.expect_tx %[[B]], %[[c0_7]] : <memorySpace = #gpu.address_space<workgroup>
+ // CHECK: nvgpu.mbarrier.arrive.expect_tx %[[B]][%{{.*}}], %[[c0_7]] : <memorySpace = #gpu.address_space<workgroup>
// CHECK: }
//
// CHECK: %[[c0_6:.*]] = arith.constant 0 : index
// CHECK: %[[c10000000:.*]] = arith.constant 10000000 : index
- // CHECK: nvgpu.mbarrier.try_wait.parity %[[B]], %[[c0_6]], %[[c10000000]] : <memorySpace = #gpu.address_space<workgroup>
+ // CHECK: nvgpu.mbarrier.try_wait.parity %[[B]][%{{.*}}], %[[c0_6]], %[[c10000000]] : <memorySpace = #gpu.address_space<workgroup>
/// Both copies are matched and end up in the same async group.
linalg.copy ins(%memref: memref<64x8xf32>) outs(%out: memref<64x8xf32, #gpu.address_space<workgroup>>)