Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 1195b01

Browse files
committed May 15, 2025
[MLIR][OpenMP] Simplify OpenMP device codegen
After removing host operations from the device MLIR module, it is no longer necessary to provide special codegen logic to prevent these operations from causing compiler crashes or miscompilations. This patch removes these now unnecessary code paths to simplify codegen logic. Some MLIR tests are now replaced with Flang tests, since the responsibility of dealing with host operations has been moved earlier in the compilation flow. MLIR tests holding target device modules are updated to no longer include now unsupported host operations.
1 parent 035cac0 commit 1195b01

12 files changed

+409
-588
lines changed
 
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!REQUIRES: amdgpu-registered-target
10+
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
11+
12+
! CHECK-NOT: define void @nested_target_in_parallel
13+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
14+
! Test input: an empty target region that maps the dummy array v (tofrom),
! nested inside a host parallel region. The FileCheck directives just above
! require that no plain definition of this subroutine is emitted and that one
! offloading kernel taking two pointer parameters is.
subroutine nested_target_in_parallel(v)
15+
implicit none
16+
integer, intent(inout) :: v(10)
17+
18+
!$omp parallel
19+
!$omp target map(tofrom: v)
20+
!$omp end target
21+
!$omp end parallel
22+
end subroutine
23+
24+
! CHECK-NOT: define void @nested_target_in_wsloop
25+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_wsloop_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
26+
! Test input: an empty target region that maps v (tofrom), placed in the body
! of a loop governed by an OpenMP do (worksharing-loop) directive. The
! FileCheck directives just above require that no plain definition of this
! subroutine is emitted and that one offloading kernel taking two pointer
! parameters is.
subroutine nested_target_in_wsloop(v)
27+
implicit none
28+
integer, intent(inout) :: v(10)
29+
integer :: i
30+
31+
!$omp do
32+
do i=1, 10
33+
!$omp target map(tofrom: v)
34+
!$omp end target
35+
end do
36+
end subroutine
37+
38+
! CHECK-NOT: define void @nested_target_in_parallel_with_private
39+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
40+
! Test input: an empty target region nested inside a host parallel region
! that lists x as firstprivate. The map clause uses x as the upper bound of
! the mapped section v(1:x), so the target region depends on a value
! privatized by the enclosing host construct. The FileCheck directives just
! above require no plain definition of this subroutine and one offloading
! kernel taking three pointer parameters.
subroutine nested_target_in_parallel_with_private(v)
41+
implicit none
42+
integer, intent(inout) :: v(10)
43+
integer :: x
44+
x = 10
45+
46+
!$omp parallel firstprivate(x)
47+
!$omp target map(tofrom: v(1:x))
48+
!$omp end target
49+
!$omp end parallel
50+
end subroutine
51+
52+
! CHECK-NOT: define void @nested_target_in_task_with_private
53+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_task_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
54+
! Test input: same shape as the parallel/firstprivate case, but the enclosing
! host construct is a task. The empty target region maps v(1:x), where x is
! firstprivate on the task. The FileCheck directives just above require no
! plain definition of this subroutine and one offloading kernel taking three
! pointer parameters.
subroutine nested_target_in_task_with_private(v)
55+
implicit none
56+
integer, intent(inout) :: v(10)
57+
integer :: x
58+
x = 10
59+
60+
!$omp task firstprivate(x)
61+
!$omp target map(tofrom: v(1:x))
62+
!$omp end target
63+
!$omp end task
64+
end subroutine
65+
66+
! CHECK-NOT: define void @target_and_atomic_update
67+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_and_atomic_update_{{.*}}(ptr %{{.*}})
68+
! Test input: an empty target region followed, in the same subroutine, by a
! host-side atomic update of x (outside the target region). The FileCheck
! directives just above require no plain definition of this subroutine and one
! offloading kernel taking a single pointer parameter.
subroutine target_and_atomic_update(x, expr)
69+
implicit none
70+
integer, intent(inout) :: x, expr
71+
72+
!$omp target
73+
!$omp end target
74+
75+
!$omp atomic update
76+
x = x + expr
77+
end subroutine
78+
79+
! CHECK-NOT: define void @nested_target_in_associate
80+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_associate_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
81+
! Test input: an empty target region inside an associate construct. The map
! clause names the associate-name y, which is bound to the contiguous pointer
! array dummy x. The FileCheck directives just above require no plain
! definition of this subroutine and one offloading kernel taking three pointer
! parameters.
! NOTE(review): unlike the other subroutines in this test, this one has no
! "implicit none" statement -- confirm whether that is intentional.
subroutine nested_target_in_associate(x)
82+
integer, pointer, contiguous :: x(:)
83+
associate(y => x)
84+
!$omp target map(tofrom: y)
85+
!$omp end target
86+
end associate
87+
end subroutine
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!REQUIRES: amdgpu-registered-target
10+
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
11+
12+
! This tests the fix for https://github.com/llvm/llvm-project/issues/84606
13+
! We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash.
14+
15+
! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}QQmain{{.*}}({{.*}})
16+
! Test input: a host task with a depend(out:a) clause that fills a, followed
! by a sibling target region that maps target_a and a (tofrom) and accumulates
! into both. The task is outside the target region. The FileCheck directive
! just above only requires that an offloading kernel whose name contains
! QQmain is emitted.
program main
17+
implicit none
18+
integer, parameter :: N = 5
19+
integer, dimension(5) :: a
20+
integer :: i
21+
integer :: target_a = 0
22+
23+
!$omp task depend(out:a)
24+
do i = 1, N
25+
a(i) = i
26+
end do
27+
!$omp end task
28+
29+
!$omp target map(tofrom:target_a) map(tofrom:a)
30+
do i = 1, N
31+
target_a = target_a + i
32+
a(i) = a(i) + i
33+
end do
34+
!$omp end target
35+
print*, target_a
36+
print*, a
37+
end program main
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!REQUIRES: amdgpu-registered-target
10+
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
11+
12+
! The aim of this test is to verify host threadprivate directives do not cause
13+
! crashes during OpenMP target device codegen when used in conjunction with
14+
! target code in the same function.
15+
16+
! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]], ptr %[[ARG2:.*]]) #{{[0-9]+}} {
17+
! CHECK: %[[ALLOCA_X:.*]] = alloca ptr, align 8, addrspace(5)
18+
! CHECK: %[[ASCAST_X:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_X]] to ptr
19+
! CHECK: store ptr %[[ARG1]], ptr %[[ASCAST_X]], align 8
20+
21+
! CHECK: %[[ALLOCA_N:.*]] = alloca ptr, align 8, addrspace(5)
22+
! CHECK: %[[ASCAST_N:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_N]] to ptr
23+
! CHECK: store ptr %[[ARG2]], ptr %[[ASCAST_N]], align 8
24+
25+
! CHECK: %[[LOAD_X:.*]] = load ptr, ptr %[[ASCAST_X]], align 8
26+
! CHECK: call void @bar_(ptr %[[LOAD_X]], ptr %[[ASCAST_N]])
27+
28+
! Test input: a module whose integer variable n is declared threadprivate,
! plus a subroutine foo whose target region uses n twice: as the upper bound
! of the mapped section x(1:n) and as an actual argument to bar. The FileCheck
! directives above expect a kernel with three pointer parameters that ends up
! calling bar_ with the loaded x pointer and the address-space-cast n slot.
! NOTE(review): bar is not defined in the part of the file shown here; it is
! presumably an implicit external procedure -- confirm.
module test
29+
implicit none
30+
integer :: n
31+
!$omp threadprivate(n)
32+
33+
contains
34+
subroutine foo(x)
35+
integer, intent(inout) :: x(10)
36+
!$omp target map(tofrom: x(1:n))
37+
call bar(x, n)
38+
!$omp end target
39+
end subroutine
40+
end module

‎mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 159 additions & 264 deletions
Large diffs are not rendered by default.

‎mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,16 @@
33
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
44
llvm.func @_QQmain() attributes {bindc_name = "main"} {
55
%0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
6-
%1 = llvm.mlir.constant(10 : index) : i64
7-
%2 = llvm.mlir.constant(1 : index) : i64
8-
%3 = llvm.mlir.constant(0 : index) : i64
9-
%4 = llvm.mlir.constant(9 : index) : i64
10-
%5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%4 : i64) extent(%1 : i64) stride(%2 : i64) start_idx(%2 : i64)
11-
%6 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = "sp"}
12-
omp.target map_entries(%6 -> %arg0 : !llvm.ptr) {
13-
%7 = llvm.mlir.constant(20 : i32) : i32
14-
%8 = llvm.mlir.constant(0 : i64) : i64
15-
%9 = llvm.getelementptr %arg0[0, %8] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
16-
llvm.store %7, %9 : i32, !llvm.ptr
17-
%10 = llvm.mlir.constant(10 : i32) : i32
18-
%11 = llvm.mlir.constant(4 : i64) : i64
19-
%12 = llvm.getelementptr %arg0[0, %11] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
20-
llvm.store %10, %12 : i32, !llvm.ptr
6+
%1 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"}
7+
omp.target map_entries(%1 -> %arg0 : !llvm.ptr) {
8+
%2 = llvm.mlir.constant(20 : i32) : i32
9+
%3 = llvm.mlir.constant(0 : i64) : i64
10+
%4 = llvm.getelementptr %arg0[0, %3] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
11+
llvm.store %2, %4 : i32, !llvm.ptr
12+
%5 = llvm.mlir.constant(10 : i32) : i32
13+
%6 = llvm.mlir.constant(4 : i64) : i64
14+
%7 = llvm.getelementptr %arg0[0, %6] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
15+
llvm.store %5, %7 : i32, !llvm.ptr
2116
omp.terminator
2217
}
2318
llvm.return

‎mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,14 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
3030
llvm.func @test() {
3131
%0 = llvm.mlir.constant(1 : i64) : i64
3232
%1 = llvm.alloca %0 x f32 : (i64) -> !llvm.ptr<5>
33-
%4 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
33+
%2 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
3434
%ascast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
35-
%ascast2 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
36-
%6 = llvm.mlir.constant(9 : index) : i64
37-
%7 = llvm.mlir.constant(0 : index) : i64
38-
%8 = llvm.mlir.constant(1 : index) : i64
39-
%10 = llvm.mlir.constant(10 : index) : i64
40-
%11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
41-
%14 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
42-
%15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64)
43-
%16 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr
44-
%17 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
45-
omp.target map_entries(%14 -> %arg0, %16 -> %arg1, %17 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
35+
%ascast2 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr
36+
%3 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
37+
%4 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
38+
%5 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
39+
%6 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
40+
omp.target map_entries(%4 -> %arg0, %5 -> %arg1, %6 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
4641
llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr
4742
llvm.intr.dbg.declare #var_arr = %arg1 : !llvm.ptr
4843
llvm.intr.dbg.declare #var_i = %arg2 : !llvm.ptr

‎mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir

Lines changed: 24 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,49 +7,36 @@
77
module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
88
omp.private {type = private} @_QFEk_private_i32 : i32
99
llvm.func @_QQmain() {
10-
%0 = llvm.mlir.constant(1 : i32) : i32
11-
%7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
12-
%8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
13-
%12 = llvm.mlir.constant(1 : i64) : i64
14-
%13 = llvm.alloca %12 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
15-
%14 = llvm.addrspacecast %13 : !llvm.ptr<5> to !llvm.ptr
16-
%15 = llvm.mlir.constant(1 : i64) : i64
17-
%16 = llvm.alloca %15 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
18-
%17 = llvm.addrspacecast %16 : !llvm.ptr<5> to !llvm.ptr
19-
%19 = llvm.mlir.constant(1 : index) : i64
20-
%20 = llvm.mlir.constant(0 : index) : i64
21-
%22 = llvm.mlir.addressof @_QFEa : !llvm.ptr
22-
%25 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
23-
%60 = llvm.getelementptr %8[0, 7, %20, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
24-
%61 = llvm.load %60 : !llvm.ptr -> i64
25-
%62 = llvm.getelementptr %8[0, 7, %20, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
26-
%63 = llvm.load %62 : !llvm.ptr -> i64
27-
%64 = llvm.getelementptr %8[0, 7, %20, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
28-
%65 = llvm.load %64 : !llvm.ptr -> i64
29-
%66 = llvm.sub %63, %19 : i64
30-
%67 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%66 : i64) extent(%63 : i64) stride(%65 : i64) start_idx(%61 : i64) {stride_in_bytes = true}
31-
%68 = llvm.getelementptr %22[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
32-
%69 = omp.map.info var_ptr(%22 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%68 : !llvm.ptr) bounds(%67) -> !llvm.ptr {name = ""}
33-
%70 = omp.map.info var_ptr(%22 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%69 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
34-
%71 = omp.map.info var_ptr(%17 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
35-
%72 = omp.map.info var_ptr(%14 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
36-
%73 = omp.map.info var_ptr(%25 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
37-
omp.target map_entries(%70 -> %arg0, %71 -> %arg1, %72 -> %arg2, %73 -> %arg3, %69 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
38-
%106 = llvm.mlir.constant(0 : index) : i64
39-
%107 = llvm.mlir.constant(13 : i32) : i32
40-
%108 = llvm.mlir.constant(1000 : i32) : i32
41-
%109 = llvm.mlir.constant(1 : i32) : i32
10+
%0 = llvm.mlir.constant(1 : i64) : i64
11+
%1 = llvm.alloca %0 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
12+
%2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
13+
%3 = llvm.mlir.constant(1 : i64) : i64
14+
%4 = llvm.alloca %3 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
15+
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
16+
%6 = llvm.mlir.addressof @_QFEa : !llvm.ptr
17+
%7 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
18+
%8 = llvm.getelementptr %6[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
19+
%9 = omp.map.info var_ptr(%6 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr) -> !llvm.ptr {name = ""}
20+
%10 = omp.map.info var_ptr(%6 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
21+
%11 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
22+
%12 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
23+
%13 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
24+
omp.target map_entries(%10 -> %arg0, %11 -> %arg1, %12 -> %arg2, %13 -> %arg3, %9 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
25+
%14 = llvm.mlir.constant(0 : index) : i64
26+
%15 = llvm.mlir.constant(13 : i32) : i32
27+
%16 = llvm.mlir.constant(1000 : i32) : i32
28+
%17 = llvm.mlir.constant(1 : i32) : i32
4229
omp.teams {
4330
omp.parallel private(@_QFEk_private_i32 %arg2 -> %arg5 : !llvm.ptr) {
44-
%110 = llvm.mlir.constant(1 : i32) : i32
45-
%111 = llvm.alloca %110 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
46-
%112 = llvm.addrspacecast %111 : !llvm.ptr<5> to !llvm.ptr
31+
%18 = llvm.mlir.constant(1 : i32) : i32
32+
%19 = llvm.alloca %18 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
33+
%20 = llvm.addrspacecast %19 : !llvm.ptr<5> to !llvm.ptr
4734
omp.distribute {
4835
omp.wsloop {
49-
omp.loop_nest (%arg6) : i32 = (%109) to (%108) inclusive step (%109) {
36+
omp.loop_nest (%arg6) : i32 = (%17) to (%16) inclusive step (%17) {
5037
llvm.store %arg6, %arg5 : i32, !llvm.ptr
5138
%115 = llvm.mlir.constant(48 : i32) : i32
52-
"llvm.intr.memcpy"(%112, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
39+
"llvm.intr.memcpy"(%20, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
5340
omp.yield
5441
}
5542
} {omp.composite}

‎mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir

Lines changed: 0 additions & 43 deletions
This file was deleted.

‎mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir

Lines changed: 0 additions & 31 deletions
This file was deleted.

‎mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,48 @@ llvm.func @omp_threadprivate() {
8989
llvm.store %3, %5 : i32, !llvm.ptr
9090
llvm.return
9191
}
92+
93+
// -----
94+
95+
// Negative test: a module marked omp.is_target_device = true that contains an
// omp.threadprivate op outside any omp.target region. The diagnostic
// annotations inside the function require translation to report it as an
// unsupported host op and fail on that operation.
module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
96+
llvm.func @host_op_in_device(%arg0 : !llvm.ptr) {
97+
// expected-error @below {{unsupported host op found in device}}
98+
// expected-error @below {{LLVM Translation failed for operation: omp.threadprivate}}
99+
%0 = omp.threadprivate %arg0 : !llvm.ptr -> !llvm.ptr
100+
llvm.return
101+
}
102+
}
103+
104+
// -----
105+
106+
// Negative test: a device module in which an omp.target region is nested
// inside an omp.parallel op that is itself outside any target region. The
// diagnostic annotations inside the function require translation to reject
// the enclosing omp.parallel as an unsupported host op.
module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
107+
llvm.func @host_op_in_device_nested_target(%arg0 : !llvm.ptr) {
108+
// expected-error @below {{unsupported host op found in device}}
109+
// expected-error @below {{LLVM Translation failed for operation: omp.parallel}}
110+
omp.parallel {
111+
omp.target {
112+
omp.terminator
113+
}
114+
omp.terminator
115+
}
116+
llvm.return
117+
}
118+
}
119+
120+
// -----
121+
122+
// Negative test: a device module in which an omp.target region is followed,
// in the same function, by an omp.atomic.update op located outside the target
// region. The diagnostic annotations inside the function require translation
// to reject the omp.atomic.update as an unsupported host op; no diagnostic is
// annotated on the preceding omp.target.
module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
123+
llvm.func @host_op_in_device_sibling_target(%x: !llvm.ptr, %expr: i32) {
124+
omp.target {
125+
omp.terminator
126+
}
127+
// expected-error @below {{unsupported host op found in device}}
128+
// expected-error @below {{LLVM Translation failed for operation: omp.atomic.update}}
129+
omp.atomic.update %x : !llvm.ptr {
130+
^bb0(%xval: i32):
131+
%newval = llvm.add %xval, %expr : i32
132+
omp.yield(%newval : i32)
133+
}
134+
llvm.return
135+
}
136+
}

‎mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir

Lines changed: 0 additions & 160 deletions
This file was deleted.

‎mlir/test/Target/LLVMIR/openmp-task-target-device.mlir

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.