llvm · May 15, 2025
diff --git a/‎flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
Lines changed: 87 additions & 0 deletions b/‎flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
Lines changed: 87 additions & 0 deletions
diff --git a/‎flang/test/Integration/OpenMP/task-target-device.f90
Lines changed: 37 additions & 0 deletions b/‎flang/test/Integration/OpenMP/task-target-device.f90
Lines changed: 37 additions & 0 deletions
diff --git a/‎flang/test/Integration/OpenMP/threadprivate-target-device.f90
Lines changed: 40 additions & 0 deletions b/‎flang/test/Integration/OpenMP/threadprivate-target-device.f90
Lines changed: 40 additions & 0 deletions
diff --git a/‎mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Lines changed: 159 additions & 264 deletions b/‎mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Lines changed: 159 additions & 264 deletions
diff --git a/‎mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
Lines changed: 10 additions & 15 deletions b/‎mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
Lines changed: 10 additions & 15 deletions
diff --git a/‎mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
Lines changed: 7 additions & 12 deletions b/‎mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
Lines changed: 7 additions & 12 deletions
diff --git a/‎mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
Lines changed: 24 additions & 37 deletions b/‎mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
Lines changed: 24 additions & 37 deletions
diff --git a/‎mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
Lines changed: 0 additions & 43 deletions b/‎mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
Lines changed: 0 additions & 43 deletions
diff --git a/‎mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
Lines changed: 0 additions & 31 deletions b/‎mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
Lines changed: 0 additions & 31 deletions
diff --git a/‎mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
Lines changed: 45 additions & 0 deletions b/‎mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
Lines changed: 45 additions & 0 deletions
diff --git a/‎mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
Lines changed: 0 additions & 160 deletions b/‎mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
Lines changed: 0 additions & 160 deletions
diff --git a/‎mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
Lines changed: 0 additions & 26 deletions b/‎mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
Lines changed: 0 additions & 26 deletions
@@ -0,0 +1,87 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! CHECK-NOT: define void @nested_target_in_parallel
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_parallel(v)  ! empty target region nested inside a host parallel region
+  implicit none
+  integer, intent(inout) :: v(10)  ! mapped tofrom by the nested target construct
+
+  !$omp parallel
+    !$omp target map(tofrom: v)
+    !$omp end target
+  !$omp end parallel
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_wsloop
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_wsloop_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_wsloop(v)  ! empty target region nested inside a host worksharing loop
+  implicit none
+  integer, intent(inout) :: v(10)  ! mapped tofrom by the nested target construct
+  integer :: i  ! index of the loop associated with the omp do directive
+
+  !$omp do
+  do i=1, 10
+    !$omp target map(tofrom: v)
+    !$omp end target
+  end do
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_parallel_with_private
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_parallel_with_private(v)  ! target nested in a parallel region with a firstprivate variable
+  implicit none
+  integer, intent(inout) :: v(10)  ! the section v(1:x) is mapped tofrom
+  integer :: x  ! firstprivate on the parallel region; upper bound of the mapped section
+  x = 10
+
+  !$omp parallel firstprivate(x)
+    !$omp target map(tofrom: v(1:x))
+    !$omp end target
+  !$omp end parallel
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_task_with_private
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_task_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_task_with_private(v)  ! target nested in a host task with a firstprivate variable
+  implicit none
+  integer, intent(inout) :: v(10)  ! the section v(1:x) is mapped tofrom
+  integer :: x  ! firstprivate on the task; upper bound of the mapped section
+  x = 10
+
+  !$omp task firstprivate(x)
+    !$omp target map(tofrom: v(1:x))
+    !$omp end target
+  !$omp end task
+end subroutine
+
+! CHECK-NOT: define void @target_and_atomic_update
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_and_atomic_update_{{.*}}(ptr %{{.*}})
+subroutine target_and_atomic_update(x, expr)  ! empty target region followed by a sibling host atomic update
+  implicit none
+  integer, intent(inout) :: x, expr
+
+  !$omp target
+  !$omp end target
+
+  !$omp atomic update
+  x = x + expr  ! statement governed by the atomic update directive; it is outside the target region
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_associate
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_associate_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_associate(x)  ! empty target region nested inside an associate construct
+  integer, pointer, contiguous :: x(:)
+  associate(y => x)  ! y is the associate name for x; the target construct maps y, not x
+    !$omp target map(tofrom: y)
+    !$omp end target
+  end associate
+end subroutine
@@ -0,0 +1,37 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! This tests the fix for https://github.com/llvm/llvm-project/issues/84606
+! We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash.
+
+! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}QQmain{{.*}}({{.*}})
+program main  ! host task with a depend clause, followed by a separate target region
+  implicit none
+  integer, parameter :: N = 5  ! trip count of both loops
+  integer, dimension(5) :: a  ! filled inside the task, then mapped tofrom into the target region
+  integer :: i
+  integer :: target_a = 0  ! accumulator mapped tofrom into the target region
+
+  !$omp task depend(out:a)
+  do i = 1, N
+    a(i) = i
+  end do
+  !$omp end task
+
+  !$omp target map(tofrom:target_a) map(tofrom:a)
+  do i = 1, N
+    target_a = target_a + i
+    a(i) = a(i) + i
+  end do
+  !$omp end target
+  print*, target_a
+  print*, a
+end program main
@@ -0,0 +1,40 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! The aim of this test is to verify host threadprivate directives do not cause
+! crashes during OpenMP target device codegen when used in conjunction with
+! target code in the same function.
+
+! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]], ptr %[[ARG2:.*]]) #{{[0-9]+}} {
+! CHECK:  %[[ALLOCA_X:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK:  %[[ASCAST_X:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_X]] to ptr
+! CHECK:  store ptr %[[ARG1]], ptr %[[ASCAST_X]], align 8
+
+! CHECK:  %[[ALLOCA_N:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK:  %[[ASCAST_N:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_N]] to ptr
+! CHECK:  store ptr %[[ARG2]], ptr %[[ASCAST_N]], align 8
+
+! CHECK:  %[[LOAD_X:.*]] = load ptr, ptr %[[ASCAST_X]], align 8
+! CHECK:  call void @bar_(ptr %[[LOAD_X]], ptr %[[ASCAST_N]])
+
+module test  ! host threadprivate variable used together with a target region in the same function
+  implicit none
+  integer :: n  ! made threadprivate by the directive on the next line
+  !$omp threadprivate(n)
+  
+  contains
+  subroutine foo(x)  ! maps x(1:n) tofrom and calls bar(x, n) inside the target region
+    integer, intent(inout) :: x(10)
+    !$omp target map(tofrom: x(1:n))
+      call bar(x, n)  ! bar has no definition in this test; the FileCheck patterns match it as @bar_
+    !$omp end target
+  end subroutine
+end module
@@ -3,21 +3,16 @@
 module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
   llvm.func @_QQmain() attributes {bindc_name = "main"} {
     %0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
-    %1 = llvm.mlir.constant(10 : index) : i64
-    %2 = llvm.mlir.constant(1 : index) : i64
-    %3 = llvm.mlir.constant(0 : index) : i64
-    %4 = llvm.mlir.constant(9 : index) : i64
-    %5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%4 : i64) extent(%1 : i64) stride(%2 : i64) start_idx(%2 : i64)
-    %6 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = "sp"}
-    omp.target map_entries(%6 -> %arg0 : !llvm.ptr) {
-      %7 = llvm.mlir.constant(20 : i32) : i32
-      %8 = llvm.mlir.constant(0 : i64) : i64
-      %9 = llvm.getelementptr %arg0[0, %8] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
-      llvm.store %7, %9 : i32, !llvm.ptr
-      %10 = llvm.mlir.constant(10 : i32) : i32
-      %11 = llvm.mlir.constant(4 : i64) : i64
-      %12 = llvm.getelementptr %arg0[0, %11] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
-      llvm.store %10, %12 : i32, !llvm.ptr
+    %1 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"}
+    omp.target map_entries(%1 -> %arg0 : !llvm.ptr) {
+      %2 = llvm.mlir.constant(20 : i32) : i32
+      %3 = llvm.mlir.constant(0 : i64) : i64
+      %4 = llvm.getelementptr %arg0[0, %3] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
+      llvm.store %2, %4 : i32, !llvm.ptr
+      %5 = llvm.mlir.constant(10 : i32) : i32
+      %6 = llvm.mlir.constant(4 : i64) : i64
+      %7 = llvm.getelementptr %arg0[0, %6] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
+      llvm.store %5, %7 : i32, !llvm.ptr
       omp.terminator
     }
     llvm.return
 
@@ -30,19 +30,14 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
   llvm.func @test() {
     %0 = llvm.mlir.constant(1 : i64) : i64
     %1 = llvm.alloca %0 x f32 : (i64) -> !llvm.ptr<5>
-    %4 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
+    %2 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
     %ascast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
-    %ascast2 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
-    %6 = llvm.mlir.constant(9 : index) : i64
-    %7 = llvm.mlir.constant(0 : index) : i64
-    %8 = llvm.mlir.constant(1 : index) : i64
-    %10 = llvm.mlir.constant(10 : index) : i64
-    %11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
-    %14 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
-    %15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64)
-    %16 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr
-    %17 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
-    omp.target map_entries(%14 -> %arg0, %16 -> %arg1, %17 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+    %ascast2 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr
+    %3 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
+    %4 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
+    %5 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
+    %6 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
+    omp.target map_entries(%4 -> %arg0, %5 -> %arg1, %6 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
       llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr
       llvm.intr.dbg.declare #var_arr = %arg1 : !llvm.ptr
       llvm.intr.dbg.declare #var_i = %arg2 : !llvm.ptr
 
@@ -7,49 +7,36 @@
 module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
   omp.private {type = private} @_QFEk_private_i32 : i32
   llvm.func @_QQmain()  {
-    %0 = llvm.mlir.constant(1 : i32) : i32
-    %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
-    %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
-    %12 = llvm.mlir.constant(1 : i64) : i64
-    %13 = llvm.alloca %12 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
-    %14 = llvm.addrspacecast %13 : !llvm.ptr<5> to !llvm.ptr
-    %15 = llvm.mlir.constant(1 : i64) : i64
-    %16 = llvm.alloca %15 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
-    %17 = llvm.addrspacecast %16 : !llvm.ptr<5> to !llvm.ptr
-    %19 = llvm.mlir.constant(1 : index) : i64
-    %20 = llvm.mlir.constant(0 : index) : i64
-    %22 = llvm.mlir.addressof @_QFEa : !llvm.ptr
-    %25 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
-    %60 = llvm.getelementptr %8[0, 7, %20, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %61 = llvm.load %60 : !llvm.ptr -> i64
-    %62 = llvm.getelementptr %8[0, 7, %20, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %63 = llvm.load %62 : !llvm.ptr -> i64
-    %64 = llvm.getelementptr %8[0, 7, %20, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %65 = llvm.load %64 : !llvm.ptr -> i64
-    %66 = llvm.sub %63, %19 : i64
-    %67 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%66 : i64) extent(%63 : i64) stride(%65 : i64) start_idx(%61 : i64) {stride_in_bytes = true}
-    %68 = llvm.getelementptr %22[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %69 = omp.map.info var_ptr(%22 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%68 : !llvm.ptr) bounds(%67) -> !llvm.ptr {name = ""}
-    %70 = omp.map.info var_ptr(%22 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%69 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
-    %71 = omp.map.info var_ptr(%17 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
-    %72 = omp.map.info var_ptr(%14 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
-    %73 = omp.map.info var_ptr(%25 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
-    omp.target map_entries(%70 -> %arg0, %71 -> %arg1, %72 -> %arg2, %73 -> %arg3, %69 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
-      %106 = llvm.mlir.constant(0 : index) : i64
-      %107 = llvm.mlir.constant(13 : i32) : i32
-      %108 = llvm.mlir.constant(1000 : i32) : i32
-      %109 = llvm.mlir.constant(1 : i32) : i32
+    %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
+    %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+    %3 = llvm.mlir.constant(1 : i64) : i64
+    %4 = llvm.alloca %3 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
+    %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
+    %6 = llvm.mlir.addressof @_QFEa : !llvm.ptr
+    %7 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
+    %8 = llvm.getelementptr %6[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+    %9 = omp.map.info var_ptr(%6 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr) -> !llvm.ptr {name = ""}
+    %10 = omp.map.info var_ptr(%6 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
+    %11 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
+    %12 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
+    %13 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
+    omp.target map_entries(%10 -> %arg0, %11 -> %arg1, %12 -> %arg2, %13 -> %arg3, %9 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      %14 = llvm.mlir.constant(0 : index) : i64
+      %15 = llvm.mlir.constant(13 : i32) : i32
+      %16 = llvm.mlir.constant(1000 : i32) : i32
+      %17 = llvm.mlir.constant(1 : i32) : i32
       omp.teams {
         omp.parallel private(@_QFEk_private_i32 %arg2 -> %arg5 : !llvm.ptr) {
-          %110 = llvm.mlir.constant(1 : i32) : i32
-          %111 = llvm.alloca %110 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
-          %112 = llvm.addrspacecast %111 : !llvm.ptr<5> to !llvm.ptr
+          %18 = llvm.mlir.constant(1 : i32) : i32
+          %19 = llvm.alloca %18 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+          %20 = llvm.addrspacecast %19 : !llvm.ptr<5> to !llvm.ptr
           omp.distribute {
             omp.wsloop {
-              omp.loop_nest (%arg6) : i32 = (%109) to (%108) inclusive step (%109) {
+              omp.loop_nest (%arg6) : i32 = (%17) to (%16) inclusive step (%17) {
                 llvm.store %arg6, %arg5  : i32, !llvm.ptr
                 %115 = llvm.mlir.constant(48 : i32) : i32
-                "llvm.intr.memcpy"(%112, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+                "llvm.intr.memcpy"(%20, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
                 omp.yield
               }
             } {omp.composite}
 
@@ -89,3 +89,48 @@ llvm.func @omp_threadprivate() {
   llvm.store %3, %5 : i32, !llvm.ptr
   llvm.return
 }
+
+// -----
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} { // module marked as target-device code
+  llvm.func @host_op_in_device(%arg0 : !llvm.ptr) { // negative test: omp.threadprivate with no omp.target in the function
+    // expected-error @below {{unsupported host op found in device}}
+    // expected-error @below {{LLVM Translation failed for operation: omp.threadprivate}}
+    %0 = omp.threadprivate %arg0 : !llvm.ptr -> !llvm.ptr
+    llvm.return
+  }
+}
+
+// -----
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} { // module marked as target-device code
+  llvm.func @host_op_in_device_nested_target(%arg0 : !llvm.ptr) { // negative test: omp.target nested inside omp.parallel
+    // expected-error @below {{unsupported host op found in device}}
+    // expected-error @below {{LLVM Translation failed for operation: omp.parallel}}
+    omp.parallel {
+      omp.target { // empty target region nested in the parallel op
+        omp.terminator
+      }
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// -----
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} { // module marked as target-device code
+  llvm.func @host_op_in_device_sibling_target(%x: !llvm.ptr, %expr: i32) { // negative test: omp.atomic.update next to an omp.target
+    omp.target { // empty target region; the diagnostics are attached to the sibling op further down
+      omp.terminator
+    }
+    // expected-error @below {{unsupported host op found in device}}
+    // expected-error @below {{LLVM Translation failed for operation: omp.atomic.update}}
+    omp.atomic.update %x : !llvm.ptr {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32 // new value is the current value plus %expr
+      omp.yield(%newval : i32)
+    }
+    llvm.return
+  }
+}