From 194946241541917b4c3279d64f12dd2624e3c6c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Wed, 6 Aug 2025 15:55:17 -0700 Subject: [PATCH] =?UTF-8?q?Revert=20"[flang][cuda]=20Set=20the=20allocator?= =?UTF-8?q?=20of=20derived=20type=20component=20after=20alloc=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit d897355876287e410d35f1f0ac74d79955d50dd4. --- flang/include/flang/Lower/{CUDA.h => Cuda.h} | 12 +- flang/lib/Lower/Allocatable.cpp | 14 +- flang/lib/Lower/Bridge.cpp | 2 +- flang/lib/Lower/CMakeLists.txt | 1 - flang/lib/Lower/CUDA.cpp | 148 ------------------- flang/lib/Lower/ConvertVariable.cpp | 75 ++++++++-- flang/test/Lower/CUDA/cuda-set-allocator.cuf | 42 ++---- 7 files changed, 89 insertions(+), 205 deletions(-) rename flang/include/flang/Lower/{CUDA.h => Cuda.h} (74%) delete mode 100644 flang/lib/Lower/CUDA.cpp diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/Cuda.h similarity index 74% rename from flang/include/flang/Lower/CUDA.h rename to flang/include/flang/Lower/Cuda.h index 6aad06e150ad1..b6f849e3d63f0 100644 --- a/flang/include/flang/Lower/CUDA.h +++ b/flang/include/flang/Lower/Cuda.h @@ -1,4 +1,4 @@ -//===-- Lower/CUDA.h -- CUDA Fortran utilities ------------------*- C++ -*-===// +//===-- Lower/Cuda.h -- Cuda Fortran utilities ------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -15,7 +15,6 @@ #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" -#include "flang/Runtime/allocator-registry-consts.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/OpenACC/OpenACC.h" @@ -38,15 +37,6 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) { return kDefaultAllocator; } -void initializeDeviceComponentAllocator( - Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box); - -mlir::Type gatherDeviceComponentCoordinatesAndType( - fir::FirOpBuilder &builder, mlir::Location loc, - const Fortran::semantics::Symbol &sym, fir::RecordType recTy, - llvm::SmallVector &coordinates); - } // end namespace Fortran::lower #endif // FORTRAN_LOWER_CUDA_H diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index ce9d8944387e1..219f9205f45d5 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -13,9 +13,9 @@ #include "flang/Lower/Allocatable.h" #include "flang/Evaluate/tools.h" #include "flang/Lower/AbstractConverter.h" -#include "flang/Lower/CUDA.h" #include "flang/Lower/ConvertType.h" #include "flang/Lower/ConvertVariable.h" +#include "flang/Lower/Cuda.h" #include "flang/Lower/IterationSpace.h" #include "flang/Lower/Mangler.h" #include "flang/Lower/OpenACC.h" @@ -445,14 +445,10 @@ class AllocateStmtHelper { /*mustBeHeap=*/true); } - void postAllocationAction(const Allocation &alloc, - const fir::MutableBoxValue &box) { + void postAllocationAction(const Allocation &alloc) { if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare)) Fortran::lower::attachDeclarePostAllocAction(converter, builder, alloc.getSymbol()); - if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol())) - Fortran::lower::initializeDeviceComponentAllocator( - converter, alloc.getSymbol(), box); } void setPinnedToFalse() { @@ -485,7 +481,7 @@ class AllocateStmtHelper { // Pointers must use PointerAllocate so that their deallocations // can be validated. genInlinedAllocation(alloc, box); - postAllocationAction(alloc, box); + postAllocationAction(alloc); setPinnedToFalse(); return; } @@ -508,7 +504,7 @@ class AllocateStmtHelper { genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol()); } fir::factory::syncMutableBoxFromIRBox(builder, loc, box); - postAllocationAction(alloc, box); + postAllocationAction(alloc); errorManager.assignStat(builder, loc, stat); } @@ -651,7 +647,7 @@ class AllocateStmtHelper { setPinnedToFalse(); } fir::factory::syncMutableBoxFromIRBox(builder, loc, box); - postAllocationAction(alloc, box); + postAllocationAction(alloc); errorManager.assignStat(builder, loc, stat); } diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 1bad46f80612b..6b7efe6b57db3 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -13,7 +13,6 @@ #include "flang/Lower/Bridge.h" #include "flang/Lower/Allocatable.h" -#include "flang/Lower/CUDA.h" #include "flang/Lower/CallInterface.h" #include "flang/Lower/Coarray.h" #include "flang/Lower/ConvertCall.h" @@ -21,6 +20,7 @@ #include "flang/Lower/ConvertExprToHLFIR.h" #include "flang/Lower/ConvertType.h" #include "flang/Lower/ConvertVariable.h" +#include "flang/Lower/Cuda.h" #include "flang/Lower/DirectivesCommon.h" #include "flang/Lower/HostAssociations.h" #include "flang/Lower/IO.h" diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index 1d1c7ddda8e9b..8e20abf0e9f2d 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -15,7 +15,6 @@ add_flang_library(FortranLower ConvertProcedureDesignator.cpp ConvertType.cpp ConvertVariable.cpp - CUDA.cpp CustomIntrinsicCall.cpp HlfirIntrinsics.cpp HostAssociations.cpp diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp deleted file mode 100644 index 60b33bb5680c2..0000000000000 --- a/flang/lib/Lower/CUDA.cpp +++ /dev/null @@ -1,148 +0,0 @@ -//===-- CUDA.cpp -- CUDA Fortran specific lowering ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ -// -//===----------------------------------------------------------------------===// - -#include "flang/Lower/CUDA.h" - -#define DEBUG_TYPE "flang-lower-cuda" - -void Fortran::lower::initializeDeviceComponentAllocator( - Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) { - if (const auto *details{ - sym.GetUltimate() - .detailsIf()}) { - const Fortran::semantics::DeclTypeSpec *type{details->type()}; - const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() - : nullptr}; - if (derived) { - if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) - return; // No device components. - - fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Location loc = converter.getCurrentLocation(); - - mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType()); - - // Only pointer and allocatable needs post allocation initialization - // of components descriptors. - if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy)) - return; - - // Extract the derived type. - mlir::Type ty = fir::getDerivedType(baseTy); - auto recTy = mlir::dyn_cast(ty); - assert(recTy && "expected fir::RecordType"); - - if (auto boxTy = mlir::dyn_cast(baseTy)) - baseTy = boxTy.getEleTy(); - baseTy = fir::unwrapRefType(baseTy); - - Fortran::semantics::UltimateComponentIterator components{*derived}; - mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr()); - mlir::Value addr; - if (auto seqTy = mlir::dyn_cast(baseTy)) { - mlir::Type idxTy = builder.getIndexType(); - mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); - mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); - llvm::SmallVector loops; - llvm::SmallVector indices; - llvm::SmallVector extents; - for (unsigned i = 0; i < seqTy.getDimension(); ++i) { - mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); - auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy, - idxTy, loadedBox, dim); - mlir::Value lbub = mlir::arith::AddIOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1)); - mlir::Value ext = - mlir::arith::SubIOp::create(builder, loc, lbub, one); - mlir::Value cmp = mlir::arith::CmpIOp::create( - builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero); - ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero); - extents.push_back(ext); - - auto loop = fir::DoLoopOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1), - dimInfo.getResult(2), /*isUnordered=*/true, - /*finalCount=*/false, mlir::ValueRange{}); - loops.push_back(loop); - indices.push_back(loop.getInductionVar()); - builder.setInsertionPointToStart(loop.getBody()); - } - mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox); - auto shape = fir::ShapeOp::create(builder, loc, extents); - addr = fir::ArrayCoorOp::create( - builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape, - /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{}); - } else { - addr = fir::BoxAddrOp::create(builder, loc, loadedBox); - } - for (const auto &compSym : components) { - if (Fortran::semantics::IsDeviceAllocatable(compSym)) { - llvm::SmallVector coord; - mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType( - builder, loc, compSym, recTy, coord); - assert(coord.size() == 1 && "expect one coordinate"); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), addr, coord[0]); - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - builder.getContext(), compSym); - cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); - } - } - } - } -} - -mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType( - fir::FirOpBuilder &builder, mlir::Location loc, - const Fortran::semantics::Symbol &sym, fir::RecordType recTy, - llvm::SmallVector &coordinates) { - unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString()); - mlir::Type fieldTy; - if (fieldIdx != std::numeric_limits::max()) { - // Field found in the base record type. - auto fieldName = recTy.getTypeList()[fieldIdx].first; - fieldTy = recTy.getTypeList()[fieldIdx].second; - mlir::Value fieldIndex = fir::FieldIndexOp::create( - builder, loc, fir::FieldType::get(fieldTy.getContext()), fieldName, - recTy, - /*typeParams=*/mlir::ValueRange{}); - coordinates.push_back(fieldIndex); - } else { - // Field not found in base record type, search in potential - // record type components. - for (auto component : recTy.getTypeList()) { - if (auto childRecTy = mlir::dyn_cast(component.second)) { - fieldIdx = childRecTy.getFieldIndex(sym.name().ToString()); - if (fieldIdx != std::numeric_limits::max()) { - mlir::Value parentFieldIndex = fir::FieldIndexOp::create( - builder, loc, fir::FieldType::get(childRecTy.getContext()), - component.first, recTy, - /*typeParams=*/mlir::ValueRange{}); - coordinates.push_back(parentFieldIndex); - auto fieldName = childRecTy.getTypeList()[fieldIdx].first; - fieldTy = childRecTy.getTypeList()[fieldIdx].second; - mlir::Value childFieldIndex = fir::FieldIndexOp::create( - builder, loc, fir::FieldType::get(fieldTy.getContext()), - fieldName, childRecTy, - /*typeParams=*/mlir::ValueRange{}); - coordinates.push_back(childFieldIndex); - break; - } - } - } - } - if (coordinates.empty()) - TODO(loc, "device resident component in complex derived-type hierarchy"); - return fieldTy; -} diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 6a64320965095..a4a8a697e02ae 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -14,12 +14,12 @@ #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/Allocatable.h" #include "flang/Lower/BoxAnalyzer.h" -#include "flang/Lower/CUDA.h" #include "flang/Lower/CallInterface.h" #include "flang/Lower/ConvertConstant.h" #include "flang/Lower/ConvertExpr.h" #include "flang/Lower/ConvertExprToHLFIR.h" #include "flang/Lower/ConvertProcedureDesignator.h" +#include "flang/Lower/Cuda.h" #include "flang/Lower/Mangler.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/StatementContext.h" @@ -814,24 +814,81 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter, baseTy = boxTy.getEleTy(); baseTy = fir::unwrapRefType(baseTy); - if (fir::isAllocatableType(fir::getBase(exv).getType()) || - fir::isPointerType(fir::getBase(exv).getType())) + if (mlir::isa(baseTy) && + (fir::isAllocatableType(fir::getBase(exv).getType()) || + fir::isPointerType(fir::getBase(exv).getType()))) return; // Allocator index need to be set after allocation. auto recTy = mlir::dyn_cast(fir::unwrapSequenceType(baseTy)); assert(recTy && "expected fir::RecordType"); + llvm::SmallVector coordinates; Fortran::semantics::UltimateComponentIterator components{*derived}; for (const auto &sym : components) { if (Fortran::semantics::IsDeviceAllocatable(sym)) { - llvm::SmallVector coord; - mlir::Type fieldTy = - Fortran::lower::gatherDeviceComponentCoordinatesAndType( - builder, loc, sym, recTy, coord); + unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString()); + mlir::Type fieldTy; + llvm::SmallVector coordinates; + + if (fieldIdx != std::numeric_limits::max()) { + // Field found in the base record type. + auto fieldName = recTy.getTypeList()[fieldIdx].first; + fieldTy = recTy.getTypeList()[fieldIdx].second; + mlir::Value fieldIndex = fir::FieldIndexOp::create( + builder, loc, fir::FieldType::get(fieldTy.getContext()), + fieldName, recTy, + /*typeParams=*/mlir::ValueRange{}); + coordinates.push_back(fieldIndex); + } else { + // Field not found in base record type, search in potential + // record type components. + for (auto component : recTy.getTypeList()) { + if (auto childRecTy = + mlir::dyn_cast(component.second)) { + fieldIdx = childRecTy.getFieldIndex(sym.name().ToString()); + if (fieldIdx != std::numeric_limits::max()) { + mlir::Value parentFieldIndex = fir::FieldIndexOp::create( + builder, loc, + fir::FieldType::get(childRecTy.getContext()), + component.first, recTy, + /*typeParams=*/mlir::ValueRange{}); + coordinates.push_back(parentFieldIndex); + auto fieldName = childRecTy.getTypeList()[fieldIdx].first; + fieldTy = childRecTy.getTypeList()[fieldIdx].second; + mlir::Value childFieldIndex = fir::FieldIndexOp::create( + builder, loc, fir::FieldType::get(fieldTy.getContext()), + fieldName, childRecTy, + /*typeParams=*/mlir::ValueRange{}); + coordinates.push_back(childFieldIndex); + break; + } + } + } + } + + if (coordinates.empty()) + TODO(loc, "device resident component in complex derived-type " + "hierarchy"); + mlir::Value base = fir::getBase(exv); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), base, coord); + mlir::Value comp; + if (mlir::isa(fir::unwrapRefType(base.getType()))) { + mlir::Value box = fir::LoadOp::create(builder, loc, base); + mlir::Value addr = fir::BoxAddrOp::create(builder, loc, box); + llvm::SmallVector lenParams; + assert(coordinates.size() == 1 && "expect one coordinate"); + auto field = mlir::dyn_cast( + coordinates[0].getDefiningOp()); + comp = hlfir::DesignateOp::create( + builder, loc, builder.getRefType(fieldTy), addr, + /*component=*/field.getFieldName(), + /*componentShape=*/mlir::Value{}, + hlfir::DesignateOp::Subscripts{}); + } else { + comp = fir::CoordinateOp::create( + builder, loc, builder.getRefType(fieldTy), base, coordinates); + } cuf::DataAttributeAttr dataAttr = Fortran::lower::translateSymbolCUFDataAttribute( builder.getContext(), sym); diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf b/flang/test/Lower/CUDA/cuda-set-allocator.cuf index d783f340fe9a4..e3bb181f65398 100644 --- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf +++ b/flang/test/Lower/CUDA/cuda-set-allocator.cuf @@ -23,44 +23,34 @@ contains subroutine sub2() type(ty_device), pointer :: d1 - allocate(d1) end subroutine ! CHECK-LABEL: func.func @_QMm1Psub2() ! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box>>,y:i32,z:!fir.box>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref>>,y:i32,z:!fir.box>>}>>>> ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref>>,y:i32,z:!fir.box>>}>>>>) -> (!fir.ref>>,y:i32,z:!fir.box>>}>>>>, !fir.ref>>,y:i32,z:!fir.box>>}>>>>) -! CHECK: cuf.allocate -! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.ptr>>,y:i32,z:!fir.box>>}>> -! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref>>> {data_attr = #cuf.cuda} -! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref>>> {data_attr = #cuf.cuda} +! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> +! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.ptr>>,y:i32,z:!fir.box>>}>> +! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> +! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref>>> {data_attr = #cuf.cuda} +! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> +! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.ptr>>,y:i32,z:!fir.box>>}>> +! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> +! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref>>> {data_attr = #cuf.cuda} subroutine sub3() type(ty_device), allocatable :: d1 - allocate(d1) end subroutine ! CHECK-LABEL: func.func @_QMm1Psub3() ! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box>>,y:i32,z:!fir.box>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref>>,y:i32,z:!fir.box>>}>>>> ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref>>,y:i32,z:!fir.box>>}>>>>) -> (!fir.ref>>,y:i32,z:!fir.box>>}>>>>, !fir.ref>>,y:i32,z:!fir.box>>}>>>>) -! CHECK: cuf.allocate -! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.heap>>,y:i32,z:!fir.box>>}>> -! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref>>> {data_attr = #cuf.cuda} -! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref>>> {data_attr = #cuf.cuda} - - subroutine sub4() - type(ty_device), allocatable :: d1(:,:) - allocate(d1(10, 10)) - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub4() -! CHECK: cuf.allocate -! CHECK-COUNT-2: fir.do_loop -! CHECK-COUNT-2: cuf.set_allocator_idx +! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> +! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.heap>>,y:i32,z:!fir.box>>}>> +! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> +! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref>>> {data_attr = #cuf.cuda} +! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> +! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.heap>>,y:i32,z:!fir.box>>}>> +! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> +! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref>>> {data_attr = #cuf.cuda} end module