diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt index 7e0f8a145505e..c8ef0ef6f7e70 100644 --- a/llvm/lib/Target/DirectX/CMakeLists.txt +++ b/llvm/lib/Target/DirectX/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(DirectXCodeGen DirectXTargetMachine.cpp DirectXTargetTransformInfo.cpp DXContainerGlobals.cpp + DXILDataScalarization.cpp DXILFinalizeLinkage.cpp DXILIntrinsicExpansion.cpp DXILOpBuilder.cpp diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp new file mode 100644 index 0000000000000..0e6cf59e25750 --- /dev/null +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -0,0 +1,300 @@ +//===- DXILDataScalarization.cpp - Perform DXIL Data Legalization ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include "DXILDataScalarization.h" +#include "DirectX.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/DXILResource.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ReplaceConstant.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" + +#define DEBUG_TYPE "dxil-data-scalarization" +static const int MaxVecSize = 4; + +using namespace llvm; + +class DXILDataScalarizationLegacy : public ModulePass { + +public: + bool runOnModule(Module &M) override; + DXILDataScalarizationLegacy() : ModulePass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override; + static char ID; // Pass identification. 
+}; + +static bool findAndReplaceVectors(Module &M); + +class DataScalarizerVisitor : public InstVisitor { +public: + DataScalarizerVisitor() : GlobalMap() {} + bool visit(Function &F); + // InstVisitor methods. They return true if the instruction was scalarized, + // false if nothing changed. + bool visitInstruction(Instruction &I) { return false; } + bool visitSelectInst(SelectInst &SI) { return false; } + bool visitICmpInst(ICmpInst &ICI) { return false; } + bool visitFCmpInst(FCmpInst &FCI) { return false; } + bool visitUnaryOperator(UnaryOperator &UO) { return false; } + bool visitBinaryOperator(BinaryOperator &BO) { return false; } + bool visitGetElementPtrInst(GetElementPtrInst &GEPI); + bool visitCastInst(CastInst &CI) { return false; } + bool visitBitCastInst(BitCastInst &BCI) { return false; } + bool visitInsertElementInst(InsertElementInst &IEI) { return false; } + bool visitExtractElementInst(ExtractElementInst &EEI) { return false; } + bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; } + bool visitPHINode(PHINode &PHI) { return false; } + bool visitLoadInst(LoadInst &LI); + bool visitStoreInst(StoreInst &SI); + bool visitCallInst(CallInst &ICI) { return false; } + bool visitFreezeInst(FreezeInst &FI) { return false; } + friend bool findAndReplaceVectors(llvm::Module &M); + +private: + GlobalVariable *lookupReplacementGlobal(Value *CurrOperand); + DenseMap GlobalMap; + SmallVector PotentiallyDeadInstrs; + bool finish(); +}; + +bool DataScalarizerVisitor::visit(Function &F) { + assert(!GlobalMap.empty()); + ReversePostOrderTraversal RPOT(&F.getEntryBlock()); + for (BasicBlock *BB : RPOT) { + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { + Instruction *I = &*II; + bool Done = InstVisitor::visit(I); + ++II; + if (Done && I->getType()->isVoidTy()) + I->eraseFromParent(); + } + } + return finish(); +} + +bool DataScalarizerVisitor::finish() { + 
RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs); + return true; +} + +GlobalVariable * +DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) { + if (GlobalVariable *OldGlobal = dyn_cast(CurrOperand)) { + auto It = GlobalMap.find(OldGlobal); + if (It != GlobalMap.end()) { + return It->second; // Found, return the new global + } + } + return nullptr; // Not found +} + +bool DataScalarizerVisitor::visitLoadInst(LoadInst &LI) { + unsigned NumOperands = LI.getNumOperands(); + for (unsigned I = 0; I < NumOperands; ++I) { + Value *CurrOpperand = LI.getOperand(I); + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand)) + LI.setOperand(I, NewGlobal); + } + return false; +} + +bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) { + unsigned NumOperands = SI.getNumOperands(); + for (unsigned I = 0; I < NumOperands; ++I) { + Value *CurrOpperand = SI.getOperand(I); + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand)) { + SI.setOperand(I, NewGlobal); + } + } + return false; +} + +bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { + unsigned NumOperands = GEPI.getNumOperands(); + for (unsigned I = 0; I < NumOperands; ++I) { + Value *CurrOpperand = GEPI.getOperand(I); + GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand); + if (!NewGlobal) + continue; + IRBuilder<> Builder(&GEPI); + + SmallVector Indices; + for (auto &Index : GEPI.indices()) + Indices.push_back(Index); + + Value *NewGEP = + Builder.CreateGEP(NewGlobal->getValueType(), NewGlobal, Indices); + + GEPI.replaceAllUsesWith(NewGEP); + PotentiallyDeadInstrs.emplace_back(&GEPI); + } + return true; +} + +// Recursively Creates and Array like version of the given vector like type. 
+static Type *replaceVectorWithArray(Type *T, LLVMContext &Ctx) { + if (auto *VecTy = dyn_cast(T)) + return ArrayType::get(VecTy->getElementType(), + dyn_cast(VecTy)->getNumElements()); + if (auto *ArrayTy = dyn_cast(T)) { + Type *NewElementType = + replaceVectorWithArray(ArrayTy->getElementType(), Ctx); + return ArrayType::get(NewElementType, ArrayTy->getNumElements()); + } + // If it's not a vector or array, return the original type. + return T; +} + +Constant *transformInitializer(Constant *Init, Type *OrigType, Type *NewType, + LLVMContext &Ctx) { + // Handle ConstantAggregateZero (zero-initialized constants) + if (isa(Init)) { + return ConstantAggregateZero::get(NewType); + } + + // Handle UndefValue (undefined constants) + if (isa(Init)) { + return UndefValue::get(NewType); + } + + // Handle vector to array transformation + if (isa(OrigType) && isa(NewType)) { + // Convert vector initializer to array initializer + SmallVector ArrayElements; + if (ConstantVector *ConstVecInit = dyn_cast(Init)) { + for (unsigned I = 0; I < ConstVecInit->getNumOperands(); ++I) + ArrayElements.push_back(ConstVecInit->getOperand(I)); + } else if (ConstantDataVector *ConstDataVecInit = + llvm::dyn_cast(Init)) { + for (unsigned I = 0; I < ConstDataVecInit->getNumElements(); ++I) + ArrayElements.push_back(ConstDataVecInit->getElementAsConstant(I)); + } else { + assert(false && "Expected a ConstantVector or ConstantDataVector for " + "vector initializer!"); + } + + return ConstantArray::get(cast(NewType), ArrayElements); + } + + // Handle array of vectors transformation + if (auto *ArrayTy = dyn_cast(OrigType)) { + auto *ArrayInit = dyn_cast(Init); + assert(ArrayInit && "Expected a ConstantArray for array initializer!"); + + SmallVector NewArrayElements; + for (unsigned I = 0; I < ArrayTy->getNumElements(); ++I) { + // Recursively transform array elements + Constant *NewElemInit = transformInitializer( + ArrayInit->getOperand(I), ArrayTy->getElementType(), + 
cast(NewType)->getElementType(), Ctx); + NewArrayElements.push_back(NewElemInit); + } + + return ConstantArray::get(cast(NewType), NewArrayElements); + } + + // If not a vector or array, return the original initializer + return Init; +} + +static bool findAndReplaceVectors(Module &M) { + bool MadeChange = false; + LLVMContext &Ctx = M.getContext(); + IRBuilder<> Builder(Ctx); + DataScalarizerVisitor Impl; + for (GlobalVariable &G : M.globals()) { + Type *OrigType = G.getValueType(); + + Type *NewType = replaceVectorWithArray(OrigType, Ctx); + if (OrigType != NewType) { + // Create a new global variable with the updated type + // Note: Initializer is set via transformInitializer + GlobalVariable *NewGlobal = new GlobalVariable( + M, NewType, G.isConstant(), G.getLinkage(), + /*Initializer=*/nullptr, G.getName() + ".scalarized", &G, + G.getThreadLocalMode(), G.getAddressSpace(), + G.isExternallyInitialized()); + + // Copy relevant attributes + NewGlobal->setUnnamedAddr(G.getUnnamedAddr()); + if (G.getAlignment() > 0) { + NewGlobal->setAlignment(G.getAlign()); + } + + if (G.hasInitializer()) { + Constant *Init = G.getInitializer(); + Constant *NewInit = transformInitializer(Init, OrigType, NewType, Ctx); + NewGlobal->setInitializer(NewInit); + } + + // Note: we want to do G.replaceAllUsesWith(NewGlobal);, but it assumes + // type equality. Instead we will use the visitor pattern. 
+ Impl.GlobalMap[&G] = NewGlobal; + for (User *U : make_early_inc_range(G.users())) { + if (isa(U) && isa(U)) { + ConstantExpr *CE = cast(U); + convertUsersOfConstantsToInstructions(CE, + /*RestrictToFunc=*/nullptr, + /*RemoveDeadConstants=*/false, + /*IncludeSelf=*/true); + } + if (isa(U)) { + Instruction *Inst = cast(U); + Function *F = Inst->getFunction(); + if (F) + Impl.visit(*F); + } + } + } + } + + // Remove the old globals after the iteration + for (auto &[Old, New] : Impl.GlobalMap) { + Old->eraseFromParent(); + MadeChange = true; + } + return MadeChange; +} + +PreservedAnalyses DXILDataScalarization::run(Module &M, + ModuleAnalysisManager &) { + bool MadeChanges = findAndReplaceVectors(M); + if (!MadeChanges) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + +bool DXILDataScalarizationLegacy::runOnModule(Module &M) { + return findAndReplaceVectors(M); +} + +void DXILDataScalarizationLegacy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); +} + +char DXILDataScalarizationLegacy::ID = 0; + +INITIALIZE_PASS_BEGIN(DXILDataScalarizationLegacy, DEBUG_TYPE, + "DXIL Data Scalarization", false, false) +INITIALIZE_PASS_END(DXILDataScalarizationLegacy, DEBUG_TYPE, + "DXIL Data Scalarization", false, false) + +ModulePass *llvm::createDXILDataScalarizationLegacyPass() { + return new DXILDataScalarizationLegacy(); +} diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.h b/llvm/lib/Target/DirectX/DXILDataScalarization.h new file mode 100644 index 0000000000000..560e061db96d0 --- /dev/null +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.h @@ -0,0 +1,25 @@ +//===- DXILDataScalarization.h - Perform DXIL Data Legalization -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H +#define LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H + +#include "DXILResource.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A pass that transforms Vectors to Arrays +class DXILDataScalarization : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); +}; +} // namespace llvm + +#endif // LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h index 60fc5094542b3..3221779be2f31 100644 --- a/llvm/lib/Target/DirectX/DirectX.h +++ b/llvm/lib/Target/DirectX/DirectX.h @@ -34,6 +34,12 @@ void initializeDXILIntrinsicExpansionLegacyPass(PassRegistry &); /// Pass to expand intrinsic operations that lack DXIL opCodes ModulePass *createDXILIntrinsicExpansionLegacyPass(); +/// Initializer for DXIL Data Scalarization Pass +void initializeDXILDataScalarizationLegacyPass(PassRegistry &); + +/// Pass to scalarize llvm global data into a DXIL legal form +ModulePass *createDXILDataScalarizationLegacyPass(); + /// Initializer for DXILOpLowering void initializeDXILOpLoweringLegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index 606022a9835f0..f358215ecf373 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -46,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() { RegisterTargetMachine X(getTheDirectXTarget()); auto *PR = PassRegistry::getPassRegistry(); initializeDXILIntrinsicExpansionLegacyPass(*PR); + initializeDXILDataScalarizationLegacyPass(*PR); initializeScalarizerLegacyPassPass(*PR); initializeDXILPrepareModulePass(*PR); 
initializeEmbedDXILPassPass(*PR); @@ -86,6 +87,7 @@ class DirectXPassConfig : public TargetPassConfig { FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; } void addCodeGenPrepare() override { addPass(createDXILIntrinsicExpansionLegacyPass()); + addPass(createDXILDataScalarizationLegacyPass()); ScalarizerPassOptions DxilScalarOptions; DxilScalarOptions.ScalarizeLoadStore = true; addPass(createScalarizerPass(DxilScalarOptions)); diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index 46326d6917587..102748508b4ad 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -8,6 +8,7 @@ ; CHECK-NEXT: Target Transform Information ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: DXIL Intrinsic Expansion +; CHECK-NEXT: DXIL Data Scalarization ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Scalarize vector operations diff --git a/llvm/test/CodeGen/DirectX/scalar-data.ll b/llvm/test/CodeGen/DirectX/scalar-data.ll new file mode 100644 index 0000000000000..4438604a3a879 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/scalar-data.ll @@ -0,0 +1,12 @@ +; RUN: opt -S -dxil-data-scalarization -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s + +; Make sure we don't touch arrays without vectors and that can recurse multiple-dimension arrays of vectors + +@staticArray = internal global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 +@"groushared3dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x <4 x i32>]]] zeroinitializer, align 16 + +; CHECK @staticArray +; CHECK-NOT: @staticArray.scalarized +; CHECK: @groushared3dArrayofVectors.scalarized = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x [4 x i32]]]] zeroinitializer, align 16 +; CHECK-NOT: 
@groushared3dArrayofVectors
diff --git a/llvm/test/CodeGen/DirectX/scalar-load.ll b/llvm/test/CodeGen/DirectX/scalar-load.ll
new file mode 100644
index 0000000000000..11678f48a5e01
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalar-load.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -dxil-data-scalarization -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
+
+; Make sure we can load groupshared, static vectors and arrays of vectors
+
+@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
+@"vecData" = external addrspace(3) global <4 x i32>, align 4
+@staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
+@"groushared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16
+
+; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
+; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
+; CHECK: @staticArrayOfVecData.scalarized = internal global [3 x [4 x i32]] {{\[}}[4 x i32] [i32 1, i32 2, i32 3, i32 4], [4 x i32] [i32 5, i32 6, i32 7, i32 8], [4 x i32] [i32 9, i32 10, i32 11, i32 12]], align 4
+; CHECK: @groushared2dArrayofVectors.scalarized = local_unnamed_addr addrspace(3) global [3 x [3 x [4 x i32]]] zeroinitializer, align 16
+
+; CHECK-NOT: @arrayofVecData
+; CHECK-NOT: @vecData
+; CHECK-NOT: @staticArrayOfVecData
+; CHECK-NOT: @groushared2dArrayofVectors
+
+
+; CHECK-LABEL: load_array_vec_test
+define <4 x i32> @load_array_vec_test() {
+  ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4
+  ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4
+  %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
+  %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
+  %3 = add <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+; CHECK-LABEL: load_vec_test
+define <4 x i32> @load_vec_test() {
+  ; CHECK-COUNT-4: load i32, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align {{.*}}
+  ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4
+  %1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
+  ret <4 x i32> %1
+}
+
+; CHECK-LABEL: load_static_array_of_vec_test
+define <4 x i32> @load_static_array_of_vec_test(i32 %index) {
+  ; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index
+  ; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4
+  ; CHECK-NOT: load i32, ptr {{.*}}, align 4
+  %3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
+  %4 = load <4 x i32>, <4 x i32>* %3, align 4
+  ret <4 x i32> %4
+}
+
+; CHECK-LABEL: multid_load_test
+define <4 x i32> @multid_load_test() {
+  ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@groushared2dArrayofVectors.scalarized.*|%.*)}}, align 4
+  ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4
+  %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
+  %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
+  %3 = add <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll
index b970a2842e5a8..08d8a2c57c6c3 100644
--- a/llvm/test/CodeGen/DirectX/scalar-store.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-store.ll
@@ -1,17 +1,29 @@
-; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-data-scalarization -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 ; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
 
-@"sharedData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
-; CHECK-LABEL: store_test
-define void @store_test () local_unnamed_addr {
-  ; CHECK: store float 1.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
-  ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
-  ; CHECK: store float 3.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
-  ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
-  ; CHECK: store float 4.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
-  ; CHECK: store float 6.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+; Make sure we can store groupshared, static vectors and arrays of vectors
 
-  store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"sharedData", align 16
-  store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"sharedData", i32 16), align 16
+@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
+@"vecData" = external addrspace(3) global <4 x i32>, align 4
+
+; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
+; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
+; CHECK-NOT: @arrayofVecData
+; CHECK-NOT: @vecData
+
+; CHECK-LABEL: store_array_vec_test
+define void @store_array_vec_test () local_unnamed_addr {
+  ; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
+  ; CHECK-NOT: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
+  store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
+  store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16
   ret void
 }
+
+; CHECK-LABEL: store_vec_test
+define void @store_vec_test(<4 x i32> %inputVec) {
+  ; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4
+  ; CHECK-NOT: store i32 %inputVec.{{.*}}, ptr addrspace(3)
+  store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
+  ret void
+}