17
17
#include " mlir/Dialect/LLVMIR/ROCDLDialect.h"
18
18
#include " mlir/Support/FileUtilities.h"
19
19
#include " mlir/Target/LLVM/ROCDL/Utils.h"
20
- #include " mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
21
- #include " mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
22
- #include " mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
23
20
#include " mlir/Target/LLVMIR/Export.h"
24
21
25
22
#include " llvm/IR/Constants.h"
@@ -112,8 +109,9 @@ SerializeGPUModuleBase::SerializeGPUModuleBase(
112
109
if (auto file = dyn_cast<StringAttr>(attr))
113
110
fileList.push_back (file.str ());
114
111
115
- // Append standard ROCm device bitcode libraries to the files to be loaded.
116
- (void )appendStandardLibs ();
112
+ // By default add all libraries if the toolkit path is not empty.
113
+ if (!getToolkitPath ().empty ())
114
+ deviceLibs = AMDGCNLibraryList (AMDGCNLibraryList::All);
117
115
}
118
116
119
117
void SerializeGPUModuleBase::init () {
@@ -138,29 +136,70 @@ ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
138
136
return fileList;
139
137
}
140
138
141
- LogicalResult SerializeGPUModuleBase::appendStandardLibs () {
139
+ LogicalResult
140
+ SerializeGPUModuleBase::appendStandardLibs (AMDGCNLibraryList libs) {
141
+ if (libs.isEmpty ())
142
+ return success ();
142
143
StringRef pathRef = getToolkitPath ();
143
- if (!pathRef.empty ()) {
144
- SmallVector<char , 256 > path;
145
- path.insert (path.begin (), pathRef.begin (), pathRef.end ());
146
- llvm::sys::path::append (path, " amdgcn" , " bitcode" );
147
- pathRef = StringRef (path.data (), path.size ());
148
- if (!llvm::sys::fs::is_directory (pathRef)) {
149
- getOperation ().emitRemark () << " ROCm amdgcn bitcode path: " << pathRef
150
- << " does not exist or is not a directory." ;
151
- return failure ();
152
- }
153
- StringRef isaVersion =
154
- llvm::AMDGPU::getArchNameAMDGCN (llvm::AMDGPU::parseArchAMDGCN (chip));
155
- isaVersion.consume_front (" gfx" );
156
- return getCommonBitcodeLibs (fileList, path, isaVersion);
144
+ // Fail if the toolkit is empty.
145
+ if (pathRef.empty ())
146
+ return failure ();
147
+
148
+ // Get the path for the device libraries
149
+ SmallString<256 > path;
150
+ path.insert (path.begin (), pathRef.begin (), pathRef.end ());
151
+ llvm::sys::path::append (path, " amdgcn" , " bitcode" );
152
+ pathRef = StringRef (path.data (), path.size ());
153
+
154
+ // Fail if the path is invalid.
155
+ if (!llvm::sys::fs::is_directory (pathRef)) {
156
+ getOperation ().emitRemark () << " ROCm amdgcn bitcode path: " << pathRef
157
+ << " does not exist or is not a directory." ;
158
+ return failure ();
157
159
}
160
+
161
+ // Get the ISA version.
162
+ StringRef isaVersion =
163
+ llvm::AMDGPU::getArchNameAMDGCN (llvm::AMDGPU::parseArchAMDGCN (chip));
164
+ isaVersion.consume_front (" gfx" );
165
+
166
+ // Helper function for adding a library.
167
+ auto addLib = [&](const Twine &lib) -> bool {
168
+ auto baseSize = path.size ();
169
+ llvm::sys::path::append (path, lib);
170
+ StringRef pathRef (path.data (), path.size ());
171
+ if (!llvm::sys::fs::is_regular_file (pathRef)) {
172
+ getOperation ().emitRemark () << " Bitcode library path: " << pathRef
173
+ << " does not exist or is not a file.\n " ;
174
+ return true ;
175
+ }
176
+ fileList.push_back (pathRef.str ());
177
+ path.truncate (baseSize);
178
+ return false ;
179
+ };
180
+
181
+ // Add ROCm device libraries. Fail if any of the libraries is not found, ie.
182
+ // if any of the `addLib` failed.
183
+ if ((libs.requiresLibrary (AMDGCNLibraryList::Ocml) && addLib (" ocml.bc" )) ||
184
+ (libs.requiresLibrary (AMDGCNLibraryList::Ockl) && addLib (" ockl.bc" )) ||
185
+ (libs.requiresLibrary (AMDGCNLibraryList::Hip) && addLib (" hip.bc" )) ||
186
+ (libs.requiresLibrary (AMDGCNLibraryList::OpenCL) &&
187
+ addLib (" opencl.bc" )) ||
188
+ (libs.containLibraries (AMDGCNLibraryList::Ocml |
189
+ AMDGCNLibraryList::Ockl) &&
190
+ addLib (" oclc_isa_version_" + isaVersion + " .bc" )))
191
+ return failure ();
158
192
return success ();
159
193
}
160
194
161
195
std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
162
196
SerializeGPUModuleBase::loadBitcodeFiles (llvm::Module &module ) {
163
197
SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
198
+ // Return if there are no libs to load.
199
+ if (deviceLibs.isEmpty () && fileList.empty ())
200
+ return bcFiles;
201
+ if (failed (appendStandardLibs (deviceLibs)))
202
+ return std::nullopt;
164
203
if (failed (loadBitcodeFilesFromList (module .getContext (), fileList, bcFiles,
165
204
true )))
166
205
return std::nullopt;
@@ -174,80 +213,79 @@ LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
174
213
// Stop spamming us with clang version numbers
175
214
if (auto *ident = module .getNamedMetadata (" llvm.ident" ))
176
215
module .eraseNamedMetadata (ident);
216
+ // Override the libModules datalayout and target triple with the compiler's
217
+ // data layout should there be a discrepancy.
218
+ setDataLayoutAndTriple (module );
177
219
return success ();
178
220
}
179
221
180
222
/// Pre-link hook: works out which device libraries the module needs and adds
/// the matching control variables to `module`.
///
/// Unless `deviceLibs.requiresLibrary(AMDGCNLibraryList::All)` already holds,
/// the module's functions are scanned and libraries are added to `deviceLibs`
/// based on the names of externally linked functions that have no exact
/// definition in the module.
void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
  // Only read by the assert below; `[[maybe_unused]]` keeps builds that
  // compile asserts out free of unused-variable warnings.
  [[maybe_unused]] std::optional<llvm::TargetMachine *> targetMachine =
      getOrCreateTargetMachine();
  assert(targetMachine && " expect a TargetMachine");

  // If all libraries are not set, traverse the module to determine which
  // libraries are required.
  if (!deviceLibs.requiresLibrary(AMDGCNLibraryList::All)) {
    for (llvm::Function &f : module.functions()) {
      // Only unresolved external symbols can pull in a device library.
      if (!f.hasExternalLinkage() || !f.hasName() || f.hasExactDefinition())
        continue;
      StringRef funcName = f.getName();
      if (funcName == "printf")
        deviceLibs.addList(AMDGCNLibraryList::getOpenCL());
      if (funcName.starts_with("__ockl_"))
        deviceLibs.addLibrary(AMDGCNLibraryList::Ockl);
      if (funcName.starts_with("__ocml_"))
        deviceLibs.addLibrary(AMDGCNLibraryList::Ocml);
    }
  }

  addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(),
                      target.hasFiniteOnly(), target.hasUnsafeMath(),
                      target.hasFastMath(), target.hasCorrectSqrt(),
                      target.getAbi());
}
189
246
190
- // Get the paths of ROCm device libraries.
191
- LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs (
192
- llvm::SmallVector<std::string> &libs, SmallVector<char , 256 > &libPath,
193
- StringRef isaVersion) {
194
- auto addLib = [&](StringRef path) -> bool {
195
- if (!llvm::sys::fs::is_regular_file (path)) {
196
- getOperation ().emitRemark () << " Bitcode library path: " << path
197
- << " does not exist or is not a file.\n " ;
198
- return true ;
199
- }
200
- libs.push_back (path.str ());
201
- return false ;
202
- };
203
- auto getLibPath = [&libPath](Twine lib) {
204
- auto baseSize = libPath.size ();
205
- llvm::sys::path::append (libPath, lib + " .bc" );
206
- std::string path (StringRef (libPath.data (), libPath.size ()).str ());
207
- libPath.truncate (baseSize);
208
- return path;
209
- };
210
-
211
- // Add ROCm device libraries. Fail if any of the libraries is not found.
212
- if (addLib (getLibPath (" ocml" )) || addLib (getLibPath (" ockl" )) ||
213
- addLib (getLibPath (" hip" )) || addLib (getLibPath (" opencl" )) ||
214
- addLib (getLibPath (" oclc_isa_version_" + isaVersion)))
215
- return failure ();
216
- return success ();
217
- }
218
-
219
247
void SerializeGPUModuleBase::addControlVariables (
220
- llvm::Module &module , bool wave64, bool daz, bool finiteOnly,
221
- bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) {
222
- llvm::Type *i8Ty = llvm::Type::getInt8Ty (module .getContext ());
223
- auto addControlVariable = [i8Ty, &module ](StringRef name, bool enable) {
248
+ llvm::Module &module , AMDGCNLibraryList libs, bool wave64, bool daz,
249
+ bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt,
250
+ StringRef abiVer) {
251
+ // Return if no device libraries are required.
252
+ if (libs.isEmpty ())
253
+ return ;
254
+ // Helper function for adding control variables.
255
+ auto addControlVariable = [&module ](StringRef name, uint32_t value,
256
+ uint32_t bitwidth) {
257
+ if (module .getNamedGlobal (name)) {
258
+ return ;
259
+ }
260
+ llvm::IntegerType *type =
261
+ llvm::IntegerType::getIntNTy (module .getContext (), bitwidth);
224
262
llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable (
225
- module , i8Ty, true , llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
226
- llvm::ConstantInt::get (i8Ty, enable), name, nullptr ,
227
- llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4 );
263
+ module , /* isConstant=*/ type, true ,
264
+ llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
265
+ llvm::ConstantInt::get (type, value), name, /* before=*/ nullptr ,
266
+ /* threadLocalMode=*/ llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
267
+ /* addressSpace=*/ 4 );
228
268
controlVariable->setVisibility (
229
269
llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
230
- controlVariable->setAlignment (llvm::MaybeAlign (1 ));
270
+ controlVariable->setAlignment (llvm::MaybeAlign (bitwidth / 8 ));
231
271
controlVariable->setUnnamedAddr (llvm::GlobalValue::UnnamedAddr::Local);
232
272
};
233
- addControlVariable (" __oclc_finite_only_opt" , finiteOnly || fastMath);
234
- addControlVariable (" __oclc_unsafe_math_opt" , unsafeMath || fastMath);
235
- addControlVariable (" __oclc_daz_opt" , daz || fastMath);
236
- addControlVariable (" __oclc_correctly_rounded_sqrt32" ,
237
- correctSqrt && !fastMath);
238
- addControlVariable (" __oclc_wavefrontsize64" , wave64);
239
-
240
- llvm::Type *i32Ty = llvm::Type::getInt32Ty (module .getContext ());
241
- int abi = 500 ;
242
- abiVer.getAsInteger (0 , abi);
243
- llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable (
244
- module , i32Ty, true , llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
245
- llvm::ConstantInt::get (i32Ty, abi), " __oclc_ABI_version" , nullptr ,
246
- llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4 );
247
- abiVersion->setVisibility (
248
- llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
249
- abiVersion->setAlignment (llvm::MaybeAlign (4 ));
250
- abiVersion->setUnnamedAddr (llvm::GlobalValue::UnnamedAddr::Local);
273
+ // Add ocml related control variables.
274
+ if (libs.requiresLibrary (AMDGCNLibraryList::Ocml)) {
275
+ addControlVariable (" __oclc_finite_only_opt" , finiteOnly || fastMath, 8 );
276
+ addControlVariable (" __oclc_daz_opt" , daz || fastMath, 8 );
277
+ addControlVariable (" __oclc_correctly_rounded_sqrt32" ,
278
+ correctSqrt && !fastMath, 8 );
279
+ addControlVariable (" __oclc_unsafe_math_opt" , unsafeMath || fastMath, 8 );
280
+ }
281
+ // Add ocml or ockl related control variables.
282
+ if (libs.containLibraries (AMDGCNLibraryList::Ocml |
283
+ AMDGCNLibraryList::Ockl)) {
284
+ addControlVariable (" __oclc_wavefrontsize64" , wave64, 8 );
285
+ int abi = 500 ;
286
+ abiVer.getAsInteger (0 , abi);
287
+ addControlVariable (" __oclc_ABI_version" , abi, 32 );
288
+ }
251
289
}
252
290
253
291
std::optional<SmallVector<char , 0 >>
@@ -312,43 +350,11 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) {
312
350
313
351
parser->setTargetParser (*tap);
314
352
parser->Run (false );
315
-
316
353
return result;
317
354
}
318
355
319
- #if MLIR_ENABLE_ROCM_CONVERSIONS
320
- namespace {
321
- class AMDGPUSerializer : public SerializeGPUModuleBase {
322
- public:
323
- AMDGPUSerializer (Operation &module , ROCDLTargetAttr target,
324
- const gpu::TargetOptions &targetOptions);
325
-
326
- gpu::GPUModuleOp getOperation ();
327
-
328
- // Compile to HSA.
329
- std::optional<SmallVector<char , 0 >>
330
- compileToBinary (const std::string &serializedISA);
331
-
332
- std::optional<SmallVector<char , 0 >>
333
- moduleToObject (llvm::Module &llvmModule) override ;
334
-
335
- private:
336
- // Target options.
337
- gpu::TargetOptions targetOptions;
338
- };
339
- } // namespace
340
-
341
- AMDGPUSerializer::AMDGPUSerializer (Operation &module , ROCDLTargetAttr target,
342
- const gpu::TargetOptions &targetOptions)
343
- : SerializeGPUModuleBase(module , target, targetOptions),
344
- targetOptions(targetOptions) {}
345
-
346
- gpu::GPUModuleOp AMDGPUSerializer::getOperation () {
347
- return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation ());
348
- }
349
-
350
356
std::optional<SmallVector<char , 0 >>
351
- AMDGPUSerializer ::compileToBinary (const std::string &serializedISA) {
357
+ SerializeGPUModuleBase ::compileToBinary (const std::string &serializedISA) {
352
358
// Assemble the ISA.
353
359
std::optional<SmallVector<char , 0 >> isaBinary = assembleIsa (serializedISA);
354
360
@@ -407,13 +413,13 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
407
413
return SmallVector<char , 0 >(buffer.begin (), buffer.end ());
408
414
}
409
415
410
- std::optional<SmallVector<char , 0 >>
411
- AMDGPUSerializer::moduleToObject ( llvm::Module &llvmModule) {
416
+ std::optional<SmallVector<char , 0 >> SerializeGPUModuleBase::moduleToObjectImpl (
417
+ const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) {
412
418
// Return LLVM IR if the compilation target is offload.
413
419
#define DEBUG_TYPE " serialize-to-llvm"
414
420
LLVM_DEBUG ({
415
- llvm::dbgs () << " LLVM IR for module: " << getOperation (). getNameAttr ()
416
- << " \n "
421
+ llvm::dbgs () << " LLVM IR for module: "
422
+ << cast<gpu::GPUModuleOp>( getOperation ()). getNameAttr () << " \n "
417
423
<< llvmModule << " \n " ;
418
424
});
419
425
#undef DEBUG_TYPE
@@ -437,7 +443,8 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
437
443
}
438
444
#define DEBUG_TYPE " serialize-to-isa"
439
445
LLVM_DEBUG ({
440
- llvm::dbgs () << " ISA for module: " << getOperation ().getNameAttr () << " \n "
446
+ llvm::dbgs () << " ISA for module: "
447
+ << cast<gpu::GPUModuleOp>(getOperation ()).getNameAttr () << " \n "
441
448
<< *serializedISA << " \n " ;
442
449
});
443
450
#undef DEBUG_TYPE
@@ -448,6 +455,38 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
448
455
// Compile to binary.
449
456
return compileToBinary (*serializedISA);
450
457
}
458
+
459
+ #if MLIR_ENABLE_ROCM_CONVERSIONS
460
+ namespace {
461
+ class AMDGPUSerializer : public SerializeGPUModuleBase {
462
+ public:
463
+ AMDGPUSerializer (Operation &module , ROCDLTargetAttr target,
464
+ const gpu::TargetOptions &targetOptions);
465
+
466
+ gpu::GPUModuleOp getOperation ();
467
+
468
+ std::optional<SmallVector<char , 0 >>
469
+ moduleToObject (llvm::Module &llvmModule) override ;
470
+
471
+ private:
472
+ // Target options.
473
+ gpu::TargetOptions targetOptions;
474
+ };
475
+ } // namespace
476
+
477
+ AMDGPUSerializer::AMDGPUSerializer (Operation &module , ROCDLTargetAttr target,
478
+ const gpu::TargetOptions &targetOptions)
479
+ : SerializeGPUModuleBase(module , target, targetOptions),
480
+ targetOptions(targetOptions) {}
481
+
482
+ gpu::GPUModuleOp AMDGPUSerializer::getOperation () {
483
+ return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation ());
484
+ }
485
+
486
+ std::optional<SmallVector<char , 0 >>
487
+ AMDGPUSerializer::moduleToObject (llvm::Module &llvmModule) {
488
+ return moduleToObjectImpl (targetOptions, llvmModule);
489
+ }
451
490
#endif // MLIR_ENABLE_ROCM_CONVERSIONS
452
491
453
492
std::optional<SmallVector<char , 0 >> ROCDLTargetAttrImpl::serializeToObject (
@@ -477,10 +516,15 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute,
477
516
const SmallVector<char , 0 > &object,
478
517
const gpu::TargetOptions &options) const {
479
518
gpu::CompilationTarget format = options.getCompilationTarget ();
519
+ // If format is `fatbin` transform it to binary as `fatbin` is not yet
520
+ // supported.
521
+ if (format > gpu::CompilationTarget::Binary)
522
+ format = gpu::CompilationTarget::Binary;
523
+
524
+ DictionaryAttr properties{};
480
525
Builder builder (attribute.getContext ());
481
526
return builder.getAttr <gpu::ObjectAttr>(
482
- attribute,
483
- format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary
484
- : format,
485
- builder.getStringAttr (StringRef (object.data (), object.size ())), nullptr );
527
+ attribute, format,
528
+ builder.getStringAttr (StringRef (object.data (), object.size ())),
529
+ properties);
486
530
}
0 commit comments