Skip to content

Commit 861bd85

Browse files
committed
[LLVM] Update CUDA ABI for all SM values
Summary: Turns out the new CUDA ABI now applies retroactively to all the other SMs if you upgrade to CUDA 13.0. This patch changes the scheme, keeping all the SM flags consistent but using an offset. Fixes: #159088
1 parent e7101da commit 861bd85

File tree

4 files changed

+65
-26
lines changed

4 files changed

+65
-26
lines changed

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,12 @@ enum : unsigned {
931931
// Processor selection mask for EF_CUDA_SM* values in the V1 ABI
// (ELFABIVERSION_CUDA_V1).
932932
EF_CUDA_SM = 0xff,
933933

934+
// Processor selection mask for EF_CUDA_SM* values in ABI versions other than
// ELFABIVERSION_CUDA_V1.
935+
EF_CUDA_SM_MASK = 0xff00,
936+
937+
// Number of bits to shift right, after masking with EF_CUDA_SM_MASK, to
// recover the EF_CUDA_SM* processor value.
938+
EF_CUDA_SM_OFFSET = 8,
939+
934940
// SM based processor values.
935941
EF_CUDA_SM20 = 0x14,
936942
EF_CUDA_SM21 = 0x15,
@@ -951,8 +957,12 @@ enum : unsigned {
951957
EF_CUDA_SM86 = 0x56,
952958
EF_CUDA_SM87 = 0x57,
953959
EF_CUDA_SM89 = 0x59,
954-
// The sm_90a variant uses the same machine flag.
955960
EF_CUDA_SM90 = 0x5a,
961+
EF_CUDA_SM100 = 0x64,
962+
EF_CUDA_SM101 = 0x65,
963+
EF_CUDA_SM103 = 0x67,
964+
EF_CUDA_SM120 = 0x78,
965+
EF_CUDA_SM121 = 0x79,
956966

957967
// Unified texture binding is enabled.
958968
EF_CUDA_TEXMODE_UNIFIED = 0x100,
@@ -968,17 +978,7 @@ enum : unsigned {
968978
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
969979
EF_CUDA_VIRTUAL_SM = 0xff0000,
970980

971-
// Processor selection mask for EF_CUDA_SM* values following blackwell.
972-
EF_CUDA_SM_MASK = 0xff00,
973-
974-
// SM based processor values.
975-
EF_CUDA_SM100 = 0x6400,
976-
EF_CUDA_SM101 = 0x6500,
977-
EF_CUDA_SM103 = 0x6700,
978-
EF_CUDA_SM120 = 0x7800,
979-
EF_CUDA_SM121 = 0x7900,
980-
981-
// Set when using an accelerator variant like sm_100a.
981+
// Set when using an accelerator variant like sm_100a in the new ABI.
982982
EF_CUDA_ACCELERATORS = 0x8,
983983
};
984984

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -622,7 +622,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
622622
assert(getEMachine() == ELF::EM_CUDA);
623623
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
624624
? getPlatformFlags() & ELF::EF_CUDA_SM
625-
: getPlatformFlags() & ELF::EF_CUDA_SM_MASK;
625+
: (getPlatformFlags() & ELF::EF_CUDA_SM_MASK) >>
626+
ELF::EF_CUDA_SM_OFFSET;
626627

627628
switch (SM) {
628629
// Fermi architecture.

llvm/tools/llvm-readobj/ELFDumper.cpp

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,7 @@ const EnumEntry<unsigned> ElfOSABI[] = {
11141114
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
11151115
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
11161116
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1117+
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
11171118
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};
11181119

11191120
const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
@@ -1679,19 +1680,56 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
16791680
};
16801681

16811682
const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
1682-
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1683-
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1684-
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1685-
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1686-
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1687-
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1688-
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1689-
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1690-
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1691-
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1692-
ENUM_ENT(EF_CUDA_SM100, "sm_100"), ENUM_ENT(EF_CUDA_SM101, "sm_101"),
1693-
ENUM_ENT(EF_CUDA_SM103, "sm_103"), ENUM_ENT(EF_CUDA_SM120, "sm_120"),
1683+
ENUM_ENT(EF_CUDA_SM20, "sm_20"),
1684+
ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1685+
ENUM_ENT(EF_CUDA_SM30, "sm_30"),
1686+
ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1687+
ENUM_ENT(EF_CUDA_SM35, "sm_35"),
1688+
ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1689+
ENUM_ENT(EF_CUDA_SM50, "sm_50"),
1690+
ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1691+
ENUM_ENT(EF_CUDA_SM53, "sm_53"),
1692+
ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1693+
ENUM_ENT(EF_CUDA_SM61, "sm_61"),
1694+
ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1695+
ENUM_ENT(EF_CUDA_SM70, "sm_70"),
1696+
ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1697+
ENUM_ENT(EF_CUDA_SM75, "sm_75"),
1698+
ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1699+
ENUM_ENT(EF_CUDA_SM86, "sm_86"),
1700+
ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1701+
ENUM_ENT(EF_CUDA_SM89, "sm_89"),
1702+
ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1703+
ENUM_ENT(EF_CUDA_SM100, "sm_100"),
1704+
ENUM_ENT(EF_CUDA_SM101, "sm_101"),
1705+
ENUM_ENT(EF_CUDA_SM103, "sm_103"),
1706+
ENUM_ENT(EF_CUDA_SM120, "sm_120"),
16941707
ENUM_ENT(EF_CUDA_SM121, "sm_121"),
1708+
ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"),
1709+
ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"),
1710+
ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"),
1711+
ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"),
1712+
ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"),
1713+
ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"),
1714+
ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"),
1715+
ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"),
1716+
ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"),
1717+
ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"),
1718+
ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"),
1719+
ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"),
1720+
ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"),
1721+
ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"),
1722+
ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"),
1723+
ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"),
1724+
ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"),
1725+
ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, "sm_87"),
1726+
ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"),
1727+
ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"),
1728+
ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"),
1729+
ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"),
1730+
ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"),
1731+
ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"),
1732+
ENUM_ENT(EF_CUDA_SM121 << EF_CUDA_SM_OFFSET, "sm_121"),
16951733
};
16961734

16971735
const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1581,7 +1581,7 @@ struct CUDAPluginTy final : public GenericPluginTy {
15811581
unsigned SM =
15821582
Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
15831583
? Header.e_flags & ELF::EF_CUDA_SM
1584-
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 8;
1584+
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET;
15851585

15861586
CUdevice Device;
15871587
CUresult Res = cuDeviceGet(&Device, DeviceId);

0 commit comments

Comments
 (0)