Skip to content

Commit a6c656e

Browse files
authored
Make have_fma consistent between interpreter and compiled (#52206)
Currently the interpreter always returns false, which isn't very good. Make it follow whatever the JIT will do.
1 parent 0402c78 commit a6c656e

File tree

9 files changed

+49
-5
lines changed

9 files changed

+49
-5
lines changed

src/jl_exported_funcs.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@
200200
XX(jl_get_binding_wr) \
201201
XX(jl_get_cpu_name) \
202202
XX(jl_get_cpu_features) \
203+
XX(jl_cpu_has_fma) \
203204
XX(jl_get_current_task) \
204205
XX(jl_get_default_sysimg_path) \
205206
XX(jl_get_excstack) \

src/llvm-cpufeatures.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,15 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS
5959
StringRef FS =
6060
FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();
6161

62-
SmallVector<StringRef, 6> Features;
62+
SmallVector<StringRef, 128> Features;
6363
FS.split(Features, ',');
6464
for (StringRef Feature : Features)
6565
if (TT.isARM()) {
6666
if (Feature == "+vfp4")
6767
return typ == "f32" || typ == "f64";
6868
else if (Feature == "+vfp4sp")
6969
return typ == "f32";
70-
} else {
70+
} else if (TT.isX86()) {
7171
if (Feature == "+fma" || Feature == "+fma4")
7272
return typ == "f32" || typ == "f64";
7373
}

src/processor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
226226
// Return the features of the host CPU as a julia string.
227227
JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void);
228228
// Check if the CPU has native FMA instructions for floats of the given bit width
229+
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits);
230+
// Dump the name and feature set of the host CPU
229231
// For debugging only
230232
JL_DLLEXPORT void jl_dump_host_cpu(void);
231233
JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data);

src/processor_arm.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1808,6 +1808,22 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
18081808
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
18091809
}
18101810

1811+
// Report whether fused multiply-add is available for floats of width `bits`.
//
// Returns the Julia boolean singletons `jl_true` / `jl_false`.
// - When `_CPU_AARCH64_` is defined the answer is unconditionally `jl_true`,
//   regardless of `bits`.
// - Otherwise the feature set of the first entry in `jit_targets` is consulted:
//   `vfp4sp` enables the 32-bit case only, `vfp4` enables both 32 and 64 bits.
//   Any other `bits` value yields `jl_false`.
// NOTE(review): `jit_targets.front()` assumes `jit_targets` is non-empty --
// confirm the targets are initialized before this can be called.
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
1812+
{
1813+
#ifdef _CPU_AARCH64_
1814+
return jl_true;
1815+
#else
1816+
TargetData<feature_sz> target = jit_targets.front();
1817+
FeatureList<feature_sz> features = target.en.features;
1818+
if (bits == 32 && test_nbit(features, Feature::vfp4sp))
1819+
return jl_true;
1820+
else if ((bits == 64 || bits == 32) && test_nbit(features, Feature::vfp4))
1821+
return jl_true;
1822+
else
1823+
return jl_false;
1824+
#endif
1825+
}
1826+
18111827
jl_image_t jl_init_processor_sysimg(void *hdl)
18121828
{
18131829
if (!jit_targets.empty())

src/processor_fallback.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
172172
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
173173
}
174174

175+
// Fallback implementation of the FMA query: `bits` is ignored and the result
// is always `jl_false`.
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
176+
{
177+
return jl_false; // Match behaviour of have_fma in src/llvm-cpufeatures.cpp (assume false)
178+
}
179+
175180
JL_DLLEXPORT void jl_dump_host_cpu(void)
176181
{
177182
jl_safe_printf("CPU: %s\n", host_cpu_name().c_str());

src/processor_x86.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
// CPUID
66

7+
#include "julia.h"
78
extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
89
{
910
asm volatile (
@@ -1062,6 +1063,16 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
10621063
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
10631064
}
10641065

1066+
// Report whether fused multiply-add is available for floats of width `bits`.
//
// Returns `jl_true` when `bits` is 32 or 64 and the feature set of the first
// entry in `jit_targets` has either `fma` or `fma4` set; otherwise `jl_false`.
// NOTE(review): `jit_targets.front()` assumes `jit_targets` is non-empty --
// confirm the targets are initialized before this can be called.
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
1067+
{
1068+
TargetData<feature_sz> target = jit_targets.front();
1069+
FeatureList<feature_sz> features = target.en.features;
1070+
if ((bits == 32 || bits == 64) && (test_nbit(features, Feature::fma) || test_nbit(features, Feature::fma4)))
1071+
return jl_true;
1072+
else
1073+
return jl_false;
1074+
}
1075+
10651076
jl_image_t jl_init_processor_sysimg(void *hdl)
10661077
{
10671078
if (!jit_targets.empty())

src/runtime_intrinsics.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,10 +1671,15 @@ un_fintrinsic(trunc_float,trunc_llvm)
16711671
un_fintrinsic(rint_float,rint_llvm)
16721672
un_fintrinsic(sqrt_float,sqrt_llvm)
16731673
un_fintrinsic(sqrt_float,sqrt_llvm_fast)
1674+
jl_value_t *jl_cpu_has_fma(int bits);
16741675

16751676
JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
16761677
{
1677-
JL_TYPECHK(have_fma, datatype, typ);
1678-
// TODO: run-time feature check?
1679-
return jl_false;
1678+
JL_TYPECHK(have_fma, datatype, typ); // TODO what about float16/bfloat16?
1679+
if (typ == (jl_value_t*)jl_float32_type)
1680+
return jl_cpu_has_fma(32);
1681+
else if (typ == (jl_value_t*)jl_float64_type)
1682+
return jl_cpu_has_fma(64);
1683+
else
1684+
return jl_false;
16801685
}

test/llvmpasses/cpu-features.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
44

55
; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
6+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
7+
target triple = "x86_64-linux-gnu"
68

79
declare i1 @julia.cpu.have_fma.f64()
810
declare double @with_fma(double %0, double %1, double %2)

test/sysinfo.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Base.Sys.loadavg()
1212

1313
@test length(ccall(:jl_get_cpu_name, String, ())) != 0
1414
@test length(ccall(:jl_get_cpu_features, String, ())) >= 0
15+
# `have_fma` only reports support for Float32/Float64 (every other type yields
# false), so query Float64 to make the comparison with the 64-bit runtime check
# meaningful instead of trivially `false == false`.
foo_fma() = Core.Intrinsics.have_fma(Float64)
16+
# `jl_cpu_has_fma` returns a boxed Julia Bool (`jl_value_t*`), so the ccall
# return type must be `Any`; declaring `Bool` would reinterpret pointer bits.
@test ccall(:jl_cpu_has_fma, Any, (Cint,), 64) == foo_fma()
1517

1618
if Sys.isunix()
1719
mktempdir() do tempdir

0 commit comments

Comments
 (0)