Skip to content

Commit 6d262db

Browse files
committed
Auto merge of #32066 - ruud-v-a:fma, r=alexcrichton
This adds support for fused multiply-add and multiply-subtract vector intrinsics for 128 and 256-bit vectors of `f32` and `f64`. These correspond to the intrinsics [listed here](https://software.intel.com/en-us/node/523929) except for the `_ss` and `_sd` variants. The intrinsics added are: * `fmadd` * `fmaddsub` * `fmsub` * `fmsubadd` * `fnmadd` * `fnmsub` The “fma” target feature must be enabled by passing `-C target-feature=+fma` to rustc when using these, otherwise LLVM will complain. I verified locally that the `x86_mm256_fmadd_ps` and `x86_mm256_fmsub_ps` work.
2 parents 6eb81a1 + a409076 commit 6d262db

File tree

3 files changed

+170
-3
lines changed

3 files changed

+170
-3
lines changed

src/etc/platform-intrinsics/generator.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -691,7 +691,7 @@ def parse_args():
691691
parser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout,
692692
help = 'File to output to (default stdout).')
693693
parser.add_argument('-i', '--info', type=argparse.FileType('r'),
694-
help = 'File containing platform specific information to merge into'
694+
help = 'File containing platform specific information to merge into '
695695
'the input files\' header.')
696696
parser.add_argument('in_', metavar="FILE", type=argparse.FileType('r'), nargs='+',
697697
help = 'JSON files to load')
@@ -735,12 +735,12 @@ def open(self, platform):
735735
736736
use {{Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}};
737737
use IntrinsicDef::Named;
738-
use rustc::middle::ty;
738+
use rustc::middle::ty::TyCtxt;
739739
740740
// The default inlining settings trigger a pathological behaviour in
741741
// LLVM, which causes makes compilation very slow. See #28273.
742742
#[inline(never)]
743-
pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {{
743+
pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {{
744744
if !name.starts_with("{0}") {{ return None }}
745745
Some(match &name["{0}".len()..] {{'''.format(platform.intrinsic_prefix())
746746

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
{
2+
"llvm_prefix": "llvm.x86.fma.",
3+
"intrinsics": [
4+
{
5+
"intrinsic": "{0.width_mm}_fmadd_{0.data_type}",
6+
"width": [128, 256],
7+
"llvm": "vfmadd.{0.data_type_short}{0.width_suffix}",
8+
"ret": "f(32-64)",
9+
"args": ["0", "0", "0"]
10+
},
11+
{
12+
"intrinsic": "{0.width_mm}_fmaddsub_{0.data_type}",
13+
"width": [128, 256],
14+
"llvm": "vfmaddsub.{0.data_type_short}{0.width_suffix}",
15+
"ret": "f(32-64)",
16+
"args": ["0", "0", "0"]
17+
},
18+
{
19+
"intrinsic": "{0.width_mm}_fmsub_{0.data_type}",
20+
"width": [128, 256],
21+
"llvm": "vfmsub.{0.data_type_short}{0.width_suffix}",
22+
"ret": "f(32-64)",
23+
"args": ["0", "0", "0"]
24+
},
25+
{
26+
"intrinsic": "{0.width_mm}_fmsubadd_{0.data_type}",
27+
"width": [128, 256],
28+
"llvm": "vfmsubadd.{0.data_type_short}{0.width_suffix}",
29+
"ret": "f(32-64)",
30+
"args": ["0", "0", "0"]
31+
},
32+
{
33+
"intrinsic": "{0.width_mm}_fnmadd_{0.data_type}",
34+
"width": [128, 256],
35+
"llvm": "vfnmadd.{0.data_type_short}{0.width_suffix}",
36+
"ret": "f(32-64)",
37+
"args": ["0", "0", "0"]
38+
},
39+
{
40+
"intrinsic": "{0.width_mm}_fnmsub_{0.data_type}",
41+
"width": [128, 256],
42+
"llvm": "vfnmsub.{0.data_type_short}{0.width_suffix}",
43+
"ret": "f(32-64)",
44+
"args": ["0", "0", "0"]
45+
}
46+
]
47+
}

src/librustc_platform_intrinsics/x86.rs

+120
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,126 @@ pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {
11081108
output: v(u(16), 16),
11091109
definition: Named("llvm.x86.avx2.psubus.w")
11101110
},
1111+
"_fmadd_ps" => Intrinsic {
1112+
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
1113+
output: v(f(32), 4),
1114+
definition: Named("llvm.x86.fma.vfmadd.ps")
1115+
},
1116+
"_fmadd_pd" => Intrinsic {
1117+
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
1118+
output: v(f(64), 2),
1119+
definition: Named("llvm.x86.fma.vfmadd.pd")
1120+
},
1121+
"256_fmadd_ps" => Intrinsic {
1122+
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
1123+
output: v(f(32), 8),
1124+
definition: Named("llvm.x86.fma.vfmadd.ps.256")
1125+
},
1126+
"256_fmadd_pd" => Intrinsic {
1127+
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
1128+
output: v(f(64), 4),
1129+
definition: Named("llvm.x86.fma.vfmadd.pd.256")
1130+
},
1131+
"_fmaddsub_ps" => Intrinsic {
1132+
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
1133+
output: v(f(32), 4),
1134+
definition: Named("llvm.x86.fma.vfmaddsub.ps")
1135+
},
1136+
"_fmaddsub_pd" => Intrinsic {
1137+
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
1138+
output: v(f(64), 2),
1139+
definition: Named("llvm.x86.fma.vfmaddsub.pd")
1140+
},
1141+
"256_fmaddsub_ps" => Intrinsic {
1142+
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
1143+
output: v(f(32), 8),
1144+
definition: Named("llvm.x86.fma.vfmaddsub.ps.256")
1145+
},
1146+
"256_fmaddsub_pd" => Intrinsic {
1147+
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
1148+
output: v(f(64), 4),
1149+
definition: Named("llvm.x86.fma.vfmaddsub.pd.256")
1150+
},
1151+
"_fmsub_ps" => Intrinsic {
1152+
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
1153+
output: v(f(32), 4),
1154+
definition: Named("llvm.x86.fma.vfmsub.ps")
1155+
},
1156+
"_fmsub_pd" => Intrinsic {
1157+
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
1158+
output: v(f(64), 2),
1159+
definition: Named("llvm.x86.fma.vfmsub.pd")
1160+
},
1161+
"256_fmsub_ps" => Intrinsic {
1162+
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
1163+
output: v(f(32), 8),
1164+
definition: Named("llvm.x86.fma.vfmsub.ps.256")
1165+
},
1166+
"256_fmsub_pd" => Intrinsic {
1167+
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
1168+
output: v(f(64), 4),
1169+
definition: Named("llvm.x86.fma.vfmsub.pd.256")
1170+
},
1171+
"_fmsubadd_ps" => Intrinsic {
1172+
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
1173+
output: v(f(32), 4),
1174+
definition: Named("llvm.x86.fma.vfmsubadd.ps")
1175+
},
1176+
"_fmsubadd_pd" => Intrinsic {
1177+
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
1178+
output: v(f(64), 2),
1179+
definition: Named("llvm.x86.fma.vfmsubadd.pd")
1180+
},
1181+
"256_fmsubadd_ps" => Intrinsic {
1182+
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
1183+
output: v(f(32), 8),
1184+
definition: Named("llvm.x86.fma.vfmsubadd.ps.256")
1185+
},
1186+
"256_fmsubadd_pd" => Intrinsic {
1187+
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
1188+
output: v(f(64), 4),
1189+
definition: Named("llvm.x86.fma.vfmsubadd.pd.256")
1190+
},
1191+
"_fnmadd_ps" => Intrinsic {
1192+
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
1193+
output: v(f(32), 4),
1194+
definition: Named("llvm.x86.fma.vfnmadd.ps")
1195+
},
1196+
"_fnmadd_pd" => Intrinsic {
1197+
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
1198+
output: v(f(64), 2),
1199+
definition: Named("llvm.x86.fma.vfnmadd.pd")
1200+
},
1201+
"256_fnmadd_ps" => Intrinsic {
1202+
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
1203+
output: v(f(32), 8),
1204+
definition: Named("llvm.x86.fma.vfnmadd.ps.256")
1205+
},
1206+
"256_fnmadd_pd" => Intrinsic {
1207+
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
1208+
output: v(f(64), 4),
1209+
definition: Named("llvm.x86.fma.vfnmadd.pd.256")
1210+
},
1211+
"_fnmsub_ps" => Intrinsic {
1212+
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
1213+
output: v(f(32), 4),
1214+
definition: Named("llvm.x86.fma.vfnmsub.ps")
1215+
},
1216+
"_fnmsub_pd" => Intrinsic {
1217+
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
1218+
output: v(f(64), 2),
1219+
definition: Named("llvm.x86.fma.vfnmsub.pd")
1220+
},
1221+
"256_fnmsub_ps" => Intrinsic {
1222+
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
1223+
output: v(f(32), 8),
1224+
definition: Named("llvm.x86.fma.vfnmsub.ps.256")
1225+
},
1226+
"256_fnmsub_pd" => Intrinsic {
1227+
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
1228+
output: v(f(64), 4),
1229+
definition: Named("llvm.x86.fma.vfnmsub.pd.256")
1230+
},
11111231
_ => return None,
11121232
})
11131233
}

0 commit comments

Comments
 (0)