Skip to content
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1800,6 +1800,12 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(sigReader.op1JitType);
break;

case NI_Sve_ShiftLeftLogical:
case NI_Sve_ShiftRightArithmetic:
case NI_Sve_ShiftRightLogical:
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd));
break;

default:
break;
}
Expand Down
36 changes: 30 additions & 6 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,30 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
assert(instrIsRMW);

insScalableOpts sopt;

switch (intrinEmbMask.id)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this is the best place to put this switch statement, but these intrinsics need to be wrapped with ConditionalSelect, so implementing special codegen for them off this path doesn't seem feasible without some more invasive refactoring.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we do similar handling for ConvertTo for numOperands == 1 case.

{
case NI_Sve_ShiftLeftLogical:
case NI_Sve_ShiftRightArithmetic:
case NI_Sve_ShiftRightLogical:
{
const emitAttr op2Size = emitTypeSize(op2->AsHWIntrinsic()->GetAuxiliaryType());
if (op2Size != emitTypeSize(intrinEmbMask.baseType))
{
assert(emitter::optGetSveInsOpt(op2Size) == INS_OPTS_SCALABLE_D);
sopt = INS_SCALABLE_OPTS_WIDE;
break;
}

FALLTHROUGH;
}

default:
sopt = INS_SCALABLE_OPTS_NONE;
break;
}

if (intrin.op3->IsVectorZero())
{
// If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the
Expand All @@ -568,7 +592,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

// Finally, perform the actual "predicated" operation so that `targetReg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt);
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt, sopt);
}
else if (targetReg != falseReg)
{
Expand All @@ -583,7 +607,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
// If the embedded instruction supports optional mask operation, use the "unpredicated"
// version of the instruction, followed by "sel" to select the active lanes.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, embMaskOp1Reg,
embMaskOp2Reg, opt);
embMaskOp2Reg, opt, sopt);
}
else
{
Expand All @@ -598,7 +622,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg);

GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
opt);
opt, sopt);
}

GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
Expand All @@ -615,13 +639,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

// Finally, perform the actual "predicated" operation so that `targetReg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt);
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt, sopt);
}
else
{
// Just perform the actual "predicated" operation so that `targetReg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt);
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt, sopt);
}

break;
Expand Down Expand Up @@ -2168,7 +2192,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
/* canSkip */ true);
}

GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt);
GetEmitter()->emitInsSve_R_R(ins, emitSize, targetReg, op2Reg, opt);
break;
}

Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCount,
HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy8BitElementCount, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincb, INS_sve_uqincb, INS_sve_sqincb, INS_sve_uqincb, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, SaturatingIncrementByActiveElementCount, -1, 2, true, {INS_invalid, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, Scale, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fscale, INS_sve_fscale}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, false, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, false, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightLogical, -1, -1, false, {INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
Loading