Skip to content

Commit b1fe03f

Browse files
authored
[APFloat] Add APFloat support for FP6 data types (#94735)
This patch adds APFloat type support for two FP6 data types, E2M3 and E3M2. The definitions for the two formats are detailed in section 5.3.2 of the OCP specification, which can be accessed here: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf Signed-off-by: Durgadoss R <[email protected]>
1 parent bd9a525 commit b1fe03f

File tree

4 files changed

+563
-35
lines changed

4 files changed

+563
-35
lines changed

clang/lib/AST/MicrosoftMangle.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,8 @@ void MicrosoftCXXNameMangler::mangleFloat(llvm::APFloat Number) {
899899
case APFloat::S_Float8E4M3FNUZ:
900900
case APFloat::S_Float8E4M3B11FNUZ:
901901
case APFloat::S_FloatTF32:
902+
case APFloat::S_Float6E3M2FN:
903+
case APFloat::S_Float6E2M3FN:
902904
llvm_unreachable("Tried to mangle unexpected APFloat semantics");
903905
}
904906

llvm/include/llvm/ADT/APFloat.h

+25
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,14 @@ struct APFloatBase {
189189
// improved range compared to half (16-bit) formats, at (potentially)
190190
// greater throughput than single precision (32-bit) formats.
191191
S_FloatTF32,
192+
// 6-bit floating point number with bit layout S1E3M2. Unlike IEEE-754
193+
// types, there are no infinity or NaN values. The format is detailed in
194+
// https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
195+
S_Float6E3M2FN,
196+
// 6-bit floating point number with bit layout S1E2M3. Unlike IEEE-754
197+
// types, there are no infinity or NaN values. The format is detailed in
198+
// https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
199+
S_Float6E2M3FN,
192200

193201
S_x87DoubleExtended,
194202
S_MaxSemantics = S_x87DoubleExtended,
@@ -209,6 +217,8 @@ struct APFloatBase {
209217
static const fltSemantics &Float8E4M3FNUZ() LLVM_READNONE;
210218
static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE;
211219
static const fltSemantics &FloatTF32() LLVM_READNONE;
220+
static const fltSemantics &Float6E3M2FN() LLVM_READNONE;
221+
static const fltSemantics &Float6E2M3FN() LLVM_READNONE;
212222
static const fltSemantics &x87DoubleExtended() LLVM_READNONE;
213223

214224
/// A Pseudo fltsemantic used to construct APFloats that cannot conflict with
@@ -627,6 +637,8 @@ class IEEEFloat final : public APFloatBase {
627637
APInt convertFloat8E4M3FNUZAPFloatToAPInt() const;
628638
APInt convertFloat8E4M3B11FNUZAPFloatToAPInt() const;
629639
APInt convertFloatTF32APFloatToAPInt() const;
640+
APInt convertFloat6E3M2FNAPFloatToAPInt() const;
641+
APInt convertFloat6E2M3FNAPFloatToAPInt() const;
630642
void initFromAPInt(const fltSemantics *Sem, const APInt &api);
631643
template <const fltSemantics &S> void initFromIEEEAPInt(const APInt &api);
632644
void initFromHalfAPInt(const APInt &api);
@@ -642,6 +654,8 @@ class IEEEFloat final : public APFloatBase {
642654
void initFromFloat8E4M3FNUZAPInt(const APInt &api);
643655
void initFromFloat8E4M3B11FNUZAPInt(const APInt &api);
644656
void initFromFloatTF32APInt(const APInt &api);
657+
void initFromFloat6E3M2FNAPInt(const APInt &api);
658+
void initFromFloat6E2M3FNAPInt(const APInt &api);
645659

646660
void assign(const IEEEFloat &);
647661
void copySignificand(const IEEEFloat &);
@@ -1046,6 +1060,17 @@ class APFloat : public APFloatBase {
10461060
/// \param Semantics - type float semantics
10471061
static APFloat getAllOnesValue(const fltSemantics &Semantics);
10481062

1063+
/// Reports whether the given semantics are treated as supporting NaN or
/// infinity values.
///
/// \param Sem - type float semantics
/// \returns false for S_Float6E3M2FN and S_Float6E2M3FN, true for every
/// other semantics (the default branch).
static bool hasNanOrInf(const fltSemantics &Sem) {
1064+
switch (SemanticsToEnum(Sem)) {
1065+
default:
1066+
return true;
1067+
// The semantics listed below support neither NaN nor Inf.
1068+
case APFloat::S_Float6E3M2FN:
1069+
case APFloat::S_Float6E2M3FN:
1070+
return false;
1071+
}
1072+
}
1073+
10491074
/// Used to insert APFloat objects, or objects that contain APFloat objects,
10501075
/// into FoldingSets.
10511076
void Profile(FoldingSetNodeID &NID) const;

llvm/lib/Support/APFloat.cpp

+74-13
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ enum class fltNonfiniteBehavior {
6868
// `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
6969
// encodings do not distinguish between signalling and quiet NaN.
7070
NanOnly,
71+
72+
// This behavior is present in Float6E3M2FN and Float6E2M3FN types,
73+
// which do not support Inf or NaN values.
74+
FiniteOnly,
7175
};
7276

7377
// How NaN values are represented. This is currently only used in combination
@@ -139,6 +143,10 @@ static constexpr fltSemantics semFloat8E4M3FNUZ = {
139143
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
140144
4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
141145
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
146+
// Semantics for the 6-bit E3M2 format. The last initializer selects
// FiniteOnly, i.e. a format without Inf or NaN values.
// NOTE(review): the four integers are presumably max exponent, min exponent,
// precision and total size in bits, in that order -- confirm against the
// fltSemantics declaration.
static constexpr fltSemantics semFloat6E3M2FN = {
147+
4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
148+
// Semantics for the 6-bit E2M3 format. The last initializer selects
// FiniteOnly, i.e. a format without Inf or NaN values.
// NOTE(review): the four integers are presumably max exponent, min exponent,
// precision and total size in bits, in that order -- confirm against the
// fltSemantics declaration.
static constexpr fltSemantics semFloat6E2M3FN = {
149+
2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
142150
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
143151
static constexpr fltSemantics semBogus = {0, 0, 0, 0};
144152

@@ -206,6 +214,10 @@ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
206214
return Float8E4M3B11FNUZ();
207215
case S_FloatTF32:
208216
return FloatTF32();
217+
case S_Float6E3M2FN:
218+
return Float6E3M2FN();
219+
case S_Float6E2M3FN:
220+
return Float6E2M3FN();
209221
case S_x87DoubleExtended:
210222
return x87DoubleExtended();
211223
}
@@ -238,6 +250,10 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
238250
return S_Float8E4M3B11FNUZ;
239251
else if (&Sem == &llvm::APFloat::FloatTF32())
240252
return S_FloatTF32;
253+
else if (&Sem == &llvm::APFloat::Float6E3M2FN())
254+
return S_Float6E3M2FN;
255+
else if (&Sem == &llvm::APFloat::Float6E2M3FN())
256+
return S_Float6E2M3FN;
241257
else if (&Sem == &llvm::APFloat::x87DoubleExtended())
242258
return S_x87DoubleExtended;
243259
else
@@ -260,6 +276,8 @@ const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
260276
return semFloat8E4M3B11FNUZ;
261277
}
262278
// Accessor for the file-local semFloatTF32 semantics object.
const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
279+
// Accessor for the file-local semFloat6E3M2FN semantics object (6-bit E3M2,
// finite-only format).
const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
280+
// Accessor for the file-local semFloat6E2M3FN semantics object (6-bit E2M3,
// finite-only format).
const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
263281
// Accessor for the file-local semX87DoubleExtended semantics object.
const fltSemantics &APFloatBase::x87DoubleExtended() {
264282
return semX87DoubleExtended;
265283
}
@@ -878,6 +896,9 @@ void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
878896
for the significand. If double or longer, this is a signalling NaN,
879897
which may not be ideal. If float, this is QNaN(0). */
880898
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
899+
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
900+
llvm_unreachable("This floating point format does not support NaN");
901+
881902
category = fcNaN;
882903
sign = Negative;
883904
exponent = exponentNaN();
@@ -1499,16 +1520,18 @@ static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
14991520
/* Handle overflow. Sign is preserved. We either become infinity or
15001521
the largest finite number. */
15011522
IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1502-
/* Infinity? */
1503-
if (rounding_mode == rmNearestTiesToEven ||
1504-
rounding_mode == rmNearestTiesToAway ||
1505-
(rounding_mode == rmTowardPositive && !sign) ||
1506-
(rounding_mode == rmTowardNegative && sign)) {
1507-
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1508-
makeNaN(false, sign);
1509-
else
1510-
category = fcInfinity;
1511-
return (opStatus) (opOverflow | opInexact);
1523+
if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1524+
/* Infinity? */
1525+
if (rounding_mode == rmNearestTiesToEven ||
1526+
rounding_mode == rmNearestTiesToAway ||
1527+
(rounding_mode == rmTowardPositive && !sign) ||
1528+
(rounding_mode == rmTowardNegative && sign)) {
1529+
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1530+
makeNaN(false, sign);
1531+
else
1532+
category = fcInfinity;
1533+
return static_cast<opStatus>(opOverflow | opInexact);
1534+
}
15121535
}
15131536

15141537
/* Otherwise we become the largest finite number. */
@@ -3518,13 +3541,15 @@ APInt IEEEFloat::convertIEEEFloatToAPInt() const {
35183541
myexponent = ::exponentZero(S) + bias;
35193542
mysignificand.fill(0);
35203543
} else if (category == fcInfinity) {
3521-
if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
3544+
if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3545+
S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
35223546
llvm_unreachable("semantics don't support inf!");
3523-
}
35243547
myexponent = ::exponentInf(S) + bias;
35253548
mysignificand.fill(0);
35263549
} else {
35273550
assert(category == fcNaN && "Unknown category!");
3551+
if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3552+
llvm_unreachable("semantics don't support NaN!");
35283553
myexponent = ::exponentNaN(S) + bias;
35293554
std::copy_n(significandParts(), mysignificand.size(),
35303555
mysignificand.begin());
@@ -3605,6 +3630,16 @@ APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
36053630
return convertIEEEFloatToAPInt<semFloatTF32>();
36063631
}
36073632

3633+
// Converts this value to an APInt for the Float6E3M2FN semantics by
// forwarding to the generic convertIEEEFloatToAPInt template instantiated
// with semFloat6E3M2FN.
APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3634+
// NOTE(review): presumably checks that the significand fits in a single
// part for this 6-bit format -- confirm against partCount().
assert(partCount() == 1);
3635+
return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
3636+
}
3637+
3638+
// Converts this value to an APInt for the Float6E2M3FN semantics by
// forwarding to the generic convertIEEEFloatToAPInt template instantiated
// with semFloat6E2M3FN.
APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3639+
// NOTE(review): presumably checks that the significand fits in a single
// part for this 6-bit format -- confirm against partCount().
assert(partCount() == 1);
3640+
return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
3641+
}
3642+
36083643
// This function creates an APInt that is just a bit map of the floating
36093644
// point constant as it would appear in memory. It is not a conversion,
36103645
// and treating the result as a normal integer is unlikely to be useful.
@@ -3646,6 +3681,12 @@ APInt IEEEFloat::bitcastToAPInt() const {
36463681
if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
36473682
return convertFloatTF32APFloatToAPInt();
36483683

3684+
if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3685+
return convertFloat6E3M2FNAPFloatToAPInt();
3686+
3687+
if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3688+
return convertFloat6E2M3FNAPFloatToAPInt();
3689+
36493690
assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
36503691
"unknown format!");
36513692
return convertF80LongDoubleAPFloatToAPInt();
@@ -3862,6 +3903,14 @@ void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
38623903
initFromIEEEAPInt<semFloatTF32>(api);
38633904
}
38643905

3906+
// Initializes this value from the bits in `api` using the Float6E3M2FN
// semantics, by forwarding to the generic initFromIEEEAPInt template
// instantiated with semFloat6E3M2FN.
void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3907+
initFromIEEEAPInt<semFloat6E3M2FN>(api);
3908+
}
3909+
3910+
// Initializes this value from the bits in `api` using the Float6E2M3FN
// semantics, by forwarding to the generic initFromIEEEAPInt template
// instantiated with semFloat6E2M3FN.
void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3911+
initFromIEEEAPInt<semFloat6E2M3FN>(api);
3912+
}
3913+
38653914
/// Treat api as containing the bits of a floating point number.
38663915
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
38673916
assert(api.getBitWidth() == Sem->sizeInBits);
@@ -3891,6 +3940,10 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
38913940
return initFromFloat8E4M3B11FNUZAPInt(api);
38923941
if (Sem == &semFloatTF32)
38933942
return initFromFloatTF32APInt(api);
3943+
if (Sem == &semFloat6E3M2FN)
3944+
return initFromFloat6E3M2FNAPInt(api);
3945+
if (Sem == &semFloat6E2M3FN)
3946+
return initFromFloat6E2M3FNAPInt(api);
38943947

38953948
llvm_unreachable(nullptr);
38963949
}
@@ -4328,7 +4381,8 @@ int IEEEFloat::getExactLog2Abs() const {
43284381
bool IEEEFloat::isSignaling() const {
43294382
if (!isNaN())
43304383
return false;
4331-
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4384+
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4385+
semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
43324386
return false;
43334387

43344388
// IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
@@ -4387,6 +4441,10 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
43874441
// nextUp(getLargest()) == NAN
43884442
makeNaN();
43894443
break;
4444+
} else if (semantics->nonFiniteBehavior ==
4445+
fltNonfiniteBehavior::FiniteOnly) {
4446+
// nextUp(getLargest()) == getLargest()
4447+
break;
43904448
} else {
43914449
// nextUp(getLargest()) == INFINITY
43924450
APInt::tcSet(significandParts(), 0, partCount());
@@ -4477,6 +4535,9 @@ APFloatBase::ExponentType IEEEFloat::exponentZero() const {
44774535
}
44784536

44794537
void IEEEFloat::makeInf(bool Negative) {
4538+
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4539+
llvm_unreachable("This floating point format does not support Inf");
4540+
44804541
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
44814542
// There is no Inf, so make NaN instead.
44824543
makeNaN(false, Negative);

0 commit comments

Comments
 (0)