From 40825fdd4305516814a0977fb9fe3daffb16af62 Mon Sep 17 00:00:00 2001
From: Scott Wolchok
Date: Wed, 8 May 2024 11:25:14 -0700
Subject: [PATCH 1/5] Use compile-time promotion to reduce max/min size & build time (#3459)

Summary: Yet another smaller pair of ops.

Reviewed By: manuelcandales

Differential Revision: D56807402
---
 kernels/portable/cpu/op_maximum.cpp | 68 ++++++++++++++++++++++------
 kernels/portable/cpu/op_minimum.cpp | 69 ++++++++++++++++++++++-------
 2 files changed, 108 insertions(+), 29 deletions(-)

diff --git a/kernels/portable/cpu/op_maximum.cpp b/kernels/portable/cpu/op_maximum.cpp
index 3e34035d5f6..4091f2cf8ca 100644
--- a/kernels/portable/cpu/op_maximum.cpp
+++ b/kernels/portable/cpu/op_maximum.cpp
@@ -20,6 +20,50 @@ const T& max(const T& a, const T& b) {
   return (b > a) ? b : a;
 }
 
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MaximumInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MaximumInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void run(const Tensor& a, const Tensor& b, Tensor& out) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [](const CTYPE_A val_a, const CTYPE_B val_b) {
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = max(a_casted, b_casted);
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MaximumInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
+
 } // namespace
 
 Tensor& maximum_out(
@@ -44,20 +88,16 @@ Tensor& maximum_out(
 
   ET_SWITCH_REALHB_TYPES(a_type, ctx, "maximum.out", CTYPE_A, [&]() {
     ET_SWITCH_REALHB_TYPES(b_type, ctx, "maximum.out", CTYPE_B, [&]() {
-      ET_SWITCH_REALB_TYPES(common_type, ctx, "maximum.out", CTYPE_IN, [&]() {
-        ET_SWITCH_REALHB_TYPES(out_type, ctx, "maximum.out", CTYPE_OUT, [&]() {
-          apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
-              [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
-                CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
-                CTYPE_IN value = max(a_casted, b_casted);
-
-                return static_cast<CTYPE_OUT>(value);
-              },
-              a,
-              b,
-              out);
-        });
+      using CTYPE_IN = typename torch::executor::
+          promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
+      ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
+      ET_SWITCH_REALHB_TYPES(out_type, ctx, "maximum.out", CTYPE_OUT, [&]() {
+        MaximumInner<
+            can_cast<CTYPE_IN, CTYPE_OUT>::value,
+            CTYPE_A,
+            CTYPE_B,
+            CTYPE_IN,
+            CTYPE_OUT>::run(a, b, out);
       });
     });
   });
diff --git a/kernels/portable/cpu/op_minimum.cpp b/kernels/portable/cpu/op_minimum.cpp
index 767a2c4ca59..7c106a63c4f 100644
--- a/kernels/portable/cpu/op_minimum.cpp
+++ b/kernels/portable/cpu/op_minimum.cpp
@@ -20,6 +20,50 @@ const T& min(const T& a, const T& b) {
   return (b < a) ? b : a;
 }
 
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MinimumInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MinimumInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void run(const Tensor& a, const Tensor& b, Tensor& out) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [](const CTYPE_A val_a, const CTYPE_B val_b) {
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = min(a_casted, b_casted);
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MinimumInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
+
 } // namespace
 
 Tensor& minimum_out(
@@ -44,22 +88,17 @@ Tensor& minimum_out(
 
   ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "minimum.out", CTYPE_A, [&]() {
     ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "minimum.out", CTYPE_B, [&]() {
+      using CTYPE_IN =
+          typename torch::executor::promote_types<CTYPE_A, CTYPE_B>::type;
+      ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
       ET_SWITCH_REAL_TYPES_AND(
-          Bool, common_type, ctx, "minimum.out", CTYPE_IN, [&]() {
-            ET_SWITCH_REAL_TYPES_AND(
-                Bool, out_type, ctx, "minimum.out", CTYPE_OUT, [&]() {
-                  apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
-                      [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                        CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
-                        CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
-                        CTYPE_IN value = min(a_casted, b_casted);
-
-                        return static_cast<CTYPE_OUT>(value);
-                      },
-                      a,
-                      b,
-                      out);
-                });
+          Bool, out_type, ctx, "minimum.out", CTYPE_OUT, [&]() {
+            MinimumInner<
+                can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                CTYPE_A,
+                CTYPE_B,
+                CTYPE_IN,
+                CTYPE_OUT>::run(a, b, out);
           });
     });
   });
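The pattern used by both diffs above, reduced to a self-contained sketch: instead of a third runtime ET_SWITCH over the promoted ("common") dtype, the promoted type is computed by the compiler from the two input element types, and an Inner struct specialized on a compile-time can-cast check does the element-wise work. The names promoted_t, can_cast_to and MaxInner below are illustrative stand-ins under that assumption, not the ExecuTorch APIs (the kernels use torch::executor::promote_types, can_cast and the ET_SWITCH_* macros).

    #include <cstdio>
    #include <type_traits>

    // Stand-ins for promote_types / can_cast (assumptions for the sketch):
    // promote with std::common_type, gate with std::is_convertible.
    template <typename A, typename B>
    using promoted_t = typename std::common_type<A, B>::type;

    template <typename From, typename To>
    using can_cast_to = std::is_convertible<From, To>;

    // Primary template, selected by the compile-time can-cast decision.
    template <bool can_cast, typename A, typename B, typename IN, typename OUT>
    struct MaxInner;

    // Valid combination: do the work in the promoted type IN.
    template <typename A, typename B, typename IN, typename OUT>
    struct MaxInner<true, A, B, IN, OUT> {
      static OUT run(A a, B b) {
        IN a_in = static_cast<IN>(a);
        IN b_in = static_cast<IN>(b);
        return static_cast<OUT>(b_in > a_in ? b_in : a_in);
      }
    };

    // Invalid combination: instantiable so every switch branch compiles, but
    // unreachable because the runtime canCast() check rejects it first
    // (ReportCanCastBug in the patch).
    template <typename A, typename B, typename IN, typename OUT>
    struct MaxInner<false, A, B, IN, OUT> {
      static OUT run(A, B) { return OUT{}; }
    };

    int main() {
      // The promoted type is a compile-time fact of (int, float); no runtime
      // switch over the common dtype is needed, which is what shrinks the
      // generated code.
      using IN = promoted_t<int, float>; // float
      float out =
          MaxInner<can_cast_to<IN, float>::value, int, float, IN, float>::run(
              3, 2.5f);
      std::printf("%g\n", out); // 3
    }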
From acd27cb3a5623fed1ed05592b01e0d00d90aeb35 Mon Sep 17 00:00:00 2001
From: Scott Wolchok
Date: Wed, 8 May 2024 11:25:14 -0700
Subject: [PATCH 2/5] Use compile-time promotion to reduce floor_divide size & build time (#3455)

Summary: Continuing rollout of this technique.

Reviewed By: manuelcandales

Differential Revision: D56827786
---
 kernels/portable/cpu/op_floor_divide.cpp    | 93 +++++++++++++------
 .../core/exec_aten/util/scalar_type_util.h  |  6 ++
 2 files changed, 70 insertions(+), 29 deletions(-)

diff --git a/kernels/portable/cpu/op_floor_divide.cpp b/kernels/portable/cpu/op_floor_divide.cpp
index 261f77ce617..0514df0ca25 100644
--- a/kernels/portable/cpu/op_floor_divide.cpp
+++ b/kernels/portable/cpu/op_floor_divide.cpp
@@ -20,6 +20,60 @@ namespace native {
 using Tensor = exec_aten::Tensor;
 using ScalarType = exec_aten::ScalarType;
 
+namespace {
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FloorDivideInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FloorDivideInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void
+  run(const Tensor& a, const Tensor& b, Tensor& out, bool& div_by_zero_error) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [&div_by_zero_error](const CTYPE_A val_a, const CTYPE_B val_b) {
+          if (is_integral_type<CTYPE_IN, /*includeBool=*/true>::value) {
+            if (val_b == 0) {
+              div_by_zero_error = true;
+              return static_cast<CTYPE_OUT>(0);
+            }
+          }
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = utils::floor_divide(a_casted, b_casted);
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&, bool&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FloorDivideInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
+
+} // namespace
+
 Tensor& floor_divide_out(
     RuntimeContext& ctx,
     const Tensor& a,
@@ -46,36 +100,17 @@ Tensor& floor_divide_out(
       Bool, a_type, ctx, "floor_divide.out", CTYPE_A, [&]() {
         ET_SWITCH_REAL_TYPES_AND(
             Bool, b_type, ctx, "floor_divide.out", CTYPE_B, [&]() {
+              using CTYPE_IN = typename torch::executor::
+                  promote_types<CTYPE_A, CTYPE_B>::type;
+              ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
               ET_SWITCH_REAL_TYPES(
-                  common_type, ctx, "floor_divide.out", CTYPE_IN, [&]() {
-                    ET_SWITCH_REAL_TYPES(
-                        out_type, ctx, "floor_divide.out", CTYPE_OUT, [&]() {
-                          apply_binary_elementwise_fn<
-                              CTYPE_A,
-                              CTYPE_B,
-                              CTYPE_OUT>(
-                              [common_type, &div_by_zero_error](
-                                  const CTYPE_A val_a, const CTYPE_B val_b) {
-                                if (isIntegralType(
-                                        common_type, /*includeBool=*/true)) {
-                                  if (val_b == 0) {
-                                    div_by_zero_error = true;
-                                    return static_cast<CTYPE_OUT>(0);
-                                  }
-                                }
-                                CTYPE_IN a_casted =
-                                    static_cast<CTYPE_IN>(val_a);
-                                CTYPE_IN b_casted =
-                                    static_cast<CTYPE_IN>(val_b);
-                                CTYPE_IN value = utils::floor_divide(
-                                    a_casted, b_casted);
-
-                                return static_cast<CTYPE_OUT>(value);
-                              },
-                              a,
-                              b,
-                              out);
-                        });
+                  out_type, ctx, "floor_divide.out", CTYPE_OUT, [&]() {
+                    FloorDivideInner<
+                        can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                        CTYPE_A,
+                        CTYPE_B,
+                        CTYPE_IN,
+                        CTYPE_OUT>::run(a, b, out, div_by_zero_error);
                   });
             });
       });
diff --git a/runtime/core/exec_aten/util/scalar_type_util.h b/runtime/core/exec_aten/util/scalar_type_util.h
index 595ed7a1c02..084289520aa 100644
--- a/runtime/core/exec_aten/util/scalar_type_util.h
+++ b/runtime/core/exec_aten/util/scalar_type_util.h
@@ -349,6 +349,12 @@ inline constexpr bool isIntegralType(
       t == exec_aten::ScalarType::Short);
 }
 
+template <typename T, bool includeBool>
+struct is_integral_type
+    : public std::integral_constant<
+          bool,
+          isIntegralType(CppTypeToScalarType<T>::value, includeBool)> {};
+
 inline constexpr bool isFloatingType(exec_aten::ScalarType t) {
   return (
       t == exec_aten::ScalarType::Double || t == exec_aten::ScalarType::Float ||
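The scalar_type_util.h helper above is what lets the FloorDivideInner lambda branch on a compile-time constant: a constexpr predicate over the dtype enum is lifted into a type trait keyed on the C++ type, so for floating-point instantiations the `val_b == 0` branch is dead code. A minimal sketch of the same mechanism, assuming a toy enum and type-to-dtype map in place of ScalarType / CppTypeToScalarType (the names below are illustrative, not the real definitions):

    #include <type_traits>

    // Toy dtype enum and type-to-dtype map standing in for
    // exec_aten::ScalarType and CppTypeToScalarType.
    enum class DType { Bool, Int, Float };

    template <typename T>
    struct dtype_of;
    template <> struct dtype_of<bool>  { static constexpr DType value = DType::Bool; };
    template <> struct dtype_of<int>   { static constexpr DType value = DType::Int; };
    template <> struct dtype_of<float> { static constexpr DType value = DType::Float; };

    // constexpr predicate over the enum, analogous to isIntegralType().
    constexpr bool is_integral_dtype(DType d, bool include_bool) {
      return d == DType::Int || (include_bool && d == DType::Bool);
    }

    // The trait pattern from the patch: evaluate the predicate at compile time
    // for a concrete C++ type, so templated kernel code can branch on it.
    template <typename T, bool includeBool>
    struct is_integral_type
        : std::integral_constant<
              bool,
              is_integral_dtype(dtype_of<T>::value, includeBool)> {};

    static_assert(is_integral_type<int, /*includeBool=*/false>::value, "int is integral");
    static_assert(is_integral_type<bool, /*includeBool=*/true>::value, "bool counts when included");
    static_assert(!is_integral_type<float, /*includeBool=*/true>::value, "float is not integral");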
From f2668fa5a0ec27bb42fc4488d4a535a296e79509 Mon Sep 17 00:00:00 2001
From: Scott Wolchok
Date: Wed, 8 May 2024 11:25:14 -0700
Subject: [PATCH 3/5] Use compile-time promotion to reduce remainder size & build time (#3458)

Summary: Yet another op that can benefit from compile-time type promotion.

Reviewed By: manuelcandales

Differential Revision: D56831293
---
 kernels/portable/cpu/op_remainder.cpp | 81 ++++++++++++++++++---------
 kernels/test/op_remainder_test.cpp    | 14 +++++
 2 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/kernels/portable/cpu/op_remainder.cpp b/kernels/portable/cpu/op_remainder.cpp
index 9e48374a81a..7c858c1c08a 100644
--- a/kernels/portable/cpu/op_remainder.cpp
+++ b/kernels/portable/cpu/op_remainder.cpp
@@ -20,6 +20,52 @@ namespace native {
 
 using Tensor = exec_aten::Tensor;
 
+namespace {
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct RemainderInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct RemainderInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void run(const Tensor& a, const Tensor& b, Tensor& out) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [](const CTYPE_A val_a, const CTYPE_B val_b) {
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = utils::remainder_override(a_casted, b_casted);
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct RemainderInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
+
+} // namespace
 Tensor& remainder_Tensor_out(
     RuntimeContext& ctx,
     const Tensor& a,
@@ -45,32 +91,17 @@ Tensor& remainder_Tensor_out(
       Bool, a_type, ctx, "remainder.Tensor_out", CTYPE_A, [&]() {
         ET_SWITCH_REAL_TYPES_AND(
            Bool, b_type, ctx, "remainder.Tensor_out", CTYPE_B, [&]() {
+              using CTYPE_IN = typename torch::executor::
+                  promote_types<CTYPE_A, CTYPE_B>::type;
+              ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
               ET_SWITCH_REAL_TYPES(
-                  common_type, ctx, "remainder.Tensor_out", CTYPE_IN, [&]() {
-                    ET_SWITCH_REAL_TYPES(
-                        out_type,
-                        ctx,
-                        "remainder.Tensor_out",
-                        CTYPE_OUT,
-                        [&]() {
-                          apply_binary_elementwise_fn<
-                              CTYPE_A,
-                              CTYPE_B,
-                              CTYPE_OUT>(
-                              [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                                CTYPE_IN a_casted =
-                                    static_cast<CTYPE_IN>(val_a);
-                                CTYPE_IN b_casted =
-                                    static_cast<CTYPE_IN>(val_b);
-                                CTYPE_IN value = utils::remainder_override(
-                                    a_casted, b_casted);
-
-                                return static_cast<CTYPE_OUT>(value);
-                              },
-                              a,
-                              b,
-                              out);
-                        });
+                  out_type, ctx, "remainder.Tensor_out", CTYPE_OUT, [&]() {
+                    RemainderInner<
+                        can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                        CTYPE_A,
+                        CTYPE_B,
+                        CTYPE_IN,
+                        CTYPE_OUT>::run(a, b, out);
                   });
             });
       });
diff --git a/kernels/test/op_remainder_test.cpp b/kernels/test/op_remainder_test.cpp
index 4a550958a1a..254e8122b61 100644
--- a/kernels/test/op_remainder_test.cpp
+++ b/kernels/test/op_remainder_test.cpp
@@ -21,6 +21,7 @@ using exec_aten::Tensor;
 using torch::executor::testing::TensorFactory;
 
 class OpRemainderOutTest : public OperatorTest {
+ protected:
   Tensor& op_remainder_tensor_out(
       const Tensor& self,
       const Tensor& other,
@@ -35,3 +36,16 @@ class OpRemainderOutTest : public OperatorTest {
    return torch::executor::aten::remainder_outf(context_, self, other, out);
   }
 };
+
+TEST_F(OpRemainderOutTest, SmokeTest) {
+  TensorFactory<ScalarType::Double> tfDouble;
+  TensorFactory<ScalarType::Long> tfLong;
+  TensorFactory<ScalarType::Int> tfInt;
+
+  Tensor self = tfLong.full({2, 2}, 46);
+  Tensor other = tfInt.full({2, 2}, 4);
+  Tensor out = tfDouble.zeros({2, 2});
+  Tensor out_expected = tfDouble.full({2, 2}, 2.0);
+  op_remainder_tensor_out(self, other, out);
+  EXPECT_TENSOR_CLOSE(out, out_expected);
+}
From 66bbb076450825768bd8dd35b97f71a623321e3c Mon Sep 17 00:00:00 2001
From: Scott Wolchok
Date: Wed, 8 May 2024 11:25:14 -0700
Subject: [PATCH 4/5] Use compile-time promotion to reduce fmod size & build time (#3456)

Summary: Almost done with Tensor ops that can benefit from compile-time promotion!

Reviewed By: manuelcandales

Differential Revision: D56835200
---
 kernels/portable/cpu/op_fmod.cpp | 93 ++++++++++++++++++++++----------
 kernels/test/op_fmod_test.cpp    | 13 +++++
 2 files changed, 78 insertions(+), 28 deletions(-)

diff --git a/kernels/portable/cpu/op_fmod.cpp b/kernels/portable/cpu/op_fmod.cpp
index 0083c1379d5..42f83731199 100644
--- a/kernels/portable/cpu/op_fmod.cpp
+++ b/kernels/portable/cpu/op_fmod.cpp
@@ -19,6 +19,60 @@ namespace native {
 
 using Tensor = exec_aten::Tensor;
 
+namespace {
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FmodInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FmodInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void
+  run(const Tensor& a, const Tensor& b, Tensor& out, bool& div_by_zero_error) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [&div_by_zero_error](const CTYPE_A val_a, const CTYPE_B val_b) {
+          if (is_integral_type<CTYPE_IN, /*includeBool=*/true>::value) {
+            if (val_b == 0) {
+              div_by_zero_error = true;
+              return static_cast<CTYPE_OUT>(0);
+            }
+          }
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = std::fmod(a_casted, b_casted);
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&, bool&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FmodInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
+
+} // namespace
+
 Tensor& fmod_Tensor_out(
     RuntimeContext& ctx,
     const Tensor& a,
@@ -44,35 +98,18 @@ Tensor& fmod_Tensor_out(
       Bool, a_type, ctx, "fmod.Tensor_out", CTYPE_A, [&]() {
         ET_SWITCH_REAL_TYPES_AND(
            Bool, b_type, ctx, "fmod.Tensor_out", CTYPE_B, [&]() {
+              using CTYPE_IN = typename torch::executor::
+                  promote_types<CTYPE_A, CTYPE_B>::type;
+              ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
               ET_SWITCH_REAL_TYPES(
-                  common_type, ctx, "fmod.Tensor_out", CTYPE_IN, [&]() {
-                    ET_SWITCH_REAL_TYPES(
-                        out_type, ctx, "fmod.Tensor_out", CTYPE_OUT, [&]() {
-                          apply_binary_elementwise_fn<
-                              CTYPE_A,
-                              CTYPE_B,
-                              CTYPE_OUT>(
-                              [common_type, &div_by_zero_error](
-                                  const CTYPE_A val_a, const CTYPE_B val_b) {
-                                if (isIntegralType(
-                                        common_type, /*includeBool=*/true)) {
-                                  if (val_b == 0) {
-                                    div_by_zero_error = true;
-                                    return static_cast<CTYPE_OUT>(0);
-                                  }
-                                }
-                                CTYPE_IN a_casted =
-                                    static_cast<CTYPE_IN>(val_a);
-                                CTYPE_IN b_casted =
-                                    static_cast<CTYPE_IN>(val_b);
-                                CTYPE_IN value = std::fmod(a_casted, b_casted);
-
-                                return static_cast<CTYPE_OUT>(value);
-                              },
-                              a,
-                              b,
-                              out);
-                        });
+                  out_type, ctx, "fmod.Tensor_out", CTYPE_OUT, [&]() {
+                    FmodInner<
+                        !std::is_same<CTYPE_IN, bool>::value &&
+                            can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                        CTYPE_A,
+                        CTYPE_B,
+                        CTYPE_IN,
+                        CTYPE_OUT>::run(a, b, out, div_by_zero_error);
                   });
             });
       });
diff --git a/kernels/test/op_fmod_test.cpp b/kernels/test/op_fmod_test.cpp
index 475d4ea5cb4..4ee4d84c1cc 100644
--- a/kernels/test/op_fmod_test.cpp
+++ b/kernels/test/op_fmod_test.cpp
@@ -32,3 +32,16 @@ class OpFmodTest : public OperatorTest {
    return torch::executor::aten::fmod_outf(context_, self, other, out);
   }
 };
+
+TEST_F(OpFmodTest, SmokeTest) {
+  TensorFactory<ScalarType::Double> tfDouble;
+  TensorFactory<ScalarType::Long> tfLong;
+  TensorFactory<ScalarType::Int> tfInt;
+
+  Tensor self = tfLong.full({2, 2}, 46);
+  Tensor other = tfInt.full({2, 2}, 4);
+  Tensor out = tfDouble.zeros({2, 2});
+  Tensor out_expected = tfDouble.full({2, 2}, 2.0);
+  op_fmod_tensor_out(self, other, out);
+  EXPECT_TENSOR_CLOSE(out, out_expected);
+}
From 41cf1aadf35654df72b8a2df9884e5fa4773fb8b Mon Sep 17 00:00:00 2001
From: Scott Wolchok
Date: Wed, 8 May 2024 11:25:14 -0700
Subject: [PATCH 5/5] support Half in minimum and clamp (#3457)

Summary: IIUC, these ops need to support Half but don't. Noticed it as a difference from maximum.

Reviewed By: manuelcandales

Differential Revision: D56846242
---
 kernels/portable/cpu/op_clamp.cpp     | 18 ++++++------
 kernels/portable/cpu/op_minimum.cpp   | 27 +++++++++--------
 kernels/portable/cpu/util/math_util.h | 42 +++++++++++++++++++++++++++
 kernels/test/op_clamp_test.cpp        | 25 +++++++++++++---
 kernels/test/op_minimum_test.cpp      |  4 +++
 5 files changed, 89 insertions(+), 27 deletions(-)

diff --git a/kernels/portable/cpu/op_clamp.cpp b/kernels/portable/cpu/op_clamp.cpp
index 06c87d03f2d..50d7e8c374d 100644
--- a/kernels/portable/cpu/op_clamp.cpp
+++ b/kernels/portable/cpu/op_clamp.cpp
@@ -53,7 +53,7 @@ __ET_NODISCARD bool check_bounds(
       }
     });
   } else if (isFloatingType(out_type)) {
-    ET_SWITCH_FLOAT_TYPES(out_type, ctx, "clamp", CTYPE_OUT, [&]() {
+    ET_SWITCH_FLOATH_TYPES(out_type, ctx, "clamp", CTYPE_OUT, [&]() {
       if (std::isfinite(val) &&
           is_out_of_bounds<CTYPE_VAL, CTYPE_OUT>(val)) {
         ET_LOG(Error, "%s value out of bounds", val_name);
@@ -119,7 +119,7 @@ Tensor& clamp_out(
 
   ET_KERNEL_CHECK(ctx, common_type == out_type, InvalidArgument, out);
 
-  ET_SWITCH_REAL_TYPES(out_type, ctx, "clamp", CTYPE_OUT, [&]() {
+  ET_SWITCH_REALH_TYPES(out_type, ctx, "clamp", CTYPE_OUT, [&]() {
     // Extract optional min value
     CTYPE_OUT min = 0;
     if (has_min) {
@@ -140,7 +140,7 @@ Tensor& clamp_out(
     });
   }
 
-  ET_SWITCH_REAL_TYPES_AND(Bool, in_type, ctx, "clamp", CTYPE_IN, [&]() {
+  ET_SWITCH_REALHB_TYPES(in_type, ctx, "clamp", CTYPE_IN, [&]() {
     apply_unary_map_fn(
         [has_min, min, has_max, max](const CTYPE_IN val_in) {
          CTYPE_OUT val_out = static_cast<CTYPE_OUT>(val_in);
@@ -195,20 +195,20 @@ Tensor& clamp_tensor_out(
   ScalarType out_type = out.scalar_type();
 
   if (has_min) {
-    common_type = promoteTypes(common_type, min_type);
+    common_type = promoteTypes(common_type, min_type, /*half_to_float*/ true);
   }
   if (has_max) {
-    common_type = promoteTypes(common_type, max_type);
+    common_type = promoteTypes(common_type, max_type, /*half_to_float*/ true);
  }
 
   ET_KERNEL_CHECK(ctx, canCast(common_type, out_type), InvalidArgument, out);
 
   constexpr auto name = "clamp.Tensor_out";
 
-  ET_SWITCH_REALB_TYPES(in_type, ctx, name, CTYPE_IN, [&]() {
-    ET_SWITCH_REALB_TYPES(min_type, ctx, name, CTYPE_MIN, [&]() {
-      ET_SWITCH_REALB_TYPES(max_type, ctx, name, CTYPE_MAX, [&]() {
-        ET_SWITCH_REALB_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
+  ET_SWITCH_REALHB_TYPES(in_type, ctx, name, CTYPE_IN, [&]() {
+    ET_SWITCH_REALHB_TYPES(min_type, ctx, name, CTYPE_MIN, [&]() {
+      ET_SWITCH_REALHB_TYPES(max_type, ctx, name, CTYPE_MAX, [&]() {
+        ET_SWITCH_REALHB_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
           apply_ternary_elementwise_fn<
               CTYPE_IN,
               CTYPE_MIN,
diff --git a/kernels/portable/cpu/op_minimum.cpp b/kernels/portable/cpu/op_minimum.cpp
index 7c106a63c4f..44c0efa8a67 100644
--- a/kernels/portable/cpu/op_minimum.cpp
+++ b/kernels/portable/cpu/op_minimum.cpp
@@ -81,25 +81,24 @@ Tensor& minimum_out(
 
   ScalarType a_type = a.scalar_type();
   ScalarType b_type = b.scalar_type();
-  ScalarType common_type = promoteTypes(a_type, b_type);
+  ScalarType common_type = promoteTypes(a_type, b_type, /*half_to_float*/ true);
   ScalarType out_type = out.scalar_type();
 
   ET_KERNEL_CHECK(ctx, canCast(common_type, out_type), InvalidArgument, out);
 
-  ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "minimum.out", CTYPE_A, [&]() {
-    ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "minimum.out", CTYPE_B, [&]() {
-      using CTYPE_IN =
-          typename torch::executor::promote_types<CTYPE_A, CTYPE_B>::type;
+  ET_SWITCH_REALHB_TYPES(a_type, ctx, "minimum.out", CTYPE_A, [&]() {
+    ET_SWITCH_REALHB_TYPES(b_type, ctx, "minimum.out", CTYPE_B, [&]() {
+      using CTYPE_IN = typename torch::executor::
+          promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
       ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
-      ET_SWITCH_REAL_TYPES_AND(
-          Bool, out_type, ctx, "minimum.out", CTYPE_OUT, [&]() {
-            MinimumInner<
-                can_cast<CTYPE_IN, CTYPE_OUT>::value,
-                CTYPE_A,
-                CTYPE_B,
-                CTYPE_IN,
-                CTYPE_OUT>::run(a, b, out);
-          });
+      ET_SWITCH_REALHB_TYPES(out_type, ctx, "minimum.out", CTYPE_OUT, [&]() {
+        MinimumInner<
+            can_cast<CTYPE_IN, CTYPE_OUT>::value,
+            CTYPE_A,
+            CTYPE_B,
+            CTYPE_IN,
+            CTYPE_OUT>::run(a, b, out);
+      });
     });
   });
diff --git a/kernels/portable/cpu/util/math_util.h b/kernels/portable/cpu/util/math_util.h
index 44cb47f8cba..df175147062 100644
--- a/kernels/portable/cpu/util/math_util.h
+++ b/kernels/portable/cpu/util/math_util.h
@@ -94,6 +94,48 @@ INT_T max_override(INT_T a, INT_T b) {
   return std::max(a, b);
 }
 
+template <
+    typename T,
+    typename std::enable_if<
+        std::is_same<T, exec_aten::Half>::value,
+        bool>::type = true>
+T min_override(T a, T b) {
+  const auto float_a = static_cast<float>(a);
+  if (std::isnan(float_a)) {
+    return a;
+  }
+  const auto float_b = static_cast<float>(b);
+  if (std::isnan(float_b)) {
+    return b;
+  }
+
+  if (float_a < float_b) {
+    return a;
+  }
+  return b;
+}
+
+template <
+    typename T,
+    typename std::enable_if<
+        std::is_same<T, exec_aten::Half>::value,
+        bool>::type = true>
+T max_override(T a, T b) {
+  const auto float_a = static_cast<float>(a);
+  if (std::isnan(float_a)) {
+    return a;
+  }
+  const auto float_b = static_cast<float>(b);
+  if (std::isnan(float_b)) {
+    return b;
+  }
+
+  if (float_a > float_b) {
+    return a;
+  }
+  return b;
+}
+
 /**
  * There is a slight difference in how std::fmod works compared to how ATen
  * determines remainders:
diff --git a/kernels/test/op_clamp_test.cpp b/kernels/test/op_clamp_test.cpp
index 871333482c8..0244fd55700 100644
--- a/kernels/test/op_clamp_test.cpp
+++ b/kernels/test/op_clamp_test.cpp
@@ -147,8 +147,16 @@ class OpClampOutTest : public OperatorTest {
   // Test cases that are compatible with float and double.
   template <ScalarType DTYPE>
   void run_floating_point_test_cases() {
-    constexpr auto kInfinity =
-        std::numeric_limits<typename TensorFactory<DTYPE>::ctype>::infinity();
+    using ctype = typename TensorFactory<DTYPE>::ctype;
+    using opt_infinity_type = std::conditional_t<
+        std::is_same<ctype, exec_aten::Half>::value,
+        float,
+        ctype>;
+    constexpr auto kInfinity = std::numeric_limits<ctype>::infinity();
+    const auto kOptInfinity =
+        OptScalar(static_cast<opt_infinity_type>(kInfinity));
+    const auto kOptMinusInfinity =
+        OptScalar(static_cast<opt_infinity_type>(-kInfinity));
     std::vector<ClampTestCase<DTYPE>> test_cases = {
         {
             std::string(__func__) + ": Simple negative/positive clamp",
@@ -178,7 +186,7 @@ class OpClampOutTest : public OperatorTest {
             std::string(__func__) + ": Infinite min",
             {2, 2}, // sizes
             {-10.1, -1.1, 1.1, 10.1}, // input_data
-            OptScalar(-kInfinity), // min
+            kOptMinusInfinity, // min
             OptScalar(5.5), // max
             {-10.1, -1.1, 1.1, 5.5}, // expected_data
         },
@@ -187,7 +195,7 @@ class OpClampOutTest : public OperatorTest {
            std::string(__func__) + ": Infinite max",
            {2, 2}, // sizes
            {-10.1, -1.1, 1.1, 10.1}, // input_data
            OptScalar(-5.5), // min
-            OptScalar(kInfinity), // max
+            kOptInfinity, // max
            {-5.5, -1.1, 1.1, 10.1}, // expected_data
         },
@@ -285,6 +293,15 @@ TEST_F(OpClampOutTest, LongTensors) {
   run_signed_integer_test_cases<ScalarType::Long>();
 }
 
+TEST_F(OpClampOutTest, HalfTensors) {
+  // Note that the integer test cases test the situation where the min/max value
+  // Scalars are integer types, demonstrating that floating point types can be
+  // clamped to integer values.
+  run_unsigned_integer_test_cases<ScalarType::Half>();
+  run_signed_integer_test_cases<ScalarType::Half>();
+  run_floating_point_test_cases<ScalarType::Half>();
+}
+
 TEST_F(OpClampOutTest, FloatTensors) {
   // Note that the integer test cases test the situation where the min/max value
   // Scalars are integer types, demonstrating that floating point types can be
diff --git a/kernels/test/op_minimum_test.cpp b/kernels/test/op_minimum_test.cpp
index be43e0af07d..7e12374b8d1 100644
--- a/kernels/test/op_minimum_test.cpp
+++ b/kernels/test/op_minimum_test.cpp
@@ -65,6 +65,10 @@ TEST_F(OpMinimumOutTest, LongTensors) {
   test_minimum_out_same_size<ScalarType::Long>();
 }
 
+TEST_F(OpMinimumOutTest, HalfTensors) {
+  test_minimum_out_same_size<ScalarType::Half>();
+}
+
 TEST_F(OpMinimumOutTest, FloatTensors) {
   test_minimum_out_same_size<ScalarType::Float>();
 }
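For reference, the comparison-through-float pattern used by the new Half overloads in math_util.h, reduced to a minimal sketch. HalfLike is a placeholder for any type convertible to float (e.g. exec_aten::Half in the patch); the real overloads are SFINAE-restricted to Half, and this sketch only assumes that the conversion to float preserves ordering.

    #include <cassert>
    #include <cmath>

    // NaN-propagating min: compare in float and return the original value.
    // Note this returns the NaN operand, unlike std::fmin, which prefers the
    // non-NaN one.
    template <typename HalfLike>
    HalfLike min_override(HalfLike a, HalfLike b) {
      const float float_a = static_cast<float>(a);
      if (std::isnan(float_a)) {
        return a;
      }
      const float float_b = static_cast<float>(b);
      if (std::isnan(float_b)) {
        return b;
      }
      return float_a < float_b ? a : b;
    }

    int main() {
      assert(min_override(2.0f, 3.0f) == 2.0f);
      assert(std::isnan(min_override(std::nanf(""), 3.0f))); // NaN propagates
    }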