From 665b37833c49816d38ffdc59533d5a6368c76b70 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 24 Jun 2025 15:44:21 -0700 Subject: [PATCH 1/2] Reapply "Implement unary_ufunc functions using elementwise_util (#9386)" This PR was reverted due to internal test failures, which should be fixed now (and is being sent as an exported diff to make sure). Original summary: One less set of independent implementations to worry about going forward (e.g., we don't have to vectorize these separately from elementwise_util and they get all benefits of elementwise_util). Differential Revision: [D76754824](https://our.internmc.facebook.com/intern/diff/D76754824/) [ghstack-poisoned] --- .../cadence/fusion_g3/operators/op_exp.cpp | 4 +- .../cadence/fusion_g3/operators/op_rsqrt.cpp | 14 +-- .../cadence/fusion_g3/operators/op_sqrt.cpp | 4 +- .../cadence/fusion_g3/operators/op_tanh.cpp | 4 +- backends/cadence/hifi/operators/op_rsqrt.cpp | 11 +-- backends/cadence/hifi/operators/op_tanh.cpp | 6 +- kernels/portable/cpu/op_acos.cpp | 5 +- kernels/portable/cpu/op_acosh.cpp | 5 +- kernels/portable/cpu/op_asin.cpp | 5 +- kernels/portable/cpu/op_asinh.cpp | 5 +- kernels/portable/cpu/op_atan.cpp | 5 +- kernels/portable/cpu/op_atanh.cpp | 5 +- kernels/portable/cpu/op_ceil.cpp | 4 +- kernels/portable/cpu/op_cos.cpp | 4 +- kernels/portable/cpu/op_cosh.cpp | 5 +- kernels/portable/cpu/op_erf.cpp | 4 +- kernels/portable/cpu/op_exp.cpp | 4 +- kernels/portable/cpu/op_expm1.cpp | 7 +- kernels/portable/cpu/op_floor.cpp | 4 +- kernels/portable/cpu/op_isinf.cpp | 5 +- kernels/portable/cpu/op_isnan.cpp | 5 +- kernels/portable/cpu/op_log.cpp | 4 +- kernels/portable/cpu/op_log10.cpp | 5 +- kernels/portable/cpu/op_log1p.cpp | 5 +- kernels/portable/cpu/op_log2.cpp | 5 +- kernels/portable/cpu/op_reciprocal.cpp | 13 +-- kernels/portable/cpu/op_rsqrt.cpp | 11 +-- kernels/portable/cpu/op_sin.cpp | 4 +- kernels/portable/cpu/op_sinh.cpp | 5 +- kernels/portable/cpu/op_sqrt.cpp | 5 +- kernels/portable/cpu/op_tan.cpp | 4 +- kernels/portable/cpu/op_tanh.cpp | 5 +- kernels/portable/cpu/op_trunc.cpp | 4 +- kernels/portable/cpu/pattern/pattern.cpp | 28 ++++++ kernels/portable/cpu/pattern/pattern.h | 94 +++++++++++++++++-- kernels/portable/cpu/pattern/targets.bzl | 8 +- .../pattern/unary_ufunc_realhb_to_bool.cpp | 60 ------------ .../unary_ufunc_realhbbf16_to_floathbf16.cpp | 60 ------------ .../cpu/pattern/unary_ufunc_realhbf16.cpp | 53 ----------- kernels/portable/cpu/util/vectorized_math.h | 27 +++++- .../kernels/portable/op_registration_util.bzl | 1 + 41 files changed, 242 insertions(+), 274 deletions(-) create mode 100644 kernels/portable/cpu/pattern/pattern.cpp delete mode 100644 kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp delete mode 100644 kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp delete mode 100644 kernels/portable/cpu/pattern/unary_ufunc_realhbf16.cpp diff --git a/backends/cadence/fusion_g3/operators/op_exp.cpp b/backends/cadence/fusion_g3/operators/op_exp.cpp index 41b5d70b222..6b45b37a7b1 100644 --- a/backends/cadence/fusion_g3/operators/op_exp.cpp +++ b/backends/cadence/fusion_g3/operators/op_exp.cpp @@ -59,8 +59,10 @@ Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { return out; } else { + static constexpr const char op_name[] = "exp.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16(std::exp, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::exp(x); }, ctx, in, out); } } diff --git a/backends/cadence/fusion_g3/operators/op_rsqrt.cpp b/backends/cadence/fusion_g3/operators/op_rsqrt.cpp index 5a869fadd09..8c40abb631f 100644 --- a/backends/cadence/fusion_g3/operators/op_rsqrt.cpp +++ b/backends/cadence/fusion_g3/operators/op_rsqrt.cpp @@ -25,14 +25,6 @@ namespace impl { namespace G3 { namespace native { -namespace { - -double rsqrt(double x) { - return 1.0 / std::sqrt(x); -} - -} // namespace - Tensor& rsqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { #ifdef OP_ARG_CHECK // Resize for dynamic shape @@ -60,12 +52,14 @@ Tensor& rsqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { return out; } else { + static constexpr const char op_name[] = "rsqrt.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16(rsqrt, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::rsqrt(x); }, ctx, in, out); } } } // namespace native } // namespace G3 } // namespace impl -} // namespace cadence \ No newline at end of file +} // namespace cadence diff --git a/backends/cadence/fusion_g3/operators/op_sqrt.cpp b/backends/cadence/fusion_g3/operators/op_sqrt.cpp index c6a5a29fab8..0a583dde597 100644 --- a/backends/cadence/fusion_g3/operators/op_sqrt.cpp +++ b/backends/cadence/fusion_g3/operators/op_sqrt.cpp @@ -54,8 +54,10 @@ Tensor& sqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { return out; } else { + static constexpr const char op_name[] = "sqrt.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16(std::sqrt, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::sqrt(x); }, ctx, in, out); } } diff --git a/backends/cadence/fusion_g3/operators/op_tanh.cpp b/backends/cadence/fusion_g3/operators/op_tanh.cpp index 05f39f1361e..5819c76ec61 100644 --- a/backends/cadence/fusion_g3/operators/op_tanh.cpp +++ b/backends/cadence/fusion_g3/operators/op_tanh.cpp @@ -54,8 +54,10 @@ Tensor& tanh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { return out; } else { + static constexpr const char op_name[] = "tanh.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16(std::tanh, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::tanh(x); }, ctx, in, out); } } diff --git a/backends/cadence/hifi/operators/op_rsqrt.cpp b/backends/cadence/hifi/operators/op_rsqrt.cpp index 885c26723ae..3ec80fed3eb 100644 --- a/backends/cadence/hifi/operators/op_rsqrt.cpp +++ b/backends/cadence/hifi/operators/op_rsqrt.cpp @@ -19,13 +19,6 @@ namespace cadence { namespace impl { namespace HiFi { namespace native { -namespace { - -double rsqrt(double x) { - return 1.0 / std::sqrt(x); -} - -} // namespace Tensor& rsqrt_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { bool optimized = true; @@ -45,8 +38,10 @@ Tensor& rsqrt_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { return out; } + static constexpr const char op_name[] = "rsqrt.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16(rsqrt, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::rsqrt(x); }, ctx, in, out); } } // namespace native diff --git a/backends/cadence/hifi/operators/op_tanh.cpp b/backends/cadence/hifi/operators/op_tanh.cpp index 3fdd3111ef8..6034ae0d0c1 100644 --- a/backends/cadence/hifi/operators/op_tanh.cpp +++ b/backends/cadence/hifi/operators/op_tanh.cpp @@ -34,11 +34,13 @@ Tensor& tanh_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { return out; } + static constexpr const char op_name[] = "tanh.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16(std::tanh, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::tanh(x); }, ctx, in, out); } } // namespace native } // namespace HiFi } // namespace impl -} // namespace cadence \ No newline at end of file +} // namespace cadence diff --git a/kernels/portable/cpu/op_acos.cpp b/kernels/portable/cpu/op_acos.cpp index dac3b1546f3..81daf10c9a6 100644 --- a/kernels/portable/cpu/op_acos.cpp +++ b/kernels/portable/cpu/op_acos.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& acos_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::acos, ctx, in, out); + static constexpr const char op_name[] = "acos.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::acos(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_acosh.cpp b/kernels/portable/cpu/op_acosh.cpp index 77f7edf4c5d..b402698d761 100644 --- a/kernels/portable/cpu/op_acosh.cpp +++ b/kernels/portable/cpu/op_acosh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& acosh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::acosh, ctx, in, out); + static constexpr const char op_name[] = "acosh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::acosh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_asin.cpp b/kernels/portable/cpu/op_asin.cpp index 6affa6e4122..ddb52c70e84 100644 --- a/kernels/portable/cpu/op_asin.cpp +++ b/kernels/portable/cpu/op_asin.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& asin_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::asin, ctx, in, out); + static constexpr const char op_name[] = "asin.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::asin(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_asinh.cpp b/kernels/portable/cpu/op_asinh.cpp index bce8dcf6d5a..9441db09589 100644 --- a/kernels/portable/cpu/op_asinh.cpp +++ b/kernels/portable/cpu/op_asinh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& asinh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::asinh, ctx, in, out); + static constexpr const char op_name[] = "asinh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::asinh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_atan.cpp b/kernels/portable/cpu/op_atan.cpp index 23549627a3b..6a73341bf0d 100644 --- a/kernels/portable/cpu/op_atan.cpp +++ b/kernels/portable/cpu/op_atan.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& atan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::atan, ctx, in, out); + static constexpr const char op_name[] = "atan.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::atan(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_atanh.cpp b/kernels/portable/cpu/op_atanh.cpp index 13e6e8ca141..9e036a5fb3b 100644 --- a/kernels/portable/cpu/op_atanh.cpp +++ b/kernels/portable/cpu/op_atanh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& atanh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::atanh, ctx, in, out); + static constexpr const char op_name[] = "atanh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::atanh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_ceil.cpp b/kernels/portable/cpu/op_ceil.cpp index 5aa09ba0084..e2c8e6f07b6 100644 --- a/kernels/portable/cpu/op_ceil.cpp +++ b/kernels/portable/cpu/op_ceil.cpp @@ -17,7 +17,9 @@ namespace native { using executorch::aten::Tensor; Tensor& ceil_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbf16(std::ceil, ctx, in, out); + static constexpr const char op_name[] = "ceil.out"; + return internal::unary_ufunc_realhbf16( + [](auto x) { return executorch::math::ceil(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_cos.cpp b/kernels/portable/cpu/op_cos.cpp index e536060d162..e7876116f94 100644 --- a/kernels/portable/cpu/op_cos.cpp +++ b/kernels/portable/cpu/op_cos.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& cos_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::cos, ctx, in, out); + static constexpr const char op_name[] = "cos.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::cos(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_cosh.cpp b/kernels/portable/cpu/op_cosh.cpp index e622bbe6fcd..9703ff0336c 100644 --- a/kernels/portable/cpu/op_cosh.cpp +++ b/kernels/portable/cpu/op_cosh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& cosh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::cosh, ctx, in, out); + static constexpr const char op_name[] = "cosh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::cosh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_erf.cpp b/kernels/portable/cpu/op_erf.cpp index 6897bcda95b..aee0101fdb4 100644 --- a/kernels/portable/cpu/op_erf.cpp +++ b/kernels/portable/cpu/op_erf.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& erf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::erf, ctx, in, out); + static constexpr const char op_name[] = "erf.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::erf(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_exp.cpp b/kernels/portable/cpu/op_exp.cpp index cbfc8924cb0..f2241613609 100644 --- a/kernels/portable/cpu/op_exp.cpp +++ b/kernels/portable/cpu/op_exp.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::exp, ctx, in, out); + static constexpr const char op_name[] = "exp.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::exp(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_expm1.cpp b/kernels/portable/cpu/op_expm1.cpp index f2d49f615b1..67af9b343bb 100644 --- a/kernels/portable/cpu/op_expm1.cpp +++ b/kernels/portable/cpu/op_expm1.cpp @@ -7,16 +7,19 @@ */ #include +#include #include #include +#include namespace torch { namespace executor { namespace native { Tensor& expm1_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::expm1, ctx, in, out); + static constexpr const char op_name[] = "expm1.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::expm1(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_floor.cpp b/kernels/portable/cpu/op_floor.cpp index 4061722bd27..14b49cafbc1 100644 --- a/kernels/portable/cpu/op_floor.cpp +++ b/kernels/portable/cpu/op_floor.cpp @@ -17,7 +17,9 @@ namespace native { using executorch::aten::Tensor; Tensor& floor_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbf16(std::floor, ctx, in, out); + static constexpr const char op_name[] = "floor.out"; + return internal::unary_ufunc_realhbf16( + [](auto x) { return executorch::math::floor(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_isinf.cpp b/kernels/portable/cpu/op_isinf.cpp index 92d1e563a2e..42798231a84 100644 --- a/kernels/portable/cpu/op_isinf.cpp +++ b/kernels/portable/cpu/op_isinf.cpp @@ -17,8 +17,9 @@ namespace native { Tensor& isinf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { // Lambda is syntactic sugar needed to workaround compilation on some older // non-compatible distros where isnan is returning int rather than bool - return internal::unary_ufunc_realhb_to_bool( - [](double x) -> bool { return std::isinf(x); }, ctx, in, out); + static constexpr const char op_name[] = "isinf.out"; + return internal::unary_ufunc_realhb_to_bool( + [](auto x) -> bool { return std::isinf(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_isnan.cpp b/kernels/portable/cpu/op_isnan.cpp index 51e189992ee..817d314fd2b 100644 --- a/kernels/portable/cpu/op_isnan.cpp +++ b/kernels/portable/cpu/op_isnan.cpp @@ -17,8 +17,9 @@ namespace native { Tensor& isnan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { // Lambda is syntactic sugar needed to workaround compilation on some older // non-compatible distros where isnan is returning int rather than bool - return internal::unary_ufunc_realhb_to_bool( - [](double x) -> bool { return std::isnan(x); }, ctx, in, out); + static constexpr const char op_name[] = "isnan.out"; + return internal::unary_ufunc_realhb_to_bool( + [](auto x) -> bool { return std::isnan(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log.cpp b/kernels/portable/cpu/op_log.cpp index 8a36bce8c49..5b0c32549aa 100644 --- a/kernels/portable/cpu/op_log.cpp +++ b/kernels/portable/cpu/op_log.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& log_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::log, ctx, in, out); + static constexpr const char op_name[] = "log.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::log(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log10.cpp b/kernels/portable/cpu/op_log10.cpp index 89f9b672476..5251aea201d 100644 --- a/kernels/portable/cpu/op_log10.cpp +++ b/kernels/portable/cpu/op_log10.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& log10_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::log10, ctx, in, out); + static constexpr const char op_name[] = "log10.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::log10(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log1p.cpp b/kernels/portable/cpu/op_log1p.cpp index 2daa31e37ff..f352750a944 100644 --- a/kernels/portable/cpu/op_log1p.cpp +++ b/kernels/portable/cpu/op_log1p.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& log1p_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::log1p, ctx, in, out); + static constexpr const char op_name[] = "log1p.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::log1p(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log2.cpp b/kernels/portable/cpu/op_log2.cpp index 4d7406832e4..42d17ea83b9 100644 --- a/kernels/portable/cpu/op_log2.cpp +++ b/kernels/portable/cpu/op_log2.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& log2_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::log2, ctx, in, out); + static constexpr const char op_name[] = "log2.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::log2(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_reciprocal.cpp b/kernels/portable/cpu/op_reciprocal.cpp index f22f9883858..a1bd116a962 100644 --- a/kernels/portable/cpu/op_reciprocal.cpp +++ b/kernels/portable/cpu/op_reciprocal.cpp @@ -12,18 +12,11 @@ namespace torch { namespace executor { namespace native { -namespace { - -double reciprocal(double x) { - return 1.0 / x; -} - -} // namespace - Tensor& reciprocal_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - reciprocal, ctx, in, out); + static constexpr const char op_name[] = "reciprocal.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::reciprocal(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_rsqrt.cpp b/kernels/portable/cpu/op_rsqrt.cpp index 19c4c6c1a57..a14eb15d7ec 100644 --- a/kernels/portable/cpu/op_rsqrt.cpp +++ b/kernels/portable/cpu/op_rsqrt.cpp @@ -12,16 +12,11 @@ namespace torch { namespace executor { namespace native { -namespace { - -double rsqrt(double x) { - return 1.0 / std::sqrt(x); -} - -} // namespace Tensor& rsqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(rsqrt, ctx, in, out); + static constexpr const char op_name[] = "rsqrt.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::rsqrt(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_sin.cpp b/kernels/portable/cpu/op_sin.cpp index ad65c4be18b..aeb73009729 100644 --- a/kernels/portable/cpu/op_sin.cpp +++ b/kernels/portable/cpu/op_sin.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& sin_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::sin, ctx, in, out); + static constexpr const char op_name[] = "sin.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::sin(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_sinh.cpp b/kernels/portable/cpu/op_sinh.cpp index 21666392392..f4cc67ad35f 100644 --- a/kernels/portable/cpu/op_sinh.cpp +++ b/kernels/portable/cpu/op_sinh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& sinh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::sinh, ctx, in, out); + static constexpr const char op_name[] = "sinh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::sinh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_sqrt.cpp b/kernels/portable/cpu/op_sqrt.cpp index bd2075f5b04..1b3d2ff6de5 100644 --- a/kernels/portable/cpu/op_sqrt.cpp +++ b/kernels/portable/cpu/op_sqrt.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& sqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::sqrt, ctx, in, out); + static constexpr const char op_name[] = "sqrt.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::sqrt(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_tan.cpp b/kernels/portable/cpu/op_tan.cpp index a2b921d5146..19ccb84935b 100644 --- a/kernels/portable/cpu/op_tan.cpp +++ b/kernels/portable/cpu/op_tan.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& tan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::tan, ctx, in, out); + static constexpr const char op_name[] = "tan.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::tan(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_tanh.cpp b/kernels/portable/cpu/op_tanh.cpp index ae9f93dc62c..623968ac721 100644 --- a/kernels/portable/cpu/op_tanh.cpp +++ b/kernels/portable/cpu/op_tanh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& tanh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::tanh, ctx, in, out); + static constexpr const char op_name[] = "tanh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::tanh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_trunc.cpp b/kernels/portable/cpu/op_trunc.cpp index 2d70a3b1724..9c96865db0e 100644 --- a/kernels/portable/cpu/op_trunc.cpp +++ b/kernels/portable/cpu/op_trunc.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& trunc_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbf16(std::trunc, ctx, in, out); + static constexpr const char op_name[] = "trunc.out"; + return internal::unary_ufunc_realhbf16( + [](auto x) { return executorch::math::trunc(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/pattern/pattern.cpp b/kernels/portable/cpu/pattern/pattern.cpp new file mode 100644 index 00000000000..61571f25ddc --- /dev/null +++ b/kernels/portable/cpu/pattern/pattern.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace torch::executor::native::internal { + +bool check_and_resize_inputs( + KernelRuntimeContext& ctx, + const Tensor& in, + Tensor& out) { + ET_KERNEL_CHECK( + ctx, tensors_have_same_dim_order(in, out), InvalidArgument, false); + ET_KERNEL_CHECK_MSG( + ctx, + resize_tensor(out, in.sizes()) == Error::Ok, + InvalidArgument, + false, + "Failed to resize output tensor."); + return true; +} + +} // namespace torch::executor::native::internal diff --git a/kernels/portable/cpu/pattern/pattern.h b/kernels/portable/cpu/pattern/pattern.h index 2d4b2ac509c..02690739a01 100644 --- a/kernels/portable/cpu/pattern/pattern.h +++ b/kernels/portable/cpu/pattern/pattern.h @@ -46,6 +46,7 @@ question is a bit more specific, then add a descriptive sufix. */ #pragma once +#include #include namespace torch { @@ -53,29 +54,78 @@ namespace executor { namespace native { namespace internal { +// Implementation detail for the other helpers in this header. Returns +// true on success, false on failure. +bool check_and_resize_inputs( + KernelRuntimeContext& ctx, + const Tensor& in, + Tensor& out); + /** * Implements an op pattern for ops that take a single input tensor of any - * realh dtye, no additional arguments, and outputs a tensor of the same size - * and dtype. The function fn specifies the math operation which is applied to - * the input tensor element-wise. + * realhbf16 dtype, no additional arguments, and outputs a tensor of the same + * size and dtype. The function fn specifies the math operation which is applied + * to the input tensor element-wise. */ +template Tensor& unary_ufunc_realhbf16( - double (*fn)(double), + const Op& fn, KernelRuntimeContext& ctx, const Tensor& in, - Tensor& out); + Tensor& out) { + if (!check_and_resize_inputs(ctx, in, out)) { + return out; + } + ET_KERNEL_CHECK( + ctx, tensors_have_same_shape_and_dtype(in, out), InvalidArgument, out); + + ET_SWITCH_REALHBF16_TYPES(out.scalar_type(), ctx, op_name, CTYPE, [&] { + utils::apply_unitensor_elementwise_fn< + CTYPE, + op_name, + utils::SupportedTensorDtypes::SAME_AS_COMMON>( + fn, ctx, in, utils::SupportedTensorDtypes::REALHBF16, out); + }); + return out; +} /** * Implements an op pattern for ops that take a single input tensor of any - * realhb dtye (real, half and boolean), no additional arguments, and outputs a + * realhb dtype (real, half and boolean), no additional arguments, and outputs a * boolean tensor of the same size. The function fn specifies the math * operation which is applied to the input tensor element-wise. */ +template Tensor& unary_ufunc_realhb_to_bool( - bool (*fn)(double), + const Op& fn, KernelRuntimeContext& ctx, const Tensor& in, - Tensor& out); + Tensor& out) { + if (!check_and_resize_inputs(ctx, in, out)) { + return out; + } + ET_KERNEL_CHECK_MSG( + ctx, + out.scalar_type() == executorch::aten::ScalarType::Bool, + InvalidArgument, + out, + "Expected out tensor to have dtype Bool, but got %" PRId8 " instead.", + static_cast(out.scalar_type())); + + ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE_IN, [&] { + utils::apply_unitensor_elementwise_fn< + CTYPE_IN, + op_name, + utils::SupportedTensorDtypes::BOOL>( + [fn](const CTYPE_IN val_in) { return fn(val_in); }, + ctx, + in, + utils::SupportedTensorDtypes::REALHBBF16, + out); + }); + + return out; +} /** * Implements an op pattern for ops that take a single input tensor of any @@ -83,11 +133,35 @@ Tensor& unary_ufunc_realhb_to_bool( * outputs a floating point tensor of the same size. The function fn specifies * the math operation which is applied to the input tensor element-wise. */ +template Tensor& unary_ufunc_realhbbf16_to_floathbf16( - double (*fn)(double), + const Op& fn, KernelRuntimeContext& ctx, const Tensor& in, - Tensor& out); + Tensor& out) { + ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out); + + if (!check_and_resize_inputs(ctx, in, out)) { + return out; + } + + ScalarType compute_type = in.scalar_type() == ScalarType::Double + ? ScalarType::Double + : ScalarType::Float; + ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&] { + utils::apply_unitensor_elementwise_fn< + CTYPE_COMPUTE, + op_name, + utils::SupportedTensorDtypes::FLOATHBF16>( + [fn](const auto val_in) { return fn(val_in); }, + ctx, + in, + utils::SupportedTensorDtypes::REALHBBF16, + out); + }); + + return out; +} } // namespace internal } // namespace native diff --git a/kernels/portable/cpu/pattern/targets.bzl b/kernels/portable/cpu/pattern/targets.bzl index 5fc73ccd911..4140e4e0f14 100644 --- a/kernels/portable/cpu/pattern/targets.bzl +++ b/kernels/portable/cpu/pattern/targets.bzl @@ -49,18 +49,14 @@ def define_common_targets(): runtime.cxx_library( name = "pattern", - srcs = [ - "unary_ufunc_realhb_to_bool.cpp", - "unary_ufunc_realhbbf16_to_floathbf16.cpp", - "unary_ufunc_realhbf16.cpp", - ], + srcs = ["pattern.cpp"], exported_headers = [ "pattern.h", ], compiler_flags = ["-Wno-missing-prototypes"], exported_deps = [ "//executorch/kernels/portable/cpu/util:broadcast_util", - "//executorch/kernels/portable/cpu/util:functional_util", + "//executorch/kernels/portable/cpu/util:elementwise_util", "//executorch/runtime/kernel:kernel_includes", ], visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/optimized/cpu/..."], diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp deleted file mode 100644 index 367137ad02c..00000000000 --- a/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -namespace torch { -namespace executor { -namespace native { -namespace internal { - -Tensor& unary_ufunc_realhb_to_bool( - bool (*fn)(double), - KernelRuntimeContext& ctx, - const Tensor& in, - Tensor& out) { - (void)ctx; - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, in.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK_MSG( - ctx, - out.scalar_type() == executorch::aten::ScalarType::Bool, - InvalidArgument, - out, - "Expected out tensor to have dtype Bool, but got %" PRId8 " instead.", - static_cast(out.scalar_type())); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); - - const auto in_type = in.scalar_type(); - - ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, __func__, CTYPE_IN, [&] { - apply_unary_map_fn( - [fn](const CTYPE_IN val_in) { return fn(val_in); }, - in.const_data_ptr(), - out.mutable_data_ptr(), - in.numel()); - }); - - return out; -} - -} // namespace internal -} // namespace native -} // namespace executor -} // namespace torch diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp deleted file mode 100644 index 602b5b1bfd2..00000000000 --- a/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -namespace torch { -namespace executor { -namespace native { -namespace internal { - -Tensor& unary_ufunc_realhbbf16_to_floathbf16( - double (*fn)(double), - KernelRuntimeContext& ctx, - const Tensor& in, - Tensor& out) { - (void)ctx; - - ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out); - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, in.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); - - const auto in_type = in.scalar_type(); - const auto out_type = out.scalar_type(); - - ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, __func__, CTYPE_IN, [&] { - ET_SWITCH_FLOATHBF16_TYPES(out_type, ctx, __func__, CTYPE_OUT, [&] { - apply_unary_map_fn( - [fn](const CTYPE_IN val_in) { - CTYPE_OUT xi = static_cast(val_in); - return static_cast(fn(xi)); - }, - in.const_data_ptr(), - out.mutable_data_ptr(), - in.numel()); - }); - }); - - return out; -} - -} // namespace internal -} // namespace native -} // namespace executor -} // namespace torch diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realhbf16.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realhbf16.cpp deleted file mode 100644 index 3672e223b7e..00000000000 --- a/kernels/portable/cpu/pattern/unary_ufunc_realhbf16.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -namespace torch { -namespace executor { -namespace native { -namespace internal { - -Tensor& unary_ufunc_realhbf16( - double (*fn)(double), - KernelRuntimeContext& ctx, - const Tensor& in, - Tensor& out) { - (void)ctx; - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, in.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_shape_and_dtype(in, out), InvalidArgument, out); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); - - ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, __func__, CTYPE, [&] { - apply_unary_map_fn( - [fn](const CTYPE val_in) { return static_cast(fn(val_in)); }, - in.const_data_ptr(), - out.mutable_data_ptr(), - in.numel()); - }); - - return out; -} - -} // namespace internal -} // namespace native -} // namespace executor -} // namespace torch diff --git a/kernels/portable/cpu/util/vectorized_math.h b/kernels/portable/cpu/util/vectorized_math.h index e67e862ef62..13339873b90 100644 --- a/kernels/portable/cpu/util/vectorized_math.h +++ b/kernels/portable/cpu/util/vectorized_math.h @@ -104,11 +104,14 @@ auto convert_to_vectorized_n_of_float(at::vec::Vectorized vec) { #endif // ET_USE_PYTORCH_HEADERS // To simplify client code, we provide coverage for a bunch of float ops (the -// same ones listed in ATen vml.h) here. +// same ones listed in ATen vml.h, plus acosh, asinh, atanh) here. ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(abs) ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(acos) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(acosh) ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(asin) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(asinh) ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(atan) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(atanh) ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(ceil) ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(cos) ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(cosh) @@ -131,12 +134,30 @@ ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(trunc) ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(lgamma) #if defined(ET_USE_PYTORCH_HEADERS) && ET_USE_PYTORCH_HEADERS -ET_INTERNAL_VECTORIZED_FLOAT_BINARY_FUNC(rsqrt) +ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(reciprocal) +ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(rsqrt) #endif // ET_USE_PYTORCH_HEADERS namespace executorch { inline namespace math { -template >> +inline float reciprocal(float x) { + return 1.0f / x; +} + +inline double reciprocal(double x) { + return 1.0 / x; +} + +template < + typename Integer, + std::enable_if_t, bool> = true> +double reciprocal(Integer x) { + return reciprocal((double)x); +} + +template < + typename T, + std::enable_if_t, bool> = true> T rsqrt(T x) { return T(1) / std::sqrt(x); } diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl index 1ae20ca7c61..baefcb8f00c 100644 --- a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl +++ b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl @@ -533,6 +533,7 @@ ATEN_OPS = ( name = "op_expm1", deps = [ "//executorch/kernels/portable/cpu/pattern:pattern", + "//executorch/kernels/portable/cpu/util:elementwise_util", ], ), op_target( From fa745c35a9ef401ddc0d3dffcc1aa0f7d27da5d3 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 25 Jun 2025 08:35:49 -0700 Subject: [PATCH 2/2] lintrunner on "Reapply "Implement unary_ufunc functions using elementwise_util (#9386)"" This PR was reverted due to internal test failures, which should be fixed now (and is being sent as an exported diff to make sure). Original summary: One less set of independent implementations to worry about going forward (e.g., we don't have to vectorize these separately from elementwise_util and they get all benefits of elementwise_util). Differential Revision: [D76754824](https://our.internmc.facebook.com/intern/diff/D76754824/) [ghstack-poisoned] --- backends/cadence/fusion_g3/operators/op_exp.cpp | 4 ++-- backends/cadence/fusion_g3/operators/op_rsqrt.cpp | 4 ++-- backends/cadence/fusion_g3/operators/op_sqrt.cpp | 4 ++-- backends/cadence/fusion_g3/operators/op_tanh.cpp | 4 ++-- backends/cadence/hifi/operators/op_rsqrt.cpp | 4 ++-- backends/cadence/hifi/operators/op_tanh.cpp | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/backends/cadence/fusion_g3/operators/op_exp.cpp b/backends/cadence/fusion_g3/operators/op_exp.cpp index 6b45b37a7b1..4c06e8d2c24 100644 --- a/backends/cadence/fusion_g3/operators/op_exp.cpp +++ b/backends/cadence/fusion_g3/operators/op_exp.cpp @@ -61,8 +61,8 @@ Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { } else { static constexpr const char op_name[] = "exp.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16( - [](auto x) { return executorch::math::exp(x); }, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::exp(x); }, ctx, in, out); } } diff --git a/backends/cadence/fusion_g3/operators/op_rsqrt.cpp b/backends/cadence/fusion_g3/operators/op_rsqrt.cpp index 8c40abb631f..d79625cb45f 100644 --- a/backends/cadence/fusion_g3/operators/op_rsqrt.cpp +++ b/backends/cadence/fusion_g3/operators/op_rsqrt.cpp @@ -54,8 +54,8 @@ Tensor& rsqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { } else { static constexpr const char op_name[] = "rsqrt.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16( - [](auto x) { return executorch::math::rsqrt(x); }, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::rsqrt(x); }, ctx, in, out); } } diff --git a/backends/cadence/fusion_g3/operators/op_sqrt.cpp b/backends/cadence/fusion_g3/operators/op_sqrt.cpp index 0a583dde597..923fbc9209b 100644 --- a/backends/cadence/fusion_g3/operators/op_sqrt.cpp +++ b/backends/cadence/fusion_g3/operators/op_sqrt.cpp @@ -56,8 +56,8 @@ Tensor& sqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { } else { static constexpr const char op_name[] = "sqrt.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16( - [](auto x) { return executorch::math::sqrt(x); }, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::sqrt(x); }, ctx, in, out); } } diff --git a/backends/cadence/fusion_g3/operators/op_tanh.cpp b/backends/cadence/fusion_g3/operators/op_tanh.cpp index 5819c76ec61..179786aff2b 100644 --- a/backends/cadence/fusion_g3/operators/op_tanh.cpp +++ b/backends/cadence/fusion_g3/operators/op_tanh.cpp @@ -56,8 +56,8 @@ Tensor& tanh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { } else { static constexpr const char op_name[] = "tanh.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16( - [](auto x) { return executorch::math::tanh(x); }, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::tanh(x); }, ctx, in, out); } } diff --git a/backends/cadence/hifi/operators/op_rsqrt.cpp b/backends/cadence/hifi/operators/op_rsqrt.cpp index 3ec80fed3eb..df6ad844f01 100644 --- a/backends/cadence/hifi/operators/op_rsqrt.cpp +++ b/backends/cadence/hifi/operators/op_rsqrt.cpp @@ -40,8 +40,8 @@ Tensor& rsqrt_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { static constexpr const char op_name[] = "rsqrt.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16( - [](auto x) { return executorch::math::rsqrt(x); }, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::rsqrt(x); }, ctx, in, out); } } // namespace native diff --git a/backends/cadence/hifi/operators/op_tanh.cpp b/backends/cadence/hifi/operators/op_tanh.cpp index 6034ae0d0c1..d97c6bf6f33 100644 --- a/backends/cadence/hifi/operators/op_tanh.cpp +++ b/backends/cadence/hifi/operators/op_tanh.cpp @@ -36,8 +36,8 @@ Tensor& tanh_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { static constexpr const char op_name[] = "tanh.out"; return torch::executor::native::internal:: - unary_ufunc_realhbbf16_to_floathbf16( - [](auto x) { return executorch::math::tanh(x); }, ctx, in, out); + unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return executorch::math::tanh(x); }, ctx, in, out); } } // namespace native