Skip to content

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Aug 29, 2024

Show fallback cases in amdlibm tests where it doesn't have that specific op

@llvmbot llvmbot added llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels Aug 29, 2024
@llvmbot
Copy link
Member

llvmbot commented Aug 29, 2024

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Simon Pilgrim (RKSimon)

Changes

Show fallback cases in amdlibm tests where it doesn't have that specific op


Full diff: https://github.com/llvm/llvm-project/pull/106584.diff

2 Files Affected:

  • (modified) llvm/lib/Analysis/VectorUtils.cpp (+6)
  • (modified) llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll (+13)
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index cc742ab35f4498..32ce34114b2f50 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -66,9 +66,15 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::umul_fix:
   case Intrinsic::umul_fix_sat:
   case Intrinsic::sqrt: // Begin floating-point.
+  case Intrinsic::asin:
+  case Intrinsic::acos:
+  case Intrinsic::atan:
   case Intrinsic::sin:
   case Intrinsic::cos:
   case Intrinsic::tan:
+  case Intrinsic::sinh:
+  case Intrinsic::cosh:
+  case Intrinsic::tanh:
   case Intrinsic::exp:
   case Intrinsic::exp2:
   case Intrinsic::log:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
index 04289d43f40e2f..c051e2f18380bd 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -414,6 +414,7 @@ for.end:
 
 define void @acos_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @acos_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
@@ -487,7 +488,10 @@ for.end:
 
 define void @asin_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @asin_f64_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -510,6 +514,7 @@ for.end:
 
 define void @asin_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @asin_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
@@ -588,6 +593,7 @@ define void @atan_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -610,6 +616,7 @@ for.end:
 
 define void @atan_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @atan_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
@@ -683,6 +690,9 @@ for.end:
 define void @cosh_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cosh_f64_intrinsic(
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -705,8 +715,10 @@ for.end:
 
 define void @cosh_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cosh_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -754,6 +766,7 @@ for.end:
 
 define void @tanh_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @tanh_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])

@farzonl
Copy link
Member

farzonl commented Aug 29, 2024

@RKSimon I didn't add isTriviallyVectorizable for these because it broke a number of tests in the RISCV backend. isTriviallyVectorizable is exposed across all backends and I think what we would want is something that could be opt in per backend. https://github.com/llvm/llvm-project/blob/main/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll#L488

@alexey-bataev
Copy link
Member

Could you add the tests for other targets?

@RKSimon
Copy link
Collaborator Author

RKSimon commented Aug 29, 2024

@RKSimon I didn't add isTriviallyVectorizable for these because it broke a number of tests in the RISCV backend. isTriviallyVectorizable is exposed across all backends and I think what we would want is something that could be opt in per backend. https://github.com/llvm/llvm-project/blob/main/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll#L488

This no longer appears to be an issue.

RKSimon added a commit that referenced this pull request Aug 30, 2024
@RKSimon RKSimon force-pushed the vectorize-arc-hypot branch from 8d660e1 to 519c030 Compare August 30, 2024 10:31
@alexey-bataev
Copy link
Member

Could you add the tests for AArch64 and RISCV too?

@RKSimon
Copy link
Collaborator Author

RKSimon commented Aug 30, 2024

What attributes / veclibs should I use for riscv? llvm-project\llvm\test\Transforms\SLPVectorizer\RISCV\math-function.ll doesn't seem to do much

@alexey-bataev
Copy link
Member

What attributes / veclibs should I use for riscv? llvm-project\llvm\test\Transforms\SLPVectorizer\RISCV\math-function.ll doesn't seem to do much

Without veclibs, just some basic stuff

@RKSimon
Copy link
Collaborator Author

RKSimon commented Aug 30, 2024

That's what math-function.ll already does

@alexey-bataev
Copy link
Member

That's what math-function.ll already does

But not newly added functions

RKSimon added a commit that referenced this pull request Aug 30, 2024
@RKSimon RKSimon force-pushed the vectorize-arc-hypot branch from 519c030 to 86e6eb7 Compare August 30, 2024 13:04
@RKSimon
Copy link
Collaborator Author

RKSimon commented Aug 30, 2024

ceb613a adds missing riscv coverage - checks that there is no vectorisation

Copy link
Member

@alexey-bataev alexey-bataev left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG, thanks!

@RKSimon RKSimon merged commit d58d105 into llvm:main Aug 30, 2024
8 checks passed
@RKSimon RKSimon deleted the vectorize-arc-hypot branch August 30, 2024 15:49
RKSimon added a commit to RKSimon/llvm-project that referenced this pull request Aug 31, 2024
…os/asin/atan and cosh/sinh/tanh libcalls

Followup to #106584 - ensure acos/asin/atan and cosh/sinh/tanh libcalls correctly map to the llvm intrinsic equivalents
RKSimon added a commit to RKSimon/llvm-project that referenced this pull request Sep 2, 2024
…os/asin/atan and cosh/sinh/tanh libcalls

Followup to #106584 - ensure acos/asin/atan and cosh/sinh/tanh libcalls correctly map to the llvm intrinsic equivalents
RKSimon added a commit that referenced this pull request Sep 3, 2024
…os/asin/atan and cosh/sinh/tanh libcalls (#106844)

Followup to #106584 - ensure acos/asin/atan and cosh/sinh/tanh libcalls correctly map to the llvm intrinsic equivalents
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants