[SimplifyLibCalls] Merge sqrt into the power of exp (#79146)
Sync https://gitee.com/openeuler/llvm-project/pulls/76
This commit is contained in:
parent
1906f58887
commit
049a6307bf
246
0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch
Normal file
246
0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch
Normal file
@ -0,0 +1,246 @@
|
||||
From 60ff801d1ea96ab964039cc1ed42e1dca0a63d54 Mon Sep 17 00:00:00 2001
|
||||
From: Anton Sidorenko <anton.sidorenko@syntacore.com>
|
||||
Date: Tue, 6 Feb 2024 12:02:06 +0300
|
||||
Subject: [PATCH] [SimplifyLibCalls] Merge sqrt into the power of exp (#79146)
|
||||
|
||||
Under fast-math flags it's possible to convert `sqrt(exp(X))` into
|
||||
`exp(X * 0.5)`. I suppose that this transformation is always profitable.
|
||||
This is similar to the optimization existing in GCC.
|
||||
---
|
||||
.../llvm/Transforms/Utils/SimplifyLibCalls.h | 1 +
|
||||
.../lib/Transforms/Utils/SimplifyLibCalls.cpp | 67 ++++++++++
|
||||
llvm/test/Transforms/InstCombine/sqrt.ll | 120 ++++++++++++++++++
|
||||
3 files changed, 188 insertions(+)
|
||||
|
||||
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
|
||||
index eb10545ee149..1aad0b298845 100644
|
||||
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
|
||||
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
|
||||
@@ -201,6 +201,7 @@ private:
|
||||
Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B);
|
||||
Value *optimizeLog(CallInst *CI, IRBuilderBase &B);
|
||||
Value *optimizeSqrt(CallInst *CI, IRBuilderBase &B);
|
||||
+ Value *mergeSqrtToExp(CallInst *CI, IRBuilderBase &B);
|
||||
Value *optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B);
|
||||
Value *optimizeTan(CallInst *CI, IRBuilderBase &B);
|
||||
// Wrapper for all floating point library call optimizations
|
||||
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
|
||||
index 3ad97613fe7a..dd5bbdaaf6d3 100644
|
||||
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
|
||||
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
|
||||
@@ -2539,6 +2539,70 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
|
||||
return Ret;
|
||||
}
|
||||
|
||||
+// sqrt(exp(X)) -> exp(X * 0.5)
|
||||
+Value *LibCallSimplifier::mergeSqrtToExp(CallInst *CI, IRBuilderBase &B) {
|
||||
+ if (!CI->hasAllowReassoc())
|
||||
+ return nullptr;
|
||||
+
|
||||
+ Function *SqrtFn = CI->getCalledFunction();
|
||||
+ CallInst *Arg = dyn_cast<CallInst>(CI->getArgOperand(0));
|
||||
+ if (!Arg || !Arg->hasAllowReassoc() || !Arg->hasOneUse())
|
||||
+ return nullptr;
|
||||
+ Intrinsic::ID ArgID = Arg->getIntrinsicID();
|
||||
+ LibFunc ArgLb = NotLibFunc;
|
||||
+ TLI->getLibFunc(*Arg, ArgLb);
|
||||
+
|
||||
+ LibFunc SqrtLb, ExpLb, Exp2Lb, Exp10Lb;
|
||||
+
|
||||
+ if (TLI->getLibFunc(SqrtFn->getName(), SqrtLb))
|
||||
+ switch (SqrtLb) {
|
||||
+ case LibFunc_sqrtf:
|
||||
+ ExpLb = LibFunc_expf;
|
||||
+ Exp2Lb = LibFunc_exp2f;
|
||||
+ Exp10Lb = LibFunc_exp10f;
|
||||
+ break;
|
||||
+ case LibFunc_sqrt:
|
||||
+ ExpLb = LibFunc_exp;
|
||||
+ Exp2Lb = LibFunc_exp2;
|
||||
+ Exp10Lb = LibFunc_exp10;
|
||||
+ break;
|
||||
+ case LibFunc_sqrtl:
|
||||
+ ExpLb = LibFunc_expl;
|
||||
+ Exp2Lb = LibFunc_exp2l;
|
||||
+ Exp10Lb = LibFunc_exp10l;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return nullptr;
|
||||
+ }
|
||||
+ else if (SqrtFn->getIntrinsicID() == Intrinsic::sqrt) {
|
||||
+ if (CI->getType()->getScalarType()->isFloatTy()) {
|
||||
+ ExpLb = LibFunc_expf;
|
||||
+ Exp2Lb = LibFunc_exp2f;
|
||||
+ Exp10Lb = LibFunc_exp10f;
|
||||
+ } else if (CI->getType()->getScalarType()->isDoubleTy()) {
|
||||
+ ExpLb = LibFunc_exp;
|
||||
+ Exp2Lb = LibFunc_exp2;
|
||||
+ Exp10Lb = LibFunc_exp10;
|
||||
+ } else
|
||||
+ return nullptr;
|
||||
+ } else
|
||||
+ return nullptr;
|
||||
+
|
||||
+ if (ArgLb != ExpLb && ArgLb != Exp2Lb && ArgLb != Exp10Lb &&
|
||||
+ ArgID != Intrinsic::exp && ArgID != Intrinsic::exp2)
|
||||
+ return nullptr;
|
||||
+
|
||||
+ IRBuilderBase::InsertPointGuard Guard(B);
|
||||
+ B.SetInsertPoint(Arg);
|
||||
+ auto *ExpOperand = Arg->getOperand(0);
|
||||
+ auto *FMul =
|
||||
+ B.CreateFMulFMF(ExpOperand, ConstantFP::get(ExpOperand->getType(), 0.5),
|
||||
+ CI, "merged.sqrt");
|
||||
+
|
||||
+ Arg->setOperand(0, FMul);
|
||||
+ return Arg;
|
||||
+}
|
||||
+
|
||||
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
|
||||
Module *M = CI->getModule();
|
||||
Function *Callee = CI->getCalledFunction();
|
||||
@@ -2551,6 +2615,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
|
||||
Callee->getIntrinsicID() == Intrinsic::sqrt))
|
||||
Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
|
||||
|
||||
+ if (Value *Opt = mergeSqrtToExp(CI, B))
|
||||
+ return Opt;
|
||||
+
|
||||
if (!CI->isFast())
|
||||
return Ret;
|
||||
|
||||
diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll
|
||||
index 004df3e30c72..f72fe5a6a581 100644
|
||||
--- a/llvm/test/Transforms/InstCombine/sqrt.ll
|
||||
+++ b/llvm/test/Transforms/InstCombine/sqrt.ll
|
||||
@@ -88,7 +88,127 @@ define float @sqrt_call_fabs_f32(float %x) {
|
||||
ret float %sqrt
|
||||
}
|
||||
|
||||
+define double @sqrt_exp(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @llvm.exp.f64(double %x)
|
||||
+ %res = call reassoc double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp_2(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_2(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @exp(double %x)
|
||||
+ %res = call reassoc double @sqrt(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp2(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp2(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp2(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @exp2(double %x)
|
||||
+ %res = call reassoc double @sqrt(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp10(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp10(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp10(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @exp10(double %x)
|
||||
+ %res = call reassoc double @sqrt(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+; Negative test
|
||||
+define double @sqrt_exp_nofast_1(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_nofast_1(
|
||||
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.exp.f64(double [[X:%.*]])
|
||||
+; CHECK-NEXT: [[RES:%.*]] = call reassoc double @llvm.sqrt.f64(double [[E]])
|
||||
+; CHECK-NEXT: ret double [[RES]]
|
||||
+;
|
||||
+ %e = call double @llvm.exp.f64(double %x)
|
||||
+ %res = call reassoc double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+; Negative test
|
||||
+define double @sqrt_exp_nofast_2(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_nofast_2(
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[X:%.*]])
|
||||
+; CHECK-NEXT: [[RES:%.*]] = call double @llvm.sqrt.f64(double [[E]])
|
||||
+; CHECK-NEXT: ret double [[RES]]
|
||||
+;
|
||||
+ %e = call reassoc double @llvm.exp.f64(double %x)
|
||||
+ %res = call double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp_merge_constant(double %x, double %y) {
|
||||
+; CHECK-LABEL: @sqrt_exp_merge_constant(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc nsz double [[X:%.*]], 5.000000e+00
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %mul = fmul reassoc nsz double %x, 10.0
|
||||
+ %e = call reassoc double @llvm.exp.f64(double %mul)
|
||||
+ %res = call reassoc nsz double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp_intr_and_libcall(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_intr_and_libcall(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @exp(double %x)
|
||||
+ %res = call reassoc double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp_intr_and_libcall_2(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_intr_and_libcall_2(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @llvm.exp.f64(double %x)
|
||||
+ %res = call reassoc double @sqrt(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define <2 x float> @sqrt_exp_vec(<2 x float> %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_vec(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], <float 5.000000e-01, float 5.000000e-01>
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret <2 x float> [[E]]
|
||||
+;
|
||||
+ %e = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> %x)
|
||||
+ %res = call reassoc <2 x float> @llvm.sqrt.v2f32(<2 x float> %e)
|
||||
+ ret <2 x float> %res
|
||||
+}
|
||||
+
|
||||
declare i32 @foo(double)
|
||||
declare double @sqrt(double) readnone
|
||||
declare float @sqrtf(float)
|
||||
declare float @llvm.fabs.f32(float)
|
||||
+declare double @llvm.exp.f64(double)
|
||||
+declare double @llvm.sqrt.f64(double)
|
||||
+declare double @exp(double)
|
||||
+declare double @exp2(double)
|
||||
+declare double @exp10(double)
|
||||
+declare <2 x float> @llvm.exp.v2f32(<2 x float>)
|
||||
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
|
||||
--
|
||||
2.38.1.windows.1
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
|
||||
Name: %{pkg_name}
|
||||
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
|
||||
Release: 24
|
||||
Release: 25
|
||||
Summary: The Low Level Virtual Machine
|
||||
|
||||
License: NCSA
|
||||
@ -85,6 +85,7 @@ Patch25: 0025-Backport-Simple-check-to-ignore-Inline-asm-fwait-insertion.patc
|
||||
Patch26: 0026-Add-arch-restriction-for-BiSheng-Autotuner.patch
|
||||
Patch27: 0027-AArch64-Delete-hip09-macro.patch
|
||||
Patch28: 0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch
|
||||
Patch29: 0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch
|
||||
|
||||
BuildRequires: binutils-devel
|
||||
BuildRequires: cmake
|
||||
@ -378,6 +379,9 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C %{__cmake
|
||||
%{install_includedir}/llvm-gmock
|
||||
|
||||
%changelog
|
||||
* Wed Nov 20 2024 eastb233 <xiezhiheng@huawei.com> - 17.0.6-25
|
||||
- [SimplifyLibCalls] Merge sqrt into the power of exp (#79146)
|
||||
|
||||
* Tue Nov 19 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-24
|
||||
- [backport][Clang] Fix crash with -fzero-call-used-regs
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user