llvm18/0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch

From 60ff801d1ea96ab964039cc1ed42e1dca0a63d54 Mon Sep 17 00:00:00 2001
From: Anton Sidorenko <anton.sidorenko@syntacore.com>
Date: Tue, 6 Feb 2024 12:02:06 +0300
Subject: [PATCH] [SimplifyLibCalls] Merge sqrt into the power of exp (#79146)

Under fast-math flags it's possible to convert `sqrt(exp(X))` into
`exp(X * 0.5)`. I suppose that this transformation is always profitable.
This is similar to the optimization existing in GCC.
---
 .../llvm/Transforms/Utils/SimplifyLibCalls.h  |   1 +
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp |  67 ++++++++++
 llvm/test/Transforms/InstCombine/sqrt.ll      | 120 ++++++++++++++++++
 3 files changed, 188 insertions(+)

diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index eb10545ee149..1aad0b298845 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -201,6 +201,7 @@ private:
   Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B);
   Value *optimizeLog(CallInst *CI, IRBuilderBase &B);
   Value *optimizeSqrt(CallInst *CI, IRBuilderBase &B);
+  Value *mergeSqrtToExp(CallInst *CI, IRBuilderBase &B);
   Value *optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B);
   Value *optimizeTan(CallInst *CI, IRBuilderBase &B);
   // Wrapper for all floating point library call optimizations
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 3ad97613fe7a..dd5bbdaaf6d3 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2539,6 +2539,70 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
   return Ret;
 }

+// sqrt(exp(X)) -> exp(X * 0.5), also exp2/exp10; both calls need reassoc.
+Value *LibCallSimplifier::mergeSqrtToExp(CallInst *CI, IRBuilderBase &B) {
+  if (!CI->hasAllowReassoc())
+    return nullptr;
+
+  Function *SqrtFn = CI->getCalledFunction();
+  CallInst *Arg = dyn_cast<CallInst>(CI->getArgOperand(0));
+  if (!Arg || !Arg->hasAllowReassoc() || !Arg->hasOneUse())
+    return nullptr;
+  Intrinsic::ID ArgID = Arg->getIntrinsicID();
+  LibFunc ArgLb = NotLibFunc;
+  TLI->getLibFunc(*Arg, ArgLb); // Result ignored; ArgLb was preset above.
+
+  LibFunc SqrtLb, ExpLb, Exp2Lb, Exp10Lb;
+
+  if (TLI->getLibFunc(SqrtFn->getName(), SqrtLb)) // Libcall, found by name.
+    switch (SqrtLb) {
+    case LibFunc_sqrtf:
+      ExpLb = LibFunc_expf;
+      Exp2Lb = LibFunc_exp2f;
+      Exp10Lb = LibFunc_exp10f;
+      break;
+    case LibFunc_sqrt:
+      ExpLb = LibFunc_exp;
+      Exp2Lb = LibFunc_exp2;
+      Exp10Lb = LibFunc_exp10;
+      break;
+    case LibFunc_sqrtl:
+      ExpLb = LibFunc_expl;
+      Exp2Lb = LibFunc_exp2l;
+      Exp10Lb = LibFunc_exp10l;
+      break;
+    default: // A libcall other than sqrtf/sqrt/sqrtl.
+      return nullptr;
+    }
+  else if (SqrtFn->getIntrinsicID() == Intrinsic::sqrt) {
+    if (CI->getType()->getScalarType()->isFloatTy()) {
+      ExpLb = LibFunc_expf;
+      Exp2Lb = LibFunc_exp2f;
+      Exp10Lb = LibFunc_exp10f;
+    } else if (CI->getType()->getScalarType()->isDoubleTy()) {
+      ExpLb = LibFunc_exp;
+      Exp2Lb = LibFunc_exp2;
+      Exp10Lb = LibFunc_exp10;
+    } else // llvm.sqrt on a scalar type other than float/double.
+      return nullptr;
+  } else // Neither a recognized libcall nor llvm.sqrt.
+    return nullptr;
+
+  if (ArgLb != ExpLb && ArgLb != Exp2Lb && ArgLb != Exp10Lb &&
+      ArgID != Intrinsic::exp && ArgID != Intrinsic::exp2)
+    return nullptr; // Operand is not a matching exp/exp2/exp10 call.
+
+  IRBuilderBase::InsertPointGuard Guard(B); // Restore insert point on exit.
+  B.SetInsertPoint(Arg); // Emit the fmul right before the exp call.
+  auto *ExpOperand = Arg->getOperand(0);
+  auto *FMul =
+      B.CreateFMulFMF(ExpOperand, ConstantFP::get(ExpOperand->getType(), 0.5),
+                      CI, "merged.sqrt"); // Fast-math flags come from CI.
+
+  Arg->setOperand(0, FMul); // Reuse the existing exp call in place.
+  return Arg;
+}
+
 Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
   Module *M = CI->getModule();
   Function *Callee = CI->getCalledFunction();
@@ -2551,6 +2615,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
        Callee->getIntrinsicID() == Intrinsic::sqrt))
     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);

+  if (Value *Opt = mergeSqrtToExp(CI, B))
+    return Opt;
+
   if (!CI->isFast())
     return Ret;

diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll
index 004df3e30c72..f72fe5a6a581 100644
--- a/llvm/test/Transforms/InstCombine/sqrt.ll
+++ b/llvm/test/Transforms/InstCombine/sqrt.ll
@@ -88,7 +88,127 @@ define float @sqrt_call_fabs_f32(float %x) {
   ret float %sqrt
 }

+define double @sqrt_exp(double %x) {
+; CHECK-LABEL: @sqrt_exp(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @llvm.exp.f64(double %x)
+  %res = call reassoc double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp_2(double %x) {
+; CHECK-LABEL: @sqrt_exp_2(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @exp(double %x)
+  %res = call reassoc double @sqrt(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp2(double %x) {
+; CHECK-LABEL: @sqrt_exp2(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @exp2(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @exp2(double %x)
+  %res = call reassoc double @sqrt(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp10(double %x) {
+; CHECK-LABEL: @sqrt_exp10(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @exp10(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @exp10(double %x)
+  %res = call reassoc double @sqrt(double %e)
+  ret double %res
+}
+
+; Negative test: the exp call lacks reassoc, so the sqrt is not merged.
+define double @sqrt_exp_nofast_1(double %x) {
+; CHECK-LABEL: @sqrt_exp_nofast_1(
+; CHECK-NEXT:    [[E:%.*]] = call double @llvm.exp.f64(double [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = call reassoc double @llvm.sqrt.f64(double [[E]])
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %e = call double @llvm.exp.f64(double %x)
+  %res = call reassoc double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+; Negative test: the sqrt call lacks reassoc, so the sqrt is not merged.
+define double @sqrt_exp_nofast_2(double %x) {
+; CHECK-LABEL: @sqrt_exp_nofast_2(
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.sqrt.f64(double [[E]])
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %e = call reassoc double @llvm.exp.f64(double %x)
+  %res = call double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp_merge_constant(double %x, double %y) {
+; CHECK-LABEL: @sqrt_exp_merge_constant(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc nsz double [[X:%.*]], 5.000000e+00
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %mul = fmul reassoc nsz double %x, 10.0
+  %e = call reassoc double @llvm.exp.f64(double %mul)
+  %res = call reassoc nsz double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp_intr_and_libcall(double %x) {
+; CHECK-LABEL: @sqrt_exp_intr_and_libcall(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @exp(double %x)
+  %res = call reassoc double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp_intr_and_libcall_2(double %x) {
+; CHECK-LABEL: @sqrt_exp_intr_and_libcall_2(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @llvm.exp.f64(double %x)
+  %res = call reassoc double @sqrt(double %e)
+  ret double %res
+}
+
+define <2 x float> @sqrt_exp_vec(<2 x float> %x) {
+; CHECK-LABEL: @sqrt_exp_vec(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], <float 5.000000e-01, float 5.000000e-01>
+; CHECK-NEXT:    [[E:%.*]] = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> [[MERGED_SQRT]])
+; CHECK-NEXT:    ret <2 x float> [[E]]
+;
+  %e = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> %x)
+  %res = call reassoc <2 x float> @llvm.sqrt.v2f32(<2 x float> %e)
+  ret <2 x float> %res
+}
+
 declare i32 @foo(double)
 declare double @sqrt(double) readnone
 declare float @sqrtf(float)
 declare float @llvm.fabs.f32(float)
+declare double @llvm.exp.f64(double)
+declare double @llvm.sqrt.f64(double)
+declare double @exp(double)
+declare double @exp2(double)
+declare double @exp10(double)
+declare <2 x float> @llvm.exp.v2f32(<2 x float>)
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
--
2.38.1.windows.1