2475 lines
95 KiB
Diff
2475 lines
95 KiB
Diff
|
|
From 0bce68310dc0ff6a09ec2cf5c3ae32400c631324 Mon Sep 17 00:00:00 2001
|
||
|
|
From: zhanglimin <zhanglimin@loongson.cn>
|
||
|
|
Date: Tue, 12 Sep 2023 09:51:16 +0800
|
||
|
|
Subject: [PATCH 01/14] [sanitizer][msan] VarArgHelper for loongarch64
|
||
|
|
|
||
|
|
This patch adds support for variadic argument for loongarch64,
|
||
|
|
which is based on MIPS64. All `check-msan` tests pass.
|
||
|
|
|
||
|
|
Reviewed By: vitalybuka
|
||
|
|
|
||
|
|
Differential Revision: https://reviews.llvm.org/D158587
|
||
|
|
|
||
|
|
(cherry picked from commit ec42c78cc43ac1e8364e5a0941aa5fc91b813dd3)
|
||
|
|
---
|
||
|
|
.../Instrumentation/MemorySanitizer.cpp | 7 ++
|
||
|
|
.../LoongArch/vararg-loongarch64.ll | 78 +++++++++++++++++++
|
||
|
|
2 files changed, 85 insertions(+)
|
||
|
|
create mode 100644 llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
|
||
|
|
index 83d90049abc3..362fd6e4151f 100644
|
||
|
|
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
|
||
|
|
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
|
||
|
|
@@ -4945,6 +4945,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
|
||
|
|
};
|
||
|
|
|
||
|
|
/// MIPS64-specific implementation of VarArgHelper.
|
||
|
|
+/// NOTE: This is also used for LoongArch64.
|
||
|
|
struct VarArgMIPS64Helper : public VarArgHelper {
|
||
|
|
Function &F;
|
||
|
|
MemorySanitizer &MS;
|
||
|
|
@@ -5836,6 +5837,10 @@ struct VarArgSystemZHelper : public VarArgHelper {
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
+// Loongarch64 is not a MIPS, but the current vargs calling convention matches
|
||
|
|
+// the MIPS.
|
||
|
|
+using VarArgLoongArch64Helper = VarArgMIPS64Helper;
|
||
|
|
+
|
||
|
|
/// A no-op implementation of VarArgHelper.
|
||
|
|
struct VarArgNoOpHelper : public VarArgHelper {
|
||
|
|
VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
|
||
|
|
@@ -5868,6 +5873,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
|
||
|
|
return new VarArgPowerPC64Helper(Func, Msan, Visitor);
|
||
|
|
else if (TargetTriple.getArch() == Triple::systemz)
|
||
|
|
return new VarArgSystemZHelper(Func, Msan, Visitor);
|
||
|
|
+ else if (TargetTriple.isLoongArch64())
|
||
|
|
+ return new VarArgLoongArch64Helper(Func, Msan, Visitor);
|
||
|
|
else
|
||
|
|
return new VarArgNoOpHelper(Func, Msan, Visitor);
|
||
|
|
}
|
||
|
|
diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000000..8a4ab59588ad
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll
|
||
|
|
@@ -0,0 +1,78 @@
|
||
|
|
+; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
|
||
|
|
+
|
||
|
|
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
|
||
|
|
+target triple = "loongarch64-unknown-linux-gnu"
|
||
|
|
+
|
||
|
|
+;; First, check allocation of the save area.
|
||
|
|
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1
|
||
|
|
+declare void @llvm.va_start(ptr) #2
|
||
|
|
+declare void @llvm.va_end(ptr) #2
|
||
|
|
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
|
||
|
|
+define i32 @foo(i32 %guard, ...) {
|
||
|
|
+; CHECK-LABEL: @foo
|
||
|
|
+; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
|
||
|
|
+; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]]
|
||
|
|
+; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]]
|
||
|
|
+; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false)
|
||
|
|
+; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800)
|
||
|
|
+; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false)
|
||
|
|
+;
|
||
|
|
+ %vl = alloca ptr, align 8
|
||
|
|
+ call void @llvm.lifetime.start.p0(i64 32, ptr %vl)
|
||
|
|
+ call void @llvm.va_start(ptr %vl)
|
||
|
|
+ call void @llvm.va_end(ptr %vl)
|
||
|
|
+ call void @llvm.lifetime.end.p0(i64 32, ptr %vl)
|
||
|
|
+ ret i32 0
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
|
||
|
|
+;; array.
|
||
|
|
+define i32 @bar() {
|
||
|
|
+; CHECK-LABEL: @bar
|
||
|
|
+; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8
|
||
|
|
+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
|
||
|
|
+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
|
||
|
|
+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
|
||
|
|
+;
|
||
|
|
+ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
|
||
|
|
+ ret i32 %1
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+;; Check multiple fixed arguments.
|
||
|
|
+declare i32 @foo2(i32 %g1, i32 %g2, ...)
|
||
|
|
+define i32 @bar2() {
|
||
|
|
+; CHECK-LABEL: @bar2
|
||
|
|
+; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8
|
||
|
|
+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
|
||
|
|
+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
|
||
|
|
+;
|
||
|
|
+ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00)
|
||
|
|
+ ret i32 %1
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are
|
||
|
|
+;; passed to a variadic function.
|
||
|
|
+declare i64 @sum(i64 %n, ...)
|
||
|
|
+define dso_local i64 @many_args() {
|
||
|
|
+;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed.
|
||
|
|
+; CHECK-LABEL: @many_args
|
||
|
|
+; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792)
|
||
|
|
+; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800)
|
||
|
|
+;
|
||
|
|
+entry:
|
||
|
|
+ %ret = call i64 (i64, ...) @sum(i64 120,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
|
||
|
|
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1
|
||
|
|
+ )
|
||
|
|
+ ret i64 %ret
|
||
|
|
+}
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From f1265a12fa947b79967552ab520f904486c76353 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com>
|
||
|
|
Date: Thu, 28 Sep 2023 15:26:18 +0800
|
||
|
|
Subject: [PATCH 02/14] [LowerTypeTests] Add loongarch64 to CFI jumptables
|
||
|
|
(#67312)
|
||
|
|
|
||
|
|
This patch implements jump tables for loongarch64.
|
||
|
|
|
||
|
|
(cherry picked from commit 0e8a8c85f8765c086c573f36e60c895920381e18)
|
||
|
|
---
|
||
|
|
llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 9 ++++++++-
|
||
|
|
llvm/test/Transforms/LowerTypeTests/function-weak.ll | 2 ++
|
||
|
|
llvm/test/Transforms/LowerTypeTests/function.ll | 9 +++++++++
|
||
|
|
3 files changed, 19 insertions(+), 1 deletion(-)
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
|
||
|
|
index 9b4b3efd7283..a89d57d12615 100644
|
||
|
|
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
|
||
|
|
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
|
||
|
|
@@ -1196,6 +1196,7 @@ static const unsigned kARMJumpTableEntrySize = 4;
|
||
|
|
static const unsigned kARMBTIJumpTableEntrySize = 8;
|
||
|
|
static const unsigned kARMv6MJumpTableEntrySize = 16;
|
||
|
|
static const unsigned kRISCVJumpTableEntrySize = 8;
|
||
|
|
+static const unsigned kLOONGARCH64JumpTableEntrySize = 8;
|
||
|
|
|
||
|
|
unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
|
||
|
|
switch (JumpTableArch) {
|
||
|
|
@@ -1222,6 +1223,8 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
|
||
|
|
case Triple::riscv32:
|
||
|
|
case Triple::riscv64:
|
||
|
|
return kRISCVJumpTableEntrySize;
|
||
|
|
+ case Triple::loongarch64:
|
||
|
|
+ return kLOONGARCH64JumpTableEntrySize;
|
||
|
|
default:
|
||
|
|
report_fatal_error("Unsupported architecture for jump tables");
|
||
|
|
}
|
||
|
|
@@ -1286,6 +1289,9 @@ void LowerTypeTestsModule::createJumpTableEntry(
|
||
|
|
} else if (JumpTableArch == Triple::riscv32 ||
|
||
|
|
JumpTableArch == Triple::riscv64) {
|
||
|
|
AsmOS << "tail $" << ArgIndex << "@plt\n";
|
||
|
|
+ } else if (JumpTableArch == Triple::loongarch64) {
|
||
|
|
+ AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n"
|
||
|
|
+ << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n";
|
||
|
|
} else {
|
||
|
|
report_fatal_error("Unsupported architecture for jump tables");
|
||
|
|
}
|
||
|
|
@@ -1304,7 +1310,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctions(
|
||
|
|
ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
|
||
|
|
if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
|
||
|
|
Arch == Triple::thumb || Arch == Triple::aarch64 ||
|
||
|
|
- Arch == Triple::riscv32 || Arch == Triple::riscv64)
|
||
|
|
+ Arch == Triple::riscv32 || Arch == Triple::riscv64 ||
|
||
|
|
+ Arch == Triple::loongarch64)
|
||
|
|
buildBitSetsFromFunctionsNative(TypeIds, Functions);
|
||
|
|
else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
|
||
|
|
buildBitSetsFromFunctionsWASM(TypeIds, Functions);
|
||
|
|
diff --git a/llvm/test/Transforms/LowerTypeTests/function-weak.ll b/llvm/test/Transforms/LowerTypeTests/function-weak.ll
|
||
|
|
index ff69abacc8e9..c765937f1991 100644
|
||
|
|
--- a/llvm/test/Transforms/LowerTypeTests/function-weak.ll
|
||
|
|
+++ b/llvm/test/Transforms/LowerTypeTests/function-weak.ll
|
||
|
|
@@ -4,6 +4,7 @@
|
||
|
|
; RUN: opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,ARM %s
|
||
|
|
; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s
|
||
|
|
; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s
|
||
|
|
+; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,LOONGARCH64 %s
|
||
|
|
|
||
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||
|
|
target triple = "x86_64-unknown-linux-gnu"
|
||
|
|
@@ -116,6 +117,7 @@ define i1 @foo(ptr %p) {
|
||
|
|
; X86: define private void @[[JT]]() #{{.*}} align 8 {
|
||
|
|
; ARM: define private void @[[JT]]() #{{.*}} align 4 {
|
||
|
|
; RISCV: define private void @[[JT]]() #{{.*}} align 8 {
|
||
|
|
+; LOONGARCH64: define private void @[[JT]]() #{{.*}} align 8 {
|
||
|
|
|
||
|
|
; CHECK: define internal void @__cfi_global_var_init() section ".text.startup" {
|
||
|
|
; CHECK-NEXT: entry:
|
||
|
|
diff --git a/llvm/test/Transforms/LowerTypeTests/function.ll b/llvm/test/Transforms/LowerTypeTests/function.ll
|
||
|
|
index 968c9d434eb2..802b88d92977 100644
|
||
|
|
--- a/llvm/test/Transforms/LowerTypeTests/function.ll
|
||
|
|
+++ b/llvm/test/Transforms/LowerTypeTests/function.ll
|
||
|
|
@@ -5,6 +5,7 @@
|
||
|
|
; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
|
||
|
|
; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
|
||
|
|
; RUN: opt -S -passes=lowertypetests -mtriple=wasm32-unknown-unknown %s | FileCheck --check-prefix=WASM32 %s
|
||
|
|
+; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=LOONGARCH64,NATIVE %s
|
||
|
|
|
||
|
|
; The right format for Arm jump tables depends on the selected
|
||
|
|
; subtarget, so we can't get these tests right without the Arm target
|
||
|
|
@@ -34,6 +35,7 @@ target datalayout = "e-p:64:64"
|
||
|
|
; THUMB: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1)
|
||
|
|
; THUMBV6M: @g = internal alias void (), getelementptr inbounds ([2 x [16 x i8]], ptr @[[JT]], i64 0, i64 1)
|
||
|
|
; RISCV: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1)
|
||
|
|
+; LOONGARCH64: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1)
|
||
|
|
|
||
|
|
; NATIVE: define hidden void @f.cfi()
|
||
|
|
; WASM32: define void @f() !type !{{[0-9]+}} !wasm.index ![[I0:[0-9]+]]
|
||
|
|
@@ -65,6 +67,7 @@ define i1 @foo(ptr %p) {
|
||
|
|
; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 {
|
||
|
|
; THUMBV6M: define private void @[[JT]]() #[[ATTR:.*]] align 16 {
|
||
|
|
; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
|
||
|
|
+; LOONGARCH64: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
|
||
|
|
|
||
|
|
; X86: jmp ${0:c}@plt
|
||
|
|
; X86-SAME: int3
|
||
|
|
@@ -99,6 +102,11 @@ define i1 @foo(ptr %p) {
|
||
|
|
; RISCV: tail $0@plt
|
||
|
|
; RISCV-SAME: tail $1@plt
|
||
|
|
|
||
|
|
+; LOONGARCH64: pcalau12i $$t0, %pc_hi20($0)
|
||
|
|
+; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($0)
|
||
|
|
+; LOONGARCH64-SAME: pcalau12i $$t0, %pc_hi20($1)
|
||
|
|
+; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($1)
|
||
|
|
+
|
||
|
|
; NATIVE-SAME: "s,s"(ptr @f.cfi, ptr @g.cfi)
|
||
|
|
|
||
|
|
; X86-LINUX: attributes #[[ATTR]] = { naked nocf_check nounwind }
|
||
|
|
@@ -107,6 +115,7 @@ define i1 @foo(ptr %p) {
|
||
|
|
; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" }
|
||
|
|
; THUMBV6M: attributes #[[ATTR]] = { naked nounwind "target-features"="+thumb-mode" }
|
||
|
|
; RISCV: attributes #[[ATTR]] = { naked nounwind "target-features"="-c,-relax" }
|
||
|
|
+; LOONGARCH64: attributes #[[ATTR]] = { naked nounwind }
|
||
|
|
|
||
|
|
; WASM32: ![[I0]] = !{i64 1}
|
||
|
|
; WASM32: ![[I1]] = !{i64 2}
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From 6f3143e1ad0bb759b7519af81994ed3c71dcf52b Mon Sep 17 00:00:00 2001
|
||
|
|
From: wanglei <wanglei@loongson.cn>
|
||
|
|
Date: Fri, 20 Oct 2023 10:44:55 +0800
|
||
|
|
Subject: [PATCH 03/14] [LoongArch] Fix td pattern for CACOP LDPTE and LDDIR
|
||
|
|
|
||
|
|
The immediate argument should be a target constant (`timm`).
|
||
|
|
|
||
|
|
(cherry picked from commit 47826b3f148996767ebd2c67ee41c329cb364fef)
|
||
|
|
---
|
||
|
|
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 8 ++++----
|
||
|
|
1 file changed, 4 insertions(+), 4 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
|
||
|
|
index b2c4bb812ba5..166379d7d592 100644
|
||
|
|
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
|
||
|
|
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
|
||
|
|
@@ -1857,9 +1857,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>;
|
||
|
|
/// Intrinsics
|
||
|
|
|
||
|
|
def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12),
|
||
|
|
- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>;
|
||
|
|
+ (CACOP timm:$op, GPR:$rj, timm:$imm12)>;
|
||
|
|
def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12),
|
||
|
|
- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>;
|
||
|
|
+ (CACOP timm:$op, GPR:$rj, timm:$imm12)>;
|
||
|
|
def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>;
|
||
|
|
def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>;
|
||
|
|
def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>;
|
||
|
|
@@ -2023,9 +2023,9 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk),
|
||
|
|
def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk),
|
||
|
|
(ASRTGT_D GPR:$rj, GPR:$rk)>;
|
||
|
|
def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8),
|
||
|
|
- (LDDIR GPR:$rj, uimm8:$imm8)>;
|
||
|
|
+ (LDDIR GPR:$rj, timm:$imm8)>;
|
||
|
|
def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8),
|
||
|
|
- (LDPTE GPR:$rj, uimm8:$imm8)>;
|
||
|
|
+ (LDPTE GPR:$rj, timm:$imm8)>;
|
||
|
|
} // Predicates = [IsLA64]
|
||
|
|
|
||
|
|
//===----------------------------------------------------------------------===//
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From d90b85e94180543fd1789f9e26d7931f2329069b Mon Sep 17 00:00:00 2001
|
||
|
|
From: ZhaoQi <zhaoqi01@loongson.cn>
|
||
|
|
Date: Fri, 10 Nov 2023 15:54:33 +0800
|
||
|
|
Subject: [PATCH 04/14] [LoongArch][MC] Refine MCInstrAnalysis based on
|
||
|
|
registers used (#71276)
|
||
|
|
|
||
|
|
MCInstrAnalysis can return properties of instructions (e.g., isCall(),
|
||
|
|
isBranch(),...) based on the information that MCInstrDesc can get from
|
||
|
|
*InstrInfo*.td files. These properties are based on opcodes only, but JIRL
|
||
|
|
can have different properties based on different registers used.
|
||
|
|
|
||
|
|
So this patch refines several MCInstrAnalysis methods: isTerminator,
|
||
|
|
isCall,isReturn,isBranch,isUnconditionalBranch and isIndirectBranch.
|
||
|
|
|
||
|
|
This patch also allows BOLT which will be supported on LoongArch later
|
||
|
|
to get the right instruction info.
|
||
|
|
|
||
|
|
(cherry picked from commit f7d784709673ca185f6fb0633fd53c72e81f2ae1)
|
||
|
|
---
|
||
|
|
.../MCTargetDesc/LoongArchMCTargetDesc.cpp | 76 +++++++++++++
|
||
|
|
.../unittests/Target/LoongArch/CMakeLists.txt | 1 +
|
||
|
|
.../Target/LoongArch/MCInstrAnalysisTest.cpp | 107 ++++++++++++++++++
|
||
|
|
3 files changed, 184 insertions(+)
|
||
|
|
create mode 100644 llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
|
||
|
|
index 942e667bc261..d580c3457fec 100644
|
||
|
|
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
|
||
|
|
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
|
||
|
|
@@ -104,6 +104,82 @@ public:
|
||
|
|
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
+
|
||
|
|
+ bool isTerminator(const MCInst &Inst) const override {
|
||
|
|
+ if (MCInstrAnalysis::isTerminator(Inst))
|
||
|
|
+ return true;
|
||
|
|
+
|
||
|
|
+ switch (Inst.getOpcode()) {
|
||
|
|
+ default:
|
||
|
|
+ return false;
|
||
|
|
+ case LoongArch::JIRL:
|
||
|
|
+ return Inst.getOperand(0).getReg() == LoongArch::R0;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ bool isCall(const MCInst &Inst) const override {
|
||
|
|
+ if (MCInstrAnalysis::isCall(Inst))
|
||
|
|
+ return true;
|
||
|
|
+
|
||
|
|
+ switch (Inst.getOpcode()) {
|
||
|
|
+ default:
|
||
|
|
+ return false;
|
||
|
|
+ case LoongArch::JIRL:
|
||
|
|
+ return Inst.getOperand(0).getReg() != LoongArch::R0;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ bool isReturn(const MCInst &Inst) const override {
|
||
|
|
+ if (MCInstrAnalysis::isReturn(Inst))
|
||
|
|
+ return true;
|
||
|
|
+
|
||
|
|
+ switch (Inst.getOpcode()) {
|
||
|
|
+ default:
|
||
|
|
+ return false;
|
||
|
|
+ case LoongArch::JIRL:
|
||
|
|
+ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
|
||
|
|
+ Inst.getOperand(1).getReg() == LoongArch::R1;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ bool isBranch(const MCInst &Inst) const override {
|
||
|
|
+ if (MCInstrAnalysis::isBranch(Inst))
|
||
|
|
+ return true;
|
||
|
|
+
|
||
|
|
+ switch (Inst.getOpcode()) {
|
||
|
|
+ default:
|
||
|
|
+ return false;
|
||
|
|
+ case LoongArch::JIRL:
|
||
|
|
+ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
|
||
|
|
+ Inst.getOperand(1).getReg() != LoongArch::R1;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ bool isUnconditionalBranch(const MCInst &Inst) const override {
|
||
|
|
+ if (MCInstrAnalysis::isUnconditionalBranch(Inst))
|
||
|
|
+ return true;
|
||
|
|
+
|
||
|
|
+ switch (Inst.getOpcode()) {
|
||
|
|
+ default:
|
||
|
|
+ return false;
|
||
|
|
+ case LoongArch::JIRL:
|
||
|
|
+ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
|
||
|
|
+ Inst.getOperand(1).getReg() != LoongArch::R1;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ bool isIndirectBranch(const MCInst &Inst) const override {
|
||
|
|
+ if (MCInstrAnalysis::isIndirectBranch(Inst))
|
||
|
|
+ return true;
|
||
|
|
+
|
||
|
|
+ switch (Inst.getOpcode()) {
|
||
|
|
+ default:
|
||
|
|
+ return false;
|
||
|
|
+ case LoongArch::JIRL:
|
||
|
|
+ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
|
||
|
|
+ Inst.getOperand(1).getReg() != LoongArch::R1;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
};
|
||
|
|
|
||
|
|
} // end namespace
|
||
|
|
diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt
|
||
|
|
index fef4f8e15461..e6f8ec073721 100644
|
||
|
|
--- a/llvm/unittests/Target/LoongArch/CMakeLists.txt
|
||
|
|
+++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt
|
||
|
|
@@ -20,6 +20,7 @@ set(LLVM_LINK_COMPONENTS
|
||
|
|
|
||
|
|
add_llvm_target_unittest(LoongArchTests
|
||
|
|
InstSizes.cpp
|
||
|
|
+ MCInstrAnalysisTest.cpp
|
||
|
|
)
|
||
|
|
|
||
|
|
set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests")
|
||
|
|
diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000000..6a208d274a0d
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
|
||
|
|
@@ -0,0 +1,107 @@
|
||
|
|
+//===- MCInstrAnalysisTest.cpp - LoongArchMCInstrAnalysis unit tests ------===//
|
||
|
|
+//
|
||
|
|
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
|
+// See https://llvm.org/LICENSE.txt for license information.
|
||
|
|
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
|
+//
|
||
|
|
+//===----------------------------------------------------------------------===//
|
||
|
|
+
|
||
|
|
+#include "llvm/MC/MCInstrAnalysis.h"
|
||
|
|
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
|
||
|
|
+#include "llvm/MC/MCInstBuilder.h"
|
||
|
|
+#include "llvm/MC/TargetRegistry.h"
|
||
|
|
+#include "llvm/Support/TargetSelect.h"
|
||
|
|
+
|
||
|
|
+#include "gtest/gtest.h"
|
||
|
|
+
|
||
|
|
+#include <memory>
|
||
|
|
+
|
||
|
|
+using namespace llvm;
|
||
|
|
+
|
||
|
|
+namespace {
|
||
|
|
+
|
||
|
|
+class InstrAnalysisTest : public testing::TestWithParam<const char *> {
|
||
|
|
+protected:
|
||
|
|
+ std::unique_ptr<const MCInstrInfo> Info;
|
||
|
|
+ std::unique_ptr<const MCInstrAnalysis> Analysis;
|
||
|
|
+
|
||
|
|
+ static void SetUpTestSuite() {
|
||
|
|
+ LLVMInitializeLoongArchTargetInfo();
|
||
|
|
+ LLVMInitializeLoongArchTarget();
|
||
|
|
+ LLVMInitializeLoongArchTargetMC();
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ InstrAnalysisTest() {
|
||
|
|
+ std::string Error;
|
||
|
|
+ const Target *TheTarget =
|
||
|
|
+ TargetRegistry::lookupTarget(Triple::normalize(GetParam()), Error);
|
||
|
|
+ Info = std::unique_ptr<const MCInstrInfo>(TheTarget->createMCInstrInfo());
|
||
|
|
+ Analysis = std::unique_ptr<const MCInstrAnalysis>(
|
||
|
|
+ TheTarget->createMCInstrAnalysis(Info.get()));
|
||
|
|
+ }
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+} // namespace
|
||
|
|
+
|
||
|
|
+static MCInst beq() {
|
||
|
|
+ return MCInstBuilder(LoongArch::BEQ)
|
||
|
|
+ .addReg(LoongArch::R0)
|
||
|
|
+ .addReg(LoongArch::R1)
|
||
|
|
+ .addImm(32);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); }
|
||
|
|
+
|
||
|
|
+static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) {
|
||
|
|
+ return MCInstBuilder(LoongArch::JIRL).addReg(RD).addReg(RJ).addImm(16);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+TEST_P(InstrAnalysisTest, IsTerminator) {
|
||
|
|
+ EXPECT_TRUE(Analysis->isTerminator(beq()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isTerminator(bl()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0)));
|
||
|
|
+ EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5)));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+TEST_P(InstrAnalysisTest, IsCall) {
|
||
|
|
+ EXPECT_FALSE(Analysis->isCall(beq()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isCall(bl()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1)));
|
||
|
|
+ EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0)));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+TEST_P(InstrAnalysisTest, IsReturn) {
|
||
|
|
+ EXPECT_FALSE(Analysis->isReturn(beq()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isReturn(bl()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1)));
|
||
|
|
+ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0)));
|
||
|
|
+ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R1)));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+TEST_P(InstrAnalysisTest, IsBranch) {
|
||
|
|
+ EXPECT_TRUE(Analysis->isBranch(beq()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isBranch(bl()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0)));
|
||
|
|
+ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1)));
|
||
|
|
+ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1)));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+TEST_P(InstrAnalysisTest, IsUnconditionalBranch) {
|
||
|
|
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(beq()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(bl()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0)));
|
||
|
|
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1)));
|
||
|
|
+ EXPECT_FALSE(
|
||
|
|
+ Analysis->isUnconditionalBranch(jirl(LoongArch::R0, LoongArch::R1)));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+TEST_P(InstrAnalysisTest, IsIndirectBranch) {
|
||
|
|
+ EXPECT_FALSE(Analysis->isIndirectBranch(beq()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isIndirectBranch(bl()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0)));
|
||
|
|
+ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1)));
|
||
|
|
+ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R0, LoongArch::R1)));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+INSTANTIATE_TEST_SUITE_P(LA32And64, InstrAnalysisTest,
|
||
|
|
+ testing::Values("loongarch32", "loongarch64"));
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From 4d3ba0892d66b21f6a8a72f1d787e42a64be8867 Mon Sep 17 00:00:00 2001
|
||
|
|
From: ZhaoQi <zhaoqi01@loongson.cn>
|
||
|
|
Date: Wed, 15 Nov 2023 11:12:30 +0800
|
||
|
|
Subject: [PATCH 05/14] [LoongArch][NFC] Pre-commit MCInstrAnalysis tests for
|
||
|
|
instruction 'b' (#71903)
|
||
|
|
|
||
|
|
The tests for 'b' that are marked with FIXME are incorrect; the
|
||
|
|
following patch will fix it.
|
||
|
|
|
||
|
|
(cherry picked from commit f6c4bb07eaa94bcd5d02ba7a46850225b6ed50d4)
|
||
|
|
---
|
||
|
|
.../Target/LoongArch/MCInstrAnalysisTest.cpp | 18 ++++++++++++++++++
|
||
|
|
1 file changed, 18 insertions(+)
|
||
|
|
|
||
|
|
diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
|
||
|
|
index 6a208d274a0d..6e1919fc2261 100644
|
||
|
|
--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
|
||
|
|
+++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
|
||
|
|
@@ -50,6 +50,8 @@ static MCInst beq() {
|
||
|
|
.addImm(32);
|
||
|
|
}
|
||
|
|
|
||
|
|
+static MCInst b() { return MCInstBuilder(LoongArch::B).addImm(32); }
|
||
|
|
+
|
||
|
|
static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); }
|
||
|
|
|
||
|
|
static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) {
|
||
|
|
@@ -58,6 +60,7 @@ static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) {
|
||
|
|
|
||
|
|
TEST_P(InstrAnalysisTest, IsTerminator) {
|
||
|
|
EXPECT_TRUE(Analysis->isTerminator(beq()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isTerminator(b()));
|
||
|
|
EXPECT_FALSE(Analysis->isTerminator(bl()));
|
||
|
|
EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0)));
|
||
|
|
EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5)));
|
||
|
|
@@ -65,6 +68,7 @@ TEST_P(InstrAnalysisTest, IsTerminator) {
|
||
|
|
|
||
|
|
TEST_P(InstrAnalysisTest, IsCall) {
|
||
|
|
EXPECT_FALSE(Analysis->isCall(beq()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isCall(b()));
|
||
|
|
EXPECT_TRUE(Analysis->isCall(bl()));
|
||
|
|
EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1)));
|
||
|
|
EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0)));
|
||
|
|
@@ -72,6 +76,7 @@ TEST_P(InstrAnalysisTest, IsCall) {
|
||
|
|
|
||
|
|
TEST_P(InstrAnalysisTest, IsReturn) {
|
||
|
|
EXPECT_FALSE(Analysis->isReturn(beq()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isReturn(b()));
|
||
|
|
EXPECT_FALSE(Analysis->isReturn(bl()));
|
||
|
|
EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1)));
|
||
|
|
EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0)));
|
||
|
|
@@ -80,14 +85,26 @@ TEST_P(InstrAnalysisTest, IsReturn) {
|
||
|
|
|
||
|
|
TEST_P(InstrAnalysisTest, IsBranch) {
|
||
|
|
EXPECT_TRUE(Analysis->isBranch(beq()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isBranch(b()));
|
||
|
|
EXPECT_FALSE(Analysis->isBranch(bl()));
|
||
|
|
EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0)));
|
||
|
|
EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1)));
|
||
|
|
EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1)));
|
||
|
|
}
|
||
|
|
|
||
|
|
+TEST_P(InstrAnalysisTest, IsConditionalBranch) {
|
||
|
|
+ EXPECT_TRUE(Analysis->isConditionalBranch(beq()));
|
||
|
|
+ // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is
|
||
|
|
+ // wrong. The following patch will fix it.
|
||
|
|
+ EXPECT_TRUE(Analysis->isConditionalBranch(b()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isConditionalBranch(bl()));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
TEST_P(InstrAnalysisTest, IsUnconditionalBranch) {
|
||
|
|
EXPECT_FALSE(Analysis->isUnconditionalBranch(beq()));
|
||
|
|
+ // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is
|
||
|
|
+ // wrong. The following patch will fix it.
|
||
|
|
+ EXPECT_FALSE(Analysis->isUnconditionalBranch(b()));
|
||
|
|
EXPECT_FALSE(Analysis->isUnconditionalBranch(bl()));
|
||
|
|
EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0)));
|
||
|
|
EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1)));
|
||
|
|
@@ -97,6 +114,7 @@ TEST_P(InstrAnalysisTest, IsUnconditionalBranch) {
|
||
|
|
|
||
|
|
TEST_P(InstrAnalysisTest, IsIndirectBranch) {
|
||
|
|
EXPECT_FALSE(Analysis->isIndirectBranch(beq()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isIndirectBranch(b()));
|
||
|
|
EXPECT_FALSE(Analysis->isIndirectBranch(bl()));
|
||
|
|
EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0)));
|
||
|
|
EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1)));
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From 034d4087be71c54248fff1bf7eae66291671776a Mon Sep 17 00:00:00 2001
|
||
|
|
From: ZhaoQi <zhaoqi01@loongson.cn>
|
||
|
|
Date: Thu, 16 Nov 2023 14:01:58 +0800
|
||
|
|
Subject: [PATCH 06/14] [LoongArch] Set isBarrier to true for instruction 'b'
|
||
|
|
(#72339)
|
||
|
|
|
||
|
|
Instr "b offs26" represents an unconditional branch in LoongArch. Set
|
||
|
|
isBarrier to 1 in tablegen for it, so that MCInstrAnalysis can return
|
||
|
|
correctly.
|
||
|
|
|
||
|
|
Fixes https://github.com/llvm/llvm-project/pull/71903.
|
||
|
|
|
||
|
|
(cherry picked from commit 42a4d5e8cab1537515d92ed56d6e17b673ed352f)
|
||
|
|
---
|
||
|
|
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 1 +
|
||
|
|
llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp | 8 ++------
|
||
|
|
2 files changed, 3 insertions(+), 6 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
|
||
|
|
index 166379d7d592..05ae36a9781d 100644
|
||
|
|
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
|
||
|
|
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
|
||
|
|
@@ -586,6 +586,7 @@ class Br_I26<bits<32> op>
|
||
|
|
: FmtI26<op, (outs), (ins simm26_b:$imm26), "$imm26"> {
|
||
|
|
let isBranch = 1;
|
||
|
|
let isTerminator = 1;
|
||
|
|
+ let isBarrier = 1;
|
||
|
|
}
|
||
|
|
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
|
||
|
|
|
||
|
|
diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
|
||
|
|
index 6e1919fc2261..468ee79615d6 100644
|
||
|
|
--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
|
||
|
|
+++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp
|
||
|
|
@@ -94,17 +94,13 @@ TEST_P(InstrAnalysisTest, IsBranch) {
|
||
|
|
|
||
|
|
TEST_P(InstrAnalysisTest, IsConditionalBranch) {
|
||
|
|
EXPECT_TRUE(Analysis->isConditionalBranch(beq()));
|
||
|
|
- // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is
|
||
|
|
- // wrong. The following patch will fix it.
|
||
|
|
- EXPECT_TRUE(Analysis->isConditionalBranch(b()));
|
||
|
|
+ EXPECT_FALSE(Analysis->isConditionalBranch(b()));
|
||
|
|
EXPECT_FALSE(Analysis->isConditionalBranch(bl()));
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST_P(InstrAnalysisTest, IsUnconditionalBranch) {
|
||
|
|
EXPECT_FALSE(Analysis->isUnconditionalBranch(beq()));
|
||
|
|
- // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is
|
||
|
|
- // wrong. The following patch will fix it.
|
||
|
|
- EXPECT_FALSE(Analysis->isUnconditionalBranch(b()));
|
||
|
|
+ EXPECT_TRUE(Analysis->isUnconditionalBranch(b()));
|
||
|
|
EXPECT_FALSE(Analysis->isUnconditionalBranch(bl()));
|
||
|
|
EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0)));
|
||
|
|
EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1)));
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From 701109dc419b8d07cd5254268d848dee1278b9ad Mon Sep 17 00:00:00 2001
|
||
|
|
From: ZhaoQi <zhaoqi01@loongson.cn>
|
||
|
|
Date: Tue, 21 Nov 2023 08:34:52 +0800
|
||
|
|
Subject: [PATCH 07/14] [LoongArch][MC] Pre-commit tests for instr bl fixupkind
|
||
|
|
testing (#72826)
|
||
|
|
|
||
|
|
This patch is used to test whether fixupkind for bl can be returned
|
||
|
|
correctly. When BL has target-flags(loongarch-call), there is no error.
|
||
|
|
But without this flag, an assertion error will appear. So the test is
|
||
|
|
just tagged as "Expectedly Failed" now until the following patch fixes it.
|
||
|
|
|
||
|
|
(cherry picked from commit 2ca028ce7c6de5f1350440012355a65383b8729a)
|
||
|
|
---
|
||
|
|
.../CodeGen/LoongArch/test_bl_fixupkind.mir | 66 +++++++++++++++++++
|
||
|
|
1 file changed, 66 insertions(+)
|
||
|
|
create mode 100644 llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
|
||
|
|
|
||
|
|
diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000000..2c1d41be7711
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
|
||
|
|
@@ -0,0 +1,66 @@
|
||
|
|
+## Tagged as "Expectedly Failed" until the following patch fix it
|
||
|
|
+# XFAIL: *
|
||
|
|
+# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \
|
||
|
|
+# RUN: llvm-objdump -d - | FileCheck %s
|
||
|
|
+
|
||
|
|
+# REQUIRES: asserts
|
||
|
|
+
|
||
|
|
+## Check that bl can get fixupkind correctly.
|
||
|
|
+## When BL has target-flags(loongarch-call), there is no error. But without
|
||
|
|
+## this flag, an assertion error will appear:
|
||
|
|
+## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed.
|
||
|
|
+
|
||
|
|
+--- |
|
||
|
|
+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
|
||
|
|
+ target triple = "loongarch64"
|
||
|
|
+
|
||
|
|
+ define dso_local void @test_bl_fixupkind_with_flag() {
|
||
|
|
+ ; CHECK-LABEL: test_bl_fixupkind_with_flag
|
||
|
|
+ ; CHECK: addi.d $sp, $sp, -16
|
||
|
|
+ ; CHECK-NEXT: st.d $ra, $sp, 8
|
||
|
|
+ ; CHECK-NEXT: bl 0 <test_bl_fixupkind_with_flag+0x8>
|
||
|
|
+ ; CHECK-NEXT: ld.d $ra, $sp, 8
|
||
|
|
+ ; CHECK-NEXT: addi.d $sp, $sp, 16
|
||
|
|
+ ; CHECK-NEXT: ret
|
||
|
|
+ entry:
|
||
|
|
+ call void @foo()
|
||
|
|
+ ret void
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ define dso_local void @test_bl_fixupkind_without_flag() {
|
||
|
|
+ ; CHECK-LABEL: test_bl_fixupkind_without_flag
|
||
|
|
+ ; CHECK: addi.d $sp, $sp, -16
|
||
|
|
+ ; CHECK-NEXT: st.d $ra, $sp, 8
|
||
|
|
+ ; CHECK-NEXT: bl 0 <test_bl_fixupkind_without_flag+0x8>
|
||
|
|
+ ; CHECK-NEXT: ld.d $ra, $sp, 8
|
||
|
|
+ ; CHECK-NEXT: addi.d $sp, $sp, 16
|
||
|
|
+ ; CHECK-NEXT: ret
|
||
|
|
+ entry:
|
||
|
|
+ call void @foo()
|
||
|
|
+ ret void
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ declare dso_local void @foo(...)
|
||
|
|
+...
|
||
|
|
+---
|
||
|
|
+name: test_bl_fixupkind_with_flag
|
||
|
|
+tracksRegLiveness: true
|
||
|
|
+body: |
|
||
|
|
+ bb.0.entry:
|
||
|
|
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3
|
||
|
|
+ BL target-flags(loongarch-call) @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3
|
||
|
|
+ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
|
||
|
|
+ PseudoRET
|
||
|
|
+
|
||
|
|
+...
|
||
|
|
+---
|
||
|
|
+name: test_bl_fixupkind_without_flag
|
||
|
|
+tracksRegLiveness: true
|
||
|
|
+body: |
|
||
|
|
+ bb.0.entry:
|
||
|
|
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3
|
||
|
|
+ BL @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3
|
||
|
|
+ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
|
||
|
|
+ PseudoRET
|
||
|
|
+
|
||
|
|
+...
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From a5bf03107b8738b0fab521d7718bed863056134b Mon Sep 17 00:00:00 2001
|
||
|
|
From: ZhaoQi <zhaoqi01@loongson.cn>
|
||
|
|
Date: Tue, 21 Nov 2023 19:00:29 +0800
|
||
|
|
Subject: [PATCH 08/14] [LoongArch][MC] Support to get the FixupKind for BL
|
||
|
|
(#72938)
|
||
|
|
|
||
|
|
Previously, bolt could not get FixupKind for BL correctly, because bolt
|
||
|
|
cannot get target-flags for BL. Here just add support in MCCodeEmitter.
|
||
|
|
|
||
|
|
Fixes https://github.com/llvm/llvm-project/pull/72826.
|
||
|
|
|
||
|
|
(cherry picked from commit 775d2f3201cf7fb657aaf58d1b37c130bd9eb8f9)
|
||
|
|
---
|
||
|
|
.../LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 1 +
|
||
|
|
llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir | 8 ++------
|
||
|
|
2 files changed, 3 insertions(+), 6 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
|
||
|
|
index 08c0820cb862..09d92ac9aa3a 100644
|
||
|
|
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
|
||
|
|
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
|
||
|
|
@@ -263,6 +263,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
|
||
|
|
FixupKind = LoongArch::fixup_loongarch_b21;
|
||
|
|
break;
|
||
|
|
case LoongArch::B:
|
||
|
|
+ case LoongArch::BL:
|
||
|
|
FixupKind = LoongArch::fixup_loongarch_b26;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
|
||
|
|
index 2c1d41be7711..70cd5fb8d7eb 100644
|
||
|
|
--- a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
|
||
|
|
+++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir
|
||
|
|
@@ -1,14 +1,10 @@
|
||
|
|
-## Tagged as "Expectedly Failed" until the following patch fix it
|
||
|
|
-# XFAIL: *
|
||
|
|
# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \
|
||
|
|
# RUN: llvm-objdump -d - | FileCheck %s
|
||
|
|
|
||
|
|
# REQUIRES: asserts
|
||
|
|
|
||
|
|
-## Check that bl can get fixupkind correctly.
|
||
|
|
-## When BL has target-flags(loongarch-call), there is no error. But without
|
||
|
|
-## this flag, an assertion error will appear:
|
||
|
|
-## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed.
|
||
|
|
+## Check that bl can get fixupkind correctly, whether BL contains
|
||
|
|
+## target-flags(loongarch-call) or not.
|
||
|
|
|
||
|
|
--- |
|
||
|
|
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From 20421e57af53d963a95c6c318f71f9399d241188 Mon Sep 17 00:00:00 2001
|
||
|
|
From: ZhaoQi <zhaoqi01@loongson.cn>
|
||
|
|
Date: Thu, 23 Nov 2023 16:38:41 +0800
|
||
|
|
Subject: [PATCH 09/14] [LoongArch][MC] Modify branch evaluation for
|
||
|
|
MCInstrAnalysis (#73205)
|
||
|
|
|
||
|
|
Function evaluateBranch() is used to compute target address for a given
|
||
|
|
branch instruction and return true on success. But target address of
|
||
|
|
indirect branch cannot be simply added, so rule it out and just return
|
||
|
|
false.
|
||
|
|
|
||
|
|
This patch also adds objdump tests which capture the current state of
|
||
|
|
support for printing branch targets. Without this patch, the result of
|
||
|
|
"jirl $zero, $a0, 4" is "jirl $zero, $a0, 4 <foo+0x64>". It is obviously
|
||
|
|
incorrect, because this instruction represents an indirect branch whose
|
||
|
|
target address depends on both the register value and the imm. After
|
||
|
|
this patch, it will be right despite loss of details.
|
||
|
|
|
||
|
|
(cherry picked from commit 1c68c4c57a65a67963264878bc4646be8b58854c)
|
||
|
|
---
|
||
|
|
.../MCTargetDesc/LoongArchMCTargetDesc.cpp | 3 +-
|
||
|
|
.../llvm-objdump/ELF/LoongArch/branches.s | 76 +++++++++++++++++++
|
||
|
|
.../llvm-objdump/ELF/LoongArch/lit.local.cfg | 2 +
|
||
|
|
3 files changed, 80 insertions(+), 1 deletion(-)
|
||
|
|
create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s
|
||
|
|
create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
|
||
|
|
index d580c3457fec..a4e6a09863e6 100644
|
||
|
|
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
|
||
|
|
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
|
||
|
|
@@ -97,7 +97,8 @@ public:
|
||
|
|
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
|
||
|
|
uint64_t &Target) const override {
|
||
|
|
unsigned NumOps = Inst.getNumOperands();
|
||
|
|
- if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) {
|
||
|
|
+ if ((isBranch(Inst) && !isIndirectBranch(Inst)) ||
|
||
|
|
+ Inst.getOpcode() == LoongArch::BL) {
|
||
|
|
Target = Addr + Inst.getOperand(NumOps - 1).getImm();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000000..8cb00aef9954
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s
|
||
|
|
@@ -0,0 +1,76 @@
|
||
|
|
+# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | \
|
||
|
|
+# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s
|
||
|
|
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | \
|
||
|
|
+# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s
|
||
|
|
+
|
||
|
|
+# CHECK-LABEL: <foo>:
|
||
|
|
+foo:
|
||
|
|
+# CHECK: beq $a0, $a1, 108 <foo+0x6c>
|
||
|
|
+beq $a0, $a1, .Llocal
|
||
|
|
+# CHECK: bne $a0, $a1, 104 <foo+0x6c>
|
||
|
|
+bne $a0, $a1, .Llocal
|
||
|
|
+# CHECK: blt $a0, $a1, 100 <foo+0x6c>
|
||
|
|
+blt $a0, $a1, .Llocal
|
||
|
|
+# CHECK: bltu $a0, $a1, 96 <foo+0x6c>
|
||
|
|
+bltu $a0, $a1, .Llocal
|
||
|
|
+# CHECK: bge $a0, $a1, 92 <foo+0x6c>
|
||
|
|
+bge $a0, $a1, .Llocal
|
||
|
|
+# CHECK: bgeu $a0, $a1, 88 <foo+0x6c>
|
||
|
|
+bgeu $a0, $a1, .Llocal
|
||
|
|
+# CHECK: beqz $a0, 84 <foo+0x6c>
|
||
|
|
+beqz $a0, .Llocal
|
||
|
|
+# CHECK: bnez $a0, 80 <foo+0x6c>
|
||
|
|
+bnez $a0, .Llocal
|
||
|
|
+# CHECK: bceqz $fcc6, 76 <foo+0x6c>
|
||
|
|
+bceqz $fcc6, .Llocal
|
||
|
|
+# CHECK: bcnez $fcc6, 72 <foo+0x6c>
|
||
|
|
+bcnez $fcc6, .Llocal
|
||
|
|
+
|
||
|
|
+# CHECK: beq $a0, $a1, 76 <bar>
|
||
|
|
+beq $a0, $a1, bar
|
||
|
|
+# CHECK: bne $a0, $a1, 72 <bar>
|
||
|
|
+bne $a0, $a1, bar
|
||
|
|
+# CHECK: blt $a0, $a1, 68 <bar>
|
||
|
|
+blt $a0, $a1, bar
|
||
|
|
+# CHECK: bltu $a0, $a1, 64 <bar>
|
||
|
|
+bltu $a0, $a1, bar
|
||
|
|
+# CHECK: bge $a0, $a1, 60 <bar>
|
||
|
|
+bge $a0, $a1, bar
|
||
|
|
+# CHECK: bgeu $a0, $a1, 56 <bar>
|
||
|
|
+bgeu $a0, $a1, bar
|
||
|
|
+# CHECK: beqz $a0, 52 <bar>
|
||
|
|
+beqz $a0, bar
|
||
|
|
+# CHECK: bnez $a0, 48 <bar>
|
||
|
|
+bnez $a0, bar
|
||
|
|
+# CHECK: bceqz $fcc6, 44 <bar>
|
||
|
|
+bceqz $fcc6, bar
|
||
|
|
+# CHECK: bcnez $fcc6, 40 <bar>
|
||
|
|
+bcnez $fcc6, bar
|
||
|
|
+
|
||
|
|
+# CHECK: b 28 <foo+0x6c>
|
||
|
|
+b .Llocal
|
||
|
|
+# CHECK: b 32 <bar>
|
||
|
|
+b bar
|
||
|
|
+
|
||
|
|
+# CHECK: bl 20 <foo+0x6c>
|
||
|
|
+bl .Llocal
|
||
|
|
+# CHECK: bl 24 <bar>
|
||
|
|
+bl bar
|
||
|
|
+
|
||
|
|
+# CHECK: jirl $zero, $a0, 4{{$}}
|
||
|
|
+jirl $zero, $a0, 4
|
||
|
|
+# CHECK: jirl $ra, $a0, 4{{$}}
|
||
|
|
+jirl $ra, $a0, 4
|
||
|
|
+# CHECK: ret
|
||
|
|
+ret
|
||
|
|
+
|
||
|
|
+.Llocal:
|
||
|
|
+# CHECK: 6c: nop
|
||
|
|
+# CHECK: nop
|
||
|
|
+nop
|
||
|
|
+nop
|
||
|
|
+
|
||
|
|
+# CHECK-LABEL: <bar>:
|
||
|
|
+bar:
|
||
|
|
+# CHECK: 74: nop
|
||
|
|
+nop
|
||
|
|
diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000000..cc24278acbb4
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg
|
||
|
|
@@ -0,0 +1,2 @@
|
||
|
|
+if not "LoongArch" in config.root.targets:
|
||
|
|
+ config.unsupported = True
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From 0fe85205a8637c6671f423cddd41b712085232ac Mon Sep 17 00:00:00 2001
|
||
|
|
From: hev <wangrui@loongson.cn>
|
||
|
|
Date: Thu, 23 Nov 2023 15:15:26 +0800
|
||
|
|
Subject: [PATCH 10/14] [LoongArch] Precommit a test for smul with overflow
|
||
|
|
(NFC) (#73212)
|
||
|
|
|
||
|
|
(cherry picked from commit 7414c0db962f8a5029fd44c3e0bc93d9ce20be71)
|
||
|
|
---
|
||
|
|
.../CodeGen/LoongArch/smul-with-overflow.ll | 118 ++++++++++++++++++
|
||
|
|
1 file changed, 118 insertions(+)
|
||
|
|
create mode 100644 llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
|
||
|
|
|
||
|
|
diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000000..a53e77e5aa4b
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
|
||
|
|
@@ -0,0 +1,118 @@
|
||
|
|
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||
|
|
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
|
||
|
|
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
|
||
|
|
+
|
||
|
|
+define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
|
||
|
|
+; LA32-LABEL: smuloi64:
|
||
|
|
+; LA32: # %bb.0:
|
||
|
|
+; LA32-NEXT: addi.w $sp, $sp, -16
|
||
|
|
+; LA32-NEXT: .cfi_def_cfa_offset 16
|
||
|
|
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: .cfi_offset 1, -4
|
||
|
|
+; LA32-NEXT: .cfi_offset 22, -8
|
||
|
|
+; LA32-NEXT: move $fp, $a4
|
||
|
|
+; LA32-NEXT: st.w $zero, $sp, 4
|
||
|
|
+; LA32-NEXT: addi.w $a4, $sp, 4
|
||
|
|
+; LA32-NEXT: bl %plt(__mulodi4)
|
||
|
|
+; LA32-NEXT: st.w $a1, $fp, 4
|
||
|
|
+; LA32-NEXT: st.w $a0, $fp, 0
|
||
|
|
+; LA32-NEXT: ld.w $a0, $sp, 4
|
||
|
|
+; LA32-NEXT: sltu $a0, $zero, $a0
|
||
|
|
+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: addi.w $sp, $sp, 16
|
||
|
|
+; LA32-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-LABEL: smuloi64:
|
||
|
|
+; LA64: # %bb.0:
|
||
|
|
+; LA64-NEXT: mul.d $a3, $a0, $a1
|
||
|
|
+; LA64-NEXT: st.d $a3, $a2, 0
|
||
|
|
+; LA64-NEXT: mulh.d $a0, $a0, $a1
|
||
|
|
+; LA64-NEXT: srai.d $a1, $a3, 63
|
||
|
|
+; LA64-NEXT: xor $a0, $a0, $a1
|
||
|
|
+; LA64-NEXT: sltu $a0, $zero, $a0
|
||
|
|
+; LA64-NEXT: ret
|
||
|
|
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
|
||
|
|
+ %val = extractvalue {i64, i1} %t, 0
|
||
|
|
+ %obit = extractvalue {i64, i1} %t, 1
|
||
|
|
+ store i64 %val, ptr %res
|
||
|
|
+ ret i1 %obit
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
|
||
|
|
+; LA32-LABEL: smuloi128:
|
||
|
|
+; LA32: # %bb.0:
|
||
|
|
+; LA32-NEXT: addi.w $sp, $sp, -64
|
||
|
|
+; LA32-NEXT: .cfi_def_cfa_offset 64
|
||
|
|
+; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: .cfi_offset 1, -4
|
||
|
|
+; LA32-NEXT: .cfi_offset 22, -8
|
||
|
|
+; LA32-NEXT: move $fp, $a2
|
||
|
|
+; LA32-NEXT: st.w $zero, $sp, 52
|
||
|
|
+; LA32-NEXT: ld.w $a2, $a1, 12
|
||
|
|
+; LA32-NEXT: st.w $a2, $sp, 12
|
||
|
|
+; LA32-NEXT: ld.w $a2, $a1, 8
|
||
|
|
+; LA32-NEXT: st.w $a2, $sp, 8
|
||
|
|
+; LA32-NEXT: ld.w $a2, $a1, 4
|
||
|
|
+; LA32-NEXT: st.w $a2, $sp, 4
|
||
|
|
+; LA32-NEXT: ld.w $a1, $a1, 0
|
||
|
|
+; LA32-NEXT: st.w $a1, $sp, 0
|
||
|
|
+; LA32-NEXT: ld.w $a1, $a0, 12
|
||
|
|
+; LA32-NEXT: st.w $a1, $sp, 28
|
||
|
|
+; LA32-NEXT: ld.w $a1, $a0, 8
|
||
|
|
+; LA32-NEXT: st.w $a1, $sp, 24
|
||
|
|
+; LA32-NEXT: ld.w $a1, $a0, 4
|
||
|
|
+; LA32-NEXT: st.w $a1, $sp, 20
|
||
|
|
+; LA32-NEXT: ld.w $a0, $a0, 0
|
||
|
|
+; LA32-NEXT: st.w $a0, $sp, 16
|
||
|
|
+; LA32-NEXT: addi.w $a0, $sp, 32
|
||
|
|
+; LA32-NEXT: addi.w $a1, $sp, 16
|
||
|
|
+; LA32-NEXT: addi.w $a2, $sp, 0
|
||
|
|
+; LA32-NEXT: addi.w $a3, $sp, 52
|
||
|
|
+; LA32-NEXT: bl %plt(__muloti4)
|
||
|
|
+; LA32-NEXT: ld.w $a0, $sp, 44
|
||
|
|
+; LA32-NEXT: st.w $a0, $fp, 12
|
||
|
|
+; LA32-NEXT: ld.w $a0, $sp, 40
|
||
|
|
+; LA32-NEXT: st.w $a0, $fp, 8
|
||
|
|
+; LA32-NEXT: ld.w $a0, $sp, 36
|
||
|
|
+; LA32-NEXT: st.w $a0, $fp, 4
|
||
|
|
+; LA32-NEXT: ld.w $a0, $sp, 32
|
||
|
|
+; LA32-NEXT: st.w $a0, $fp, 0
|
||
|
|
+; LA32-NEXT: ld.w $a0, $sp, 52
|
||
|
|
+; LA32-NEXT: sltu $a0, $zero, $a0
|
||
|
|
+; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: addi.w $sp, $sp, 64
|
||
|
|
+; LA32-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-LABEL: smuloi128:
|
||
|
|
+; LA64: # %bb.0:
|
||
|
|
+; LA64-NEXT: addi.d $sp, $sp, -32
|
||
|
|
+; LA64-NEXT: .cfi_def_cfa_offset 32
|
||
|
|
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
|
||
|
|
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
|
||
|
|
+; LA64-NEXT: .cfi_offset 1, -8
|
||
|
|
+; LA64-NEXT: .cfi_offset 22, -16
|
||
|
|
+; LA64-NEXT: move $fp, $a4
|
||
|
|
+; LA64-NEXT: st.d $zero, $sp, 8
|
||
|
|
+; LA64-NEXT: addi.d $a4, $sp, 8
|
||
|
|
+; LA64-NEXT: bl %plt(__muloti4)
|
||
|
|
+; LA64-NEXT: st.d $a1, $fp, 8
|
||
|
|
+; LA64-NEXT: st.d $a0, $fp, 0
|
||
|
|
+; LA64-NEXT: ld.d $a0, $sp, 8
|
||
|
|
+; LA64-NEXT: sltu $a0, $zero, $a0
|
||
|
|
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
|
||
|
|
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
|
||
|
|
+; LA64-NEXT: addi.d $sp, $sp, 32
|
||
|
|
+; LA64-NEXT: ret
|
||
|
|
+ %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
|
||
|
|
+ %val = extractvalue {i128, i1} %t, 0
|
||
|
|
+ %obit = extractvalue {i128, i1} %t, 1
|
||
|
|
+ store i128 %val, ptr %res
|
||
|
|
+ ret i1 %obit
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
|
||
|
|
+declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From e29ff285726046ec46c9005c67ba992e3efc8ace Mon Sep 17 00:00:00 2001
|
||
|
|
From: hev <wangrui@loongson.cn>
|
||
|
|
Date: Thu, 23 Nov 2023 19:34:50 +0800
|
||
|
|
Subject: [PATCH 11/14] [LoongArch] Disable mulodi4 and muloti4 libcalls
|
||
|
|
(#73199)
|
||
|
|
|
||
|
|
This library function only exists in compiler-rt not libgcc. So this
|
||
|
|
would fail to link unless we were linking with compiler-rt.
|
||
|
|
|
||
|
|
Fixes https://github.com/ClangBuiltLinux/linux/issues/1958
|
||
|
|
|
||
|
|
(cherry picked from commit 0d9f557b6c36da3aa92daff4c0d37ea821d7ae1e)
|
||
|
|
---
|
||
|
|
.../LoongArch/LoongArchISelLowering.cpp | 5 +
|
||
|
|
.../CodeGen/LoongArch/smul-with-overflow.ll | 463 +++++++++++++++---
|
||
|
|
2 files changed, 397 insertions(+), 71 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
|
||
|
|
index f7eacd56c542..ed106cb766bc 100644
|
||
|
|
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
|
||
|
|
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
|
||
|
|
@@ -152,8 +152,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
|
||
|
|
|
||
|
|
// Set libcalls.
|
||
|
|
setLibcallName(RTLIB::MUL_I128, nullptr);
|
||
|
|
+ // The MULO libcall is not part of libgcc, only compiler-rt.
|
||
|
|
+ setLibcallName(RTLIB::MULO_I64, nullptr);
|
||
|
|
}
|
||
|
|
|
||
|
|
+ // The MULO libcall is not part of libgcc, only compiler-rt.
|
||
|
|
+ setLibcallName(RTLIB::MULO_I128, nullptr);
|
||
|
|
+
|
||
|
|
static const ISD::CondCode FPCCToExpand[] = {
|
||
|
|
ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
|
||
|
|
ISD::SETGE, ISD::SETNE, ISD::SETGT};
|
||
|
|
diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
|
||
|
|
index a53e77e5aa4b..6cba4108d63c 100644
|
||
|
|
--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
|
||
|
|
+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
|
||
|
|
@@ -5,23 +5,53 @@
|
||
|
|
define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
|
||
|
|
; LA32-LABEL: smuloi64:
|
||
|
|
; LA32: # %bb.0:
|
||
|
|
-; LA32-NEXT: addi.w $sp, $sp, -16
|
||
|
|
-; LA32-NEXT: .cfi_def_cfa_offset 16
|
||
|
|
-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
|
||
|
|
-; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
|
||
|
|
-; LA32-NEXT: .cfi_offset 1, -4
|
||
|
|
-; LA32-NEXT: .cfi_offset 22, -8
|
||
|
|
-; LA32-NEXT: move $fp, $a4
|
||
|
|
-; LA32-NEXT: st.w $zero, $sp, 4
|
||
|
|
-; LA32-NEXT: addi.w $a4, $sp, 4
|
||
|
|
-; LA32-NEXT: bl %plt(__mulodi4)
|
||
|
|
-; LA32-NEXT: st.w $a1, $fp, 4
|
||
|
|
-; LA32-NEXT: st.w $a0, $fp, 0
|
||
|
|
-; LA32-NEXT: ld.w $a0, $sp, 4
|
||
|
|
+; LA32-NEXT: srai.w $a5, $a1, 31
|
||
|
|
+; LA32-NEXT: mul.w $a6, $a2, $a5
|
||
|
|
+; LA32-NEXT: mulh.wu $a7, $a2, $a5
|
||
|
|
+; LA32-NEXT: add.w $a7, $a7, $a6
|
||
|
|
+; LA32-NEXT: mul.w $a5, $a3, $a5
|
||
|
|
+; LA32-NEXT: add.w $a5, $a7, $a5
|
||
|
|
+; LA32-NEXT: srai.w $a7, $a3, 31
|
||
|
|
+; LA32-NEXT: mul.w $t0, $a7, $a1
|
||
|
|
+; LA32-NEXT: mulh.wu $t1, $a7, $a0
|
||
|
|
+; LA32-NEXT: add.w $t0, $t1, $t0
|
||
|
|
+; LA32-NEXT: mul.w $a7, $a7, $a0
|
||
|
|
+; LA32-NEXT: add.w $t0, $t0, $a7
|
||
|
|
+; LA32-NEXT: add.w $a5, $t0, $a5
|
||
|
|
+; LA32-NEXT: mulh.wu $t0, $a0, $a2
|
||
|
|
+; LA32-NEXT: mul.w $t1, $a1, $a2
|
||
|
|
+; LA32-NEXT: add.w $t0, $t1, $t0
|
||
|
|
+; LA32-NEXT: sltu $t1, $t0, $t1
|
||
|
|
+; LA32-NEXT: mulh.wu $t2, $a1, $a2
|
||
|
|
+; LA32-NEXT: add.w $t1, $t2, $t1
|
||
|
|
+; LA32-NEXT: mul.w $t2, $a0, $a3
|
||
|
|
+; LA32-NEXT: add.w $t0, $t2, $t0
|
||
|
|
+; LA32-NEXT: sltu $t2, $t0, $t2
|
||
|
|
+; LA32-NEXT: mulh.wu $t3, $a0, $a3
|
||
|
|
+; LA32-NEXT: add.w $t2, $t3, $t2
|
||
|
|
+; LA32-NEXT: add.w $a6, $a7, $a6
|
||
|
|
+; LA32-NEXT: sltu $a7, $a6, $a7
|
||
|
|
+; LA32-NEXT: add.w $a5, $a5, $a7
|
||
|
|
+; LA32-NEXT: mul.w $a0, $a0, $a2
|
||
|
|
+; LA32-NEXT: mul.w $a2, $a1, $a3
|
||
|
|
+; LA32-NEXT: mulh.wu $a1, $a1, $a3
|
||
|
|
+; LA32-NEXT: add.w $a3, $t1, $t2
|
||
|
|
+; LA32-NEXT: sltu $a7, $a3, $t1
|
||
|
|
+; LA32-NEXT: add.w $a1, $a1, $a7
|
||
|
|
+; LA32-NEXT: st.w $a0, $a4, 0
|
||
|
|
+; LA32-NEXT: add.w $a0, $a2, $a3
|
||
|
|
+; LA32-NEXT: sltu $a2, $a0, $a2
|
||
|
|
+; LA32-NEXT: add.w $a1, $a1, $a2
|
||
|
|
+; LA32-NEXT: st.w $t0, $a4, 4
|
||
|
|
+; LA32-NEXT: add.w $a1, $a1, $a5
|
||
|
|
+; LA32-NEXT: add.w $a2, $a0, $a6
|
||
|
|
+; LA32-NEXT: sltu $a0, $a2, $a0
|
||
|
|
+; LA32-NEXT: add.w $a0, $a1, $a0
|
||
|
|
+; LA32-NEXT: srai.w $a1, $t0, 31
|
||
|
|
+; LA32-NEXT: xor $a0, $a0, $a1
|
||
|
|
+; LA32-NEXT: xor $a1, $a2, $a1
|
||
|
|
+; LA32-NEXT: or $a0, $a1, $a0
|
||
|
|
; LA32-NEXT: sltu $a0, $zero, $a0
|
||
|
|
-; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
|
||
|
|
-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
|
||
|
|
-; LA32-NEXT: addi.w $sp, $sp, 16
|
||
|
|
; LA32-NEXT: ret
|
||
|
|
;
|
||
|
|
; LA64-LABEL: smuloi64:
|
||
|
|
@@ -43,69 +73,360 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
|
||
|
|
define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
|
||
|
|
; LA32-LABEL: smuloi128:
|
||
|
|
; LA32: # %bb.0:
|
||
|
|
-; LA32-NEXT: addi.w $sp, $sp, -64
|
||
|
|
-; LA32-NEXT: .cfi_def_cfa_offset 64
|
||
|
|
-; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
|
||
|
|
-; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: addi.w $sp, $sp, -96
|
||
|
|
+; LA32-NEXT: .cfi_def_cfa_offset 96
|
||
|
|
+; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $s0, $sp, 84 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $s1, $sp, 80 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $s2, $sp, 76 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $s3, $sp, 72 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $s4, $sp, 68 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $s5, $sp, 64 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $s6, $sp, 60 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $s7, $sp, 56 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill
|
||
|
|
; LA32-NEXT: .cfi_offset 1, -4
|
||
|
|
; LA32-NEXT: .cfi_offset 22, -8
|
||
|
|
-; LA32-NEXT: move $fp, $a2
|
||
|
|
-; LA32-NEXT: st.w $zero, $sp, 52
|
||
|
|
-; LA32-NEXT: ld.w $a2, $a1, 12
|
||
|
|
-; LA32-NEXT: st.w $a2, $sp, 12
|
||
|
|
-; LA32-NEXT: ld.w $a2, $a1, 8
|
||
|
|
-; LA32-NEXT: st.w $a2, $sp, 8
|
||
|
|
-; LA32-NEXT: ld.w $a2, $a1, 4
|
||
|
|
-; LA32-NEXT: st.w $a2, $sp, 4
|
||
|
|
-; LA32-NEXT: ld.w $a1, $a1, 0
|
||
|
|
-; LA32-NEXT: st.w $a1, $sp, 0
|
||
|
|
-; LA32-NEXT: ld.w $a1, $a0, 12
|
||
|
|
-; LA32-NEXT: st.w $a1, $sp, 28
|
||
|
|
-; LA32-NEXT: ld.w $a1, $a0, 8
|
||
|
|
-; LA32-NEXT: st.w $a1, $sp, 24
|
||
|
|
-; LA32-NEXT: ld.w $a1, $a0, 4
|
||
|
|
-; LA32-NEXT: st.w $a1, $sp, 20
|
||
|
|
-; LA32-NEXT: ld.w $a0, $a0, 0
|
||
|
|
-; LA32-NEXT: st.w $a0, $sp, 16
|
||
|
|
-; LA32-NEXT: addi.w $a0, $sp, 32
|
||
|
|
-; LA32-NEXT: addi.w $a1, $sp, 16
|
||
|
|
-; LA32-NEXT: addi.w $a2, $sp, 0
|
||
|
|
-; LA32-NEXT: addi.w $a3, $sp, 52
|
||
|
|
-; LA32-NEXT: bl %plt(__muloti4)
|
||
|
|
-; LA32-NEXT: ld.w $a0, $sp, 44
|
||
|
|
-; LA32-NEXT: st.w $a0, $fp, 12
|
||
|
|
-; LA32-NEXT: ld.w $a0, $sp, 40
|
||
|
|
-; LA32-NEXT: st.w $a0, $fp, 8
|
||
|
|
-; LA32-NEXT: ld.w $a0, $sp, 36
|
||
|
|
-; LA32-NEXT: st.w $a0, $fp, 4
|
||
|
|
-; LA32-NEXT: ld.w $a0, $sp, 32
|
||
|
|
-; LA32-NEXT: st.w $a0, $fp, 0
|
||
|
|
-; LA32-NEXT: ld.w $a0, $sp, 52
|
||
|
|
+; LA32-NEXT: .cfi_offset 23, -12
|
||
|
|
+; LA32-NEXT: .cfi_offset 24, -16
|
||
|
|
+; LA32-NEXT: .cfi_offset 25, -20
|
||
|
|
+; LA32-NEXT: .cfi_offset 26, -24
|
||
|
|
+; LA32-NEXT: .cfi_offset 27, -28
|
||
|
|
+; LA32-NEXT: .cfi_offset 28, -32
|
||
|
|
+; LA32-NEXT: .cfi_offset 29, -36
|
||
|
|
+; LA32-NEXT: .cfi_offset 30, -40
|
||
|
|
+; LA32-NEXT: .cfi_offset 31, -44
|
||
|
|
+; LA32-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: ld.w $a6, $a1, 0
|
||
|
|
+; LA32-NEXT: ld.w $a7, $a0, 0
|
||
|
|
+; LA32-NEXT: mulh.wu $a3, $a7, $a6
|
||
|
|
+; LA32-NEXT: ld.w $a5, $a0, 4
|
||
|
|
+; LA32-NEXT: mul.w $a4, $a5, $a6
|
||
|
|
+; LA32-NEXT: add.w $a3, $a4, $a3
|
||
|
|
+; LA32-NEXT: sltu $a4, $a3, $a4
|
||
|
|
+; LA32-NEXT: mulh.wu $t0, $a5, $a6
|
||
|
|
+; LA32-NEXT: add.w $a4, $t0, $a4
|
||
|
|
+; LA32-NEXT: ld.w $t0, $a1, 4
|
||
|
|
+; LA32-NEXT: mul.w $t1, $a7, $t0
|
||
|
|
+; LA32-NEXT: add.w $a3, $t1, $a3
|
||
|
|
+; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: sltu $t1, $a3, $t1
|
||
|
|
+; LA32-NEXT: mulh.wu $t2, $a7, $t0
|
||
|
|
+; LA32-NEXT: add.w $t1, $t2, $t1
|
||
|
|
+; LA32-NEXT: ld.w $t4, $a0, 12
|
||
|
|
+; LA32-NEXT: ld.w $t2, $a0, 8
|
||
|
|
+; LA32-NEXT: ld.w $t3, $a1, 8
|
||
|
|
+; LA32-NEXT: mulh.wu $a0, $t2, $t3
|
||
|
|
+; LA32-NEXT: mul.w $t5, $t4, $t3
|
||
|
|
+; LA32-NEXT: add.w $a0, $t5, $a0
|
||
|
|
+; LA32-NEXT: sltu $t5, $a0, $t5
|
||
|
|
+; LA32-NEXT: mulh.wu $t6, $t4, $t3
|
||
|
|
+; LA32-NEXT: add.w $t5, $t6, $t5
|
||
|
|
+; LA32-NEXT: ld.w $t7, $a1, 12
|
||
|
|
+; LA32-NEXT: mul.w $a1, $t2, $t7
|
||
|
|
+; LA32-NEXT: add.w $a0, $a1, $a0
|
||
|
|
+; LA32-NEXT: st.w $a0, $sp, 48 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: sltu $a1, $a0, $a1
|
||
|
|
+; LA32-NEXT: mulh.wu $t6, $t2, $t7
|
||
|
|
+; LA32-NEXT: add.w $t6, $t6, $a1
|
||
|
|
+; LA32-NEXT: srai.w $s7, $t4, 31
|
||
|
|
+; LA32-NEXT: mul.w $a1, $s7, $t7
|
||
|
|
+; LA32-NEXT: mulh.wu $t8, $s7, $t3
|
||
|
|
+; LA32-NEXT: add.w $t8, $t8, $a1
|
||
|
|
+; LA32-NEXT: mulh.wu $fp, $a6, $s7
|
||
|
|
+; LA32-NEXT: mul.w $s6, $t0, $s7
|
||
|
|
+; LA32-NEXT: add.w $s8, $s6, $fp
|
||
|
|
+; LA32-NEXT: mul.w $a1, $a6, $s7
|
||
|
|
+; LA32-NEXT: add.w $ra, $a1, $s8
|
||
|
|
+; LA32-NEXT: sltu $s0, $ra, $a1
|
||
|
|
+; LA32-NEXT: add.w $a0, $fp, $s0
|
||
|
|
+; LA32-NEXT: add.w $a3, $a4, $t1
|
||
|
|
+; LA32-NEXT: st.w $a3, $sp, 20 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: sltu $a4, $a3, $a4
|
||
|
|
+; LA32-NEXT: mulh.wu $t1, $a5, $t0
|
||
|
|
+; LA32-NEXT: add.w $a3, $t1, $a4
|
||
|
|
+; LA32-NEXT: st.w $a3, $sp, 28 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: srai.w $s4, $t7, 31
|
||
|
|
+; LA32-NEXT: mul.w $fp, $a7, $s4
|
||
|
|
+; LA32-NEXT: mulh.wu $a4, $a7, $s4
|
||
|
|
+; LA32-NEXT: add.w $s1, $a4, $fp
|
||
|
|
+; LA32-NEXT: sltu $s0, $s1, $fp
|
||
|
|
+; LA32-NEXT: add.w $s5, $a4, $s0
|
||
|
|
+; LA32-NEXT: mul.w $a4, $s7, $t3
|
||
|
|
+; LA32-NEXT: add.w $t8, $t8, $a4
|
||
|
|
+; LA32-NEXT: add.w $s0, $ra, $t8
|
||
|
|
+; LA32-NEXT: add.w $a3, $a1, $a4
|
||
|
|
+; LA32-NEXT: st.w $a3, $sp, 32 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: sltu $a4, $a3, $a1
|
||
|
|
+; LA32-NEXT: add.w $a3, $s0, $a4
|
||
|
|
+; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: add.w $s3, $t5, $t6
|
||
|
|
+; LA32-NEXT: sltu $a4, $s3, $t5
|
||
|
|
+; LA32-NEXT: mulh.wu $t5, $t4, $t7
|
||
|
|
+; LA32-NEXT: add.w $a3, $t5, $a4
|
||
|
|
+; LA32-NEXT: st.w $a3, $sp, 16 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: mul.w $a4, $a7, $a6
|
||
|
|
+; LA32-NEXT: st.w $a4, $a2, 0
|
||
|
|
+; LA32-NEXT: sltu $a4, $s8, $s6
|
||
|
|
+; LA32-NEXT: mulh.wu $t5, $t0, $s7
|
||
|
|
+; LA32-NEXT: add.w $a4, $t5, $a4
|
||
|
|
+; LA32-NEXT: add.w $t1, $a4, $a0
|
||
|
|
+; LA32-NEXT: sltu $a4, $t1, $a4
|
||
|
|
+; LA32-NEXT: add.w $s2, $t5, $a4
|
||
|
|
+; LA32-NEXT: mulh.wu $a4, $a7, $t3
|
||
|
|
+; LA32-NEXT: mul.w $t5, $a5, $t3
|
||
|
|
+; LA32-NEXT: add.w $a4, $t5, $a4
|
||
|
|
+; LA32-NEXT: sltu $t5, $a4, $t5
|
||
|
|
+; LA32-NEXT: mulh.wu $t6, $a5, $t3
|
||
|
|
+; LA32-NEXT: add.w $a3, $t6, $t5
|
||
|
|
+; LA32-NEXT: mul.w $t6, $a7, $t7
|
||
|
|
+; LA32-NEXT: add.w $t5, $t6, $a4
|
||
|
|
+; LA32-NEXT: sltu $a4, $t5, $t6
|
||
|
|
+; LA32-NEXT: mulh.wu $t6, $a7, $t7
|
||
|
|
+; LA32-NEXT: add.w $a4, $t6, $a4
|
||
|
|
+; LA32-NEXT: mulh.wu $t6, $t2, $a6
|
||
|
|
+; LA32-NEXT: mul.w $s7, $t4, $a6
|
||
|
|
+; LA32-NEXT: add.w $t6, $s7, $t6
|
||
|
|
+; LA32-NEXT: sltu $s7, $t6, $s7
|
||
|
|
+; LA32-NEXT: mulh.wu $s8, $t4, $a6
|
||
|
|
+; LA32-NEXT: add.w $a0, $s8, $s7
|
||
|
|
+; LA32-NEXT: mul.w $s7, $t2, $t0
|
||
|
|
+; LA32-NEXT: add.w $t6, $s7, $t6
|
||
|
|
+; LA32-NEXT: sltu $s7, $t6, $s7
|
||
|
|
+; LA32-NEXT: mulh.wu $s8, $t2, $t0
|
||
|
|
+; LA32-NEXT: add.w $a2, $s8, $s7
|
||
|
|
+; LA32-NEXT: mul.w $s8, $a5, $s4
|
||
|
|
+; LA32-NEXT: add.w $s7, $s1, $s8
|
||
|
|
+; LA32-NEXT: add.w $s1, $s7, $ra
|
||
|
|
+; LA32-NEXT: add.w $a1, $fp, $a1
|
||
|
|
+; LA32-NEXT: st.w $a1, $sp, 40 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: sltu $ra, $a1, $fp
|
||
|
|
+; LA32-NEXT: add.w $a1, $s1, $ra
|
||
|
|
+; LA32-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill
|
||
|
|
+; LA32-NEXT: xor $s0, $a1, $s7
|
||
|
|
+; LA32-NEXT: sltui $s0, $s0, 1
|
||
|
|
+; LA32-NEXT: sltu $a1, $a1, $s7
|
||
|
|
+; LA32-NEXT: masknez $s1, $a1, $s0
|
||
|
|
+; LA32-NEXT: maskeqz $s0, $ra, $s0
|
||
|
|
+; LA32-NEXT: add.w $t1, $s6, $t1
|
||
|
|
+; LA32-NEXT: sltu $s6, $t1, $s6
|
||
|
|
+; LA32-NEXT: add.w $s2, $s2, $s6
|
||
|
|
+; LA32-NEXT: add.w $a2, $a0, $a2
|
||
|
|
+; LA32-NEXT: sltu $a0, $a2, $a0
|
||
|
|
+; LA32-NEXT: mulh.wu $s6, $t4, $t0
|
||
|
|
+; LA32-NEXT: add.w $t8, $s6, $a0
|
||
|
|
+; LA32-NEXT: add.w $a4, $a3, $a4
|
||
|
|
+; LA32-NEXT: sltu $a3, $a4, $a3
|
||
|
|
+; LA32-NEXT: mulh.wu $s6, $a5, $t7
|
||
|
|
+; LA32-NEXT: add.w $a3, $s6, $a3
|
||
|
|
+; LA32-NEXT: mul.w $s6, $t4, $t7
|
||
|
|
+; LA32-NEXT: mul.w $t7, $a5, $t7
|
||
|
|
+; LA32-NEXT: mul.w $ra, $t4, $t0
|
||
|
|
+; LA32-NEXT: mul.w $t0, $a5, $t0
|
||
|
|
+; LA32-NEXT: mul.w $t4, $t4, $s4
|
||
|
|
+; LA32-NEXT: mul.w $a7, $a7, $t3
|
||
|
|
+; LA32-NEXT: mul.w $a6, $t2, $a6
|
||
|
|
+; LA32-NEXT: mul.w $t3, $t2, $t3
|
||
|
|
+; LA32-NEXT: mul.w $a0, $t2, $s4
|
||
|
|
+; LA32-NEXT: mulh.wu $t2, $t2, $s4
|
||
|
|
+; LA32-NEXT: mulh.wu $a5, $s4, $a5
|
||
|
|
+; LA32-NEXT: sltu $s4, $s7, $s8
|
||
|
|
+; LA32-NEXT: add.w $s4, $a5, $s4
|
||
|
|
+; LA32-NEXT: add.w $s4, $s5, $s4
|
||
|
|
+; LA32-NEXT: sltu $s5, $s4, $s5
|
||
|
|
+; LA32-NEXT: add.w $s5, $a5, $s5
|
||
|
|
+; LA32-NEXT: ld.w $a1, $sp, 20 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: add.w $a1, $t0, $a1
|
||
|
|
+; LA32-NEXT: sltu $a5, $a1, $t0
|
||
|
|
+; LA32-NEXT: ld.w $t0, $sp, 28 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: add.w $t0, $t0, $a5
|
||
|
|
+; LA32-NEXT: or $s0, $s0, $s1
|
||
|
|
+; LA32-NEXT: add.w $a4, $t7, $a4
|
||
|
|
+; LA32-NEXT: sltu $a5, $a4, $t7
|
||
|
|
+; LA32-NEXT: add.w $t7, $a3, $a5
|
||
|
|
+; LA32-NEXT: add.w $s1, $ra, $a2
|
||
|
|
+; LA32-NEXT: sltu $a2, $s1, $ra
|
||
|
|
+; LA32-NEXT: add.w $t8, $t8, $a2
|
||
|
|
+; LA32-NEXT: add.w $a5, $s6, $s3
|
||
|
|
+; LA32-NEXT: sltu $a2, $a5, $s6
|
||
|
|
+; LA32-NEXT: ld.w $a3, $sp, 16 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: add.w $a2, $a3, $a2
|
||
|
|
+; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: st.w $a3, $s6, 4
|
||
|
|
+; LA32-NEXT: ld.w $a3, $sp, 24 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: add.w $a3, $s2, $a3
|
||
|
|
+; LA32-NEXT: ld.w $s2, $sp, 32 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: add.w $s2, $t1, $s2
|
||
|
|
+; LA32-NEXT: sltu $t1, $s2, $t1
|
||
|
|
+; LA32-NEXT: add.w $a3, $a3, $t1
|
||
|
|
+; LA32-NEXT: add.w $t1, $s8, $s4
|
||
|
|
+; LA32-NEXT: sltu $s3, $t1, $s8
|
||
|
|
+; LA32-NEXT: add.w $s3, $s5, $s3
|
||
|
|
+; LA32-NEXT: add.w $t2, $t2, $a0
|
||
|
|
+; LA32-NEXT: add.w $t2, $t2, $t4
|
||
|
|
+; LA32-NEXT: add.w $t2, $t2, $s7
|
||
|
|
+; LA32-NEXT: add.w $t4, $a0, $fp
|
||
|
|
+; LA32-NEXT: sltu $a0, $t4, $a0
|
||
|
|
+; LA32-NEXT: add.w $a0, $t2, $a0
|
||
|
|
+; LA32-NEXT: add.w $a0, $s3, $a0
|
||
|
|
+; LA32-NEXT: add.w $t2, $t1, $t4
|
||
|
|
+; LA32-NEXT: sltu $t1, $t2, $t1
|
||
|
|
+; LA32-NEXT: add.w $a0, $a0, $t1
|
||
|
|
+; LA32-NEXT: add.w $a0, $a0, $a3
|
||
|
|
+; LA32-NEXT: add.w $t1, $t2, $s2
|
||
|
|
+; LA32-NEXT: sltu $a3, $t1, $t2
|
||
|
|
+; LA32-NEXT: add.w $a0, $a0, $a3
|
||
|
|
+; LA32-NEXT: add.w $a3, $t6, $t0
|
||
|
|
+; LA32-NEXT: add.w $a1, $a6, $a1
|
||
|
|
+; LA32-NEXT: sltu $a6, $a1, $a6
|
||
|
|
+; LA32-NEXT: add.w $t0, $a3, $a6
|
||
|
|
+; LA32-NEXT: add.w $a1, $a7, $a1
|
||
|
|
+; LA32-NEXT: sltu $a7, $a1, $a7
|
||
|
|
+; LA32-NEXT: add.w $a3, $t5, $t0
|
||
|
|
+; LA32-NEXT: add.w $a3, $a3, $a7
|
||
|
|
+; LA32-NEXT: sltu $t2, $a3, $t5
|
||
|
|
+; LA32-NEXT: xor $t4, $a3, $t5
|
||
|
|
+; LA32-NEXT: sltui $t4, $t4, 1
|
||
|
|
+; LA32-NEXT: masknez $t2, $t2, $t4
|
||
|
|
+; LA32-NEXT: maskeqz $a7, $a7, $t4
|
||
|
|
+; LA32-NEXT: st.w $a1, $s6, 8
|
||
|
|
+; LA32-NEXT: or $a1, $a7, $t2
|
||
|
|
+; LA32-NEXT: sltu $a7, $t0, $t6
|
||
|
|
+; LA32-NEXT: xor $t0, $t0, $t6
|
||
|
|
+; LA32-NEXT: sltui $t0, $t0, 1
|
||
|
|
+; LA32-NEXT: masknez $a7, $a7, $t0
|
||
|
|
+; LA32-NEXT: maskeqz $a6, $a6, $t0
|
||
|
|
+; LA32-NEXT: or $a6, $a6, $a7
|
||
|
|
+; LA32-NEXT: add.w $a6, $s1, $a6
|
||
|
|
+; LA32-NEXT: sltu $a7, $a6, $s1
|
||
|
|
+; LA32-NEXT: add.w $a7, $t8, $a7
|
||
|
|
+; LA32-NEXT: add.w $a1, $a4, $a1
|
||
|
|
+; LA32-NEXT: sltu $a4, $a1, $a4
|
||
|
|
+; LA32-NEXT: add.w $a4, $t7, $a4
|
||
|
|
+; LA32-NEXT: add.w $t0, $t1, $s0
|
||
|
|
+; LA32-NEXT: sltu $t1, $t0, $t1
|
||
|
|
+; LA32-NEXT: add.w $a0, $a0, $t1
|
||
|
|
+; LA32-NEXT: st.w $a3, $s6, 12
|
||
|
|
+; LA32-NEXT: add.w $a1, $a6, $a1
|
||
|
|
+; LA32-NEXT: sltu $a6, $a1, $a6
|
||
|
|
+; LA32-NEXT: add.w $a4, $a7, $a4
|
||
|
|
+; LA32-NEXT: add.w $a4, $a4, $a6
|
||
|
|
+; LA32-NEXT: sltu $t1, $a4, $a7
|
||
|
|
+; LA32-NEXT: xor $a7, $a4, $a7
|
||
|
|
+; LA32-NEXT: sltui $a7, $a7, 1
|
||
|
|
+; LA32-NEXT: masknez $t1, $t1, $a7
|
||
|
|
+; LA32-NEXT: maskeqz $a6, $a6, $a7
|
||
|
|
+; LA32-NEXT: or $a6, $a6, $t1
|
||
|
|
+; LA32-NEXT: add.w $a6, $a5, $a6
|
||
|
|
+; LA32-NEXT: sltu $a5, $a6, $a5
|
||
|
|
+; LA32-NEXT: add.w $a2, $a2, $a5
|
||
|
|
+; LA32-NEXT: ld.w $t1, $sp, 48 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: add.w $a4, $t1, $a4
|
||
|
|
+; LA32-NEXT: add.w $a1, $t3, $a1
|
||
|
|
+; LA32-NEXT: sltu $a5, $a1, $t3
|
||
|
|
+; LA32-NEXT: add.w $a4, $a4, $a5
|
||
|
|
+; LA32-NEXT: sltu $a7, $a4, $t1
|
||
|
|
+; LA32-NEXT: xor $t1, $a4, $t1
|
||
|
|
+; LA32-NEXT: sltui $t1, $t1, 1
|
||
|
|
+; LA32-NEXT: masknez $a7, $a7, $t1
|
||
|
|
+; LA32-NEXT: maskeqz $a5, $a5, $t1
|
||
|
|
+; LA32-NEXT: or $a5, $a5, $a7
|
||
|
|
+; LA32-NEXT: add.w $a5, $a6, $a5
|
||
|
|
+; LA32-NEXT: sltu $a6, $a5, $a6
|
||
|
|
+; LA32-NEXT: add.w $a2, $a2, $a6
|
||
|
|
+; LA32-NEXT: add.w $a0, $a2, $a0
|
||
|
|
+; LA32-NEXT: add.w $a2, $a5, $t0
|
||
|
|
+; LA32-NEXT: sltu $a5, $a2, $a5
|
||
|
|
+; LA32-NEXT: add.w $a0, $a0, $a5
|
||
|
|
+; LA32-NEXT: ld.w $a5, $sp, 40 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: add.w $a5, $a1, $a5
|
||
|
|
+; LA32-NEXT: sltu $a1, $a5, $a1
|
||
|
|
+; LA32-NEXT: ld.w $a6, $sp, 36 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: add.w $a6, $a4, $a6
|
||
|
|
+; LA32-NEXT: add.w $a6, $a6, $a1
|
||
|
|
+; LA32-NEXT: sltu $a7, $a6, $a4
|
||
|
|
+; LA32-NEXT: xor $a4, $a6, $a4
|
||
|
|
+; LA32-NEXT: sltui $a4, $a4, 1
|
||
|
|
+; LA32-NEXT: masknez $a7, $a7, $a4
|
||
|
|
+; LA32-NEXT: maskeqz $a1, $a1, $a4
|
||
|
|
+; LA32-NEXT: or $a1, $a1, $a7
|
||
|
|
+; LA32-NEXT: add.w $a1, $a2, $a1
|
||
|
|
+; LA32-NEXT: sltu $a2, $a1, $a2
|
||
|
|
+; LA32-NEXT: add.w $a0, $a0, $a2
|
||
|
|
+; LA32-NEXT: srai.w $a2, $a3, 31
|
||
|
|
+; LA32-NEXT: xor $a3, $a6, $a2
|
||
|
|
+; LA32-NEXT: xor $a0, $a0, $a2
|
||
|
|
+; LA32-NEXT: or $a0, $a3, $a0
|
||
|
|
+; LA32-NEXT: xor $a3, $a5, $a2
|
||
|
|
+; LA32-NEXT: xor $a1, $a1, $a2
|
||
|
|
+; LA32-NEXT: or $a1, $a3, $a1
|
||
|
|
+; LA32-NEXT: or $a0, $a1, $a0
|
||
|
|
; LA32-NEXT: sltu $a0, $zero, $a0
|
||
|
|
-; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
|
||
|
|
-; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
|
||
|
|
-; LA32-NEXT: addi.w $sp, $sp, 64
|
||
|
|
+; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $s7, $sp, 56 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $s6, $sp, 60 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $s5, $sp, 64 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $s4, $sp, 68 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $s3, $sp, 72 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $s2, $sp, 76 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $s1, $sp, 80 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $s0, $sp, 84 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload
|
||
|
|
+; LA32-NEXT: addi.w $sp, $sp, 96
|
||
|
|
; LA32-NEXT: ret
|
||
|
|
;
|
||
|
|
; LA64-LABEL: smuloi128:
|
||
|
|
; LA64: # %bb.0:
|
||
|
|
-; LA64-NEXT: addi.d $sp, $sp, -32
|
||
|
|
-; LA64-NEXT: .cfi_def_cfa_offset 32
|
||
|
|
-; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
|
||
|
|
-; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
|
||
|
|
-; LA64-NEXT: .cfi_offset 1, -8
|
||
|
|
-; LA64-NEXT: .cfi_offset 22, -16
|
||
|
|
-; LA64-NEXT: move $fp, $a4
|
||
|
|
-; LA64-NEXT: st.d $zero, $sp, 8
|
||
|
|
-; LA64-NEXT: addi.d $a4, $sp, 8
|
||
|
|
-; LA64-NEXT: bl %plt(__muloti4)
|
||
|
|
-; LA64-NEXT: st.d $a1, $fp, 8
|
||
|
|
-; LA64-NEXT: st.d $a0, $fp, 0
|
||
|
|
-; LA64-NEXT: ld.d $a0, $sp, 8
|
||
|
|
+; LA64-NEXT: srai.d $a5, $a1, 63
|
||
|
|
+; LA64-NEXT: mul.d $a6, $a2, $a5
|
||
|
|
+; LA64-NEXT: mulh.du $a7, $a2, $a5
|
||
|
|
+; LA64-NEXT: add.d $a7, $a7, $a6
|
||
|
|
+; LA64-NEXT: mul.d $a5, $a3, $a5
|
||
|
|
+; LA64-NEXT: add.d $a5, $a7, $a5
|
||
|
|
+; LA64-NEXT: srai.d $a7, $a3, 63
|
||
|
|
+; LA64-NEXT: mul.d $t0, $a7, $a1
|
||
|
|
+; LA64-NEXT: mulh.du $t1, $a7, $a0
|
||
|
|
+; LA64-NEXT: add.d $t0, $t1, $t0
|
||
|
|
+; LA64-NEXT: mul.d $a7, $a7, $a0
|
||
|
|
+; LA64-NEXT: add.d $t0, $t0, $a7
|
||
|
|
+; LA64-NEXT: add.d $a5, $t0, $a5
|
||
|
|
+; LA64-NEXT: mulh.du $t0, $a0, $a2
|
||
|
|
+; LA64-NEXT: mul.d $t1, $a1, $a2
|
||
|
|
+; LA64-NEXT: add.d $t0, $t1, $t0
|
||
|
|
+; LA64-NEXT: sltu $t1, $t0, $t1
|
||
|
|
+; LA64-NEXT: mulh.du $t2, $a1, $a2
|
||
|
|
+; LA64-NEXT: add.d $t1, $t2, $t1
|
||
|
|
+; LA64-NEXT: mul.d $t2, $a0, $a3
|
||
|
|
+; LA64-NEXT: add.d $t0, $t2, $t0
|
||
|
|
+; LA64-NEXT: sltu $t2, $t0, $t2
|
||
|
|
+; LA64-NEXT: mulh.du $t3, $a0, $a3
|
||
|
|
+; LA64-NEXT: add.d $t2, $t3, $t2
|
||
|
|
+; LA64-NEXT: add.d $a6, $a7, $a6
|
||
|
|
+; LA64-NEXT: sltu $a7, $a6, $a7
|
||
|
|
+; LA64-NEXT: add.d $a5, $a5, $a7
|
||
|
|
+; LA64-NEXT: mul.d $a0, $a0, $a2
|
||
|
|
+; LA64-NEXT: mul.d $a2, $a1, $a3
|
||
|
|
+; LA64-NEXT: mulh.du $a1, $a1, $a3
|
||
|
|
+; LA64-NEXT: add.d $a3, $t1, $t2
|
||
|
|
+; LA64-NEXT: sltu $a7, $a3, $t1
|
||
|
|
+; LA64-NEXT: add.d $a1, $a1, $a7
|
||
|
|
+; LA64-NEXT: st.d $a0, $a4, 0
|
||
|
|
+; LA64-NEXT: add.d $a0, $a2, $a3
|
||
|
|
+; LA64-NEXT: sltu $a2, $a0, $a2
|
||
|
|
+; LA64-NEXT: add.d $a1, $a1, $a2
|
||
|
|
+; LA64-NEXT: st.d $t0, $a4, 8
|
||
|
|
+; LA64-NEXT: add.d $a1, $a1, $a5
|
||
|
|
+; LA64-NEXT: add.d $a2, $a0, $a6
|
||
|
|
+; LA64-NEXT: sltu $a0, $a2, $a0
|
||
|
|
+; LA64-NEXT: add.d $a0, $a1, $a0
|
||
|
|
+; LA64-NEXT: srai.d $a1, $t0, 63
|
||
|
|
+; LA64-NEXT: xor $a0, $a0, $a1
|
||
|
|
+; LA64-NEXT: xor $a1, $a2, $a1
|
||
|
|
+; LA64-NEXT: or $a0, $a1, $a0
|
||
|
|
; LA64-NEXT: sltu $a0, $zero, $a0
|
||
|
|
-; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
|
||
|
|
-; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
|
||
|
|
-; LA64-NEXT: addi.d $sp, $sp, 32
|
||
|
|
; LA64-NEXT: ret
|
||
|
|
%t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
|
||
|
|
%val = extractvalue {i128, i1} %t, 0
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From 01ced6193e2abfbd50fbd9d40066cf27f9f9067b Mon Sep 17 00:00:00 2001
|
||
|
|
From: wanglei <wanglei@loongson.cn>
|
||
|
|
Date: Wed, 29 Nov 2023 15:21:21 +0800
|
||
|
|
Subject: [PATCH 12/14] [LoongArch] Fix pattern for FNMSUB_{S/D} instructions
|
||
|
|
(#73742)
|
||
|
|
|
||
|
|
```
|
||
|
|
when a=c=-0.0, b=0.0:
|
||
|
|
-(a * b + (-c)) = -0.0
|
||
|
|
-a * b + c = 0.0
|
||
|
|
(fneg (fma a, b, (-c))) != (fma (fneg a), b, c)
|
||
|
|
```
|
||
|
|
|
||
|
|
See https://reviews.llvm.org/D90901 for a similar discussion on X86.
|
||
|
|
|
||
|
|
(cherry picked from commit 5e7e0d603204ede803323a825318e365a87f73e9)
|
||
|
|
---
|
||
|
|
.../LoongArch/LoongArchFloat32InstrInfo.td | 8 +-
|
||
|
|
.../LoongArch/LoongArchFloat64InstrInfo.td | 6 +-
|
||
|
|
llvm/test/CodeGen/LoongArch/double-fma.ll | 259 ++++++++++++++++--
|
||
|
|
llvm/test/CodeGen/LoongArch/float-fma.ll | 259 ++++++++++++++++--
|
||
|
|
4 files changed, 483 insertions(+), 49 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
|
||
|
|
index 826db54febd3..65120c083f49 100644
|
||
|
|
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
|
||
|
|
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
|
||
|
|
@@ -294,8 +294,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)),
|
||
|
|
def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)),
|
||
|
|
(FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
|
||
|
|
|
||
|
|
-// fnmsub.s: -fj * fk + fa
|
||
|
|
-def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
|
||
|
|
+// fnmsub.s: -(fj * fk - fa)
|
||
|
|
+def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))),
|
||
|
|
+ (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
|
||
|
|
+
|
||
|
|
+// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
|
||
|
|
+def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
|
||
|
|
(FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
|
||
|
|
} // Predicates = [HasBasicF]
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
|
||
|
|
index 5118474725b6..437c1e4d7be2 100644
|
||
|
|
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
|
||
|
|
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
|
||
|
|
@@ -256,7 +256,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)),
|
||
|
|
(FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
|
||
|
|
|
||
|
|
// fnmsub.d: -(fj * fk - fa)
|
||
|
|
-def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
|
||
|
|
+def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))),
|
||
|
|
+ (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
|
||
|
|
+
|
||
|
|
+// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
|
||
|
|
+def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
|
||
|
|
(FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
|
||
|
|
} // Predicates = [HasBasicD]
|
||
|
|
|
||
|
|
diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll
|
||
|
|
index 6dd628479433..58d20c62a668 100644
|
||
|
|
--- a/llvm/test/CodeGen/LoongArch/double-fma.ll
|
||
|
|
+++ b/llvm/test/CodeGen/LoongArch/double-fma.ll
|
||
|
|
@@ -236,13 +236,15 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind {
|
||
|
|
; LA32-CONTRACT-ON-LABEL: fnmsub_d:
|
||
|
|
; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
-; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
|
||
|
|
; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
;
|
||
|
|
; LA32-CONTRACT-OFF-LABEL: fnmsub_d:
|
||
|
|
; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
-; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
|
||
|
|
; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
;
|
||
|
|
; LA64-CONTRACT-FAST-LABEL: fnmsub_d:
|
||
|
|
@@ -253,12 +255,98 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind {
|
||
|
|
; LA64-CONTRACT-ON-LABEL: fnmsub_d:
|
||
|
|
; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
-; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
|
||
|
|
; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
;
|
||
|
|
; LA64-CONTRACT-OFF-LABEL: fnmsub_d:
|
||
|
|
; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %negc = fneg double %c
|
||
|
|
+ %mul = fmul double %a, %b
|
||
|
|
+ %add = fadd double %mul, %negc
|
||
|
|
+ %neg = fneg double %add
|
||
|
|
+ ret double %neg
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+define double @fnmsub_d_nsz(double %a, double %b, double %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %nega = fneg nsz double %a
|
||
|
|
+ %mul = fmul nsz double %nega, %b
|
||
|
|
+ %add = fadd nsz double %mul, %c
|
||
|
|
+ ret double %add
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+;; Check that fnmsub.d is not emitted.
|
||
|
|
+define double @not_fnmsub_d(double %a, double %b, double %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: not_fnmsub_d:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: not_fnmsub_d:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%nega = fneg double %a
|
||
|
|
@@ -483,6 +571,86 @@ define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind {
|
||
|
|
; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d:
|
||
|
|
; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %negc = fneg contract double %c
|
||
|
|
+ %mul = fmul contract double %a, %b
|
||
|
|
+ %add = fadd contract double %mul, %negc
|
||
|
|
+ %neg = fneg contract double %add
|
||
|
|
+ ret double %neg
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+define double @contract_fnmsub_d_nsz(double %a, double %b, double %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %nega = fneg contract nsz double %a
|
||
|
|
+ %mul = fmul contract nsz double %nega, %b
|
||
|
|
+ %add = fadd contract nsz double %mul, %c
|
||
|
|
+ ret double %add
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+;; Check that fnmsub.d is not emitted.
|
||
|
|
+define double @not_contract_fnmsub_d(double %a, double %b, double %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_d:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_d:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_d:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_d:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_d:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_d:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%nega = fneg contract double %a
|
||
|
|
%mul = fmul contract double %nega, %b
|
||
|
|
@@ -592,8 +760,8 @@ define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind {
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%fma = call double @llvm.fma.f64(double %a, double %b, double %c)
|
||
|
|
- %neg = fneg double %fma
|
||
|
|
- ret double %neg
|
||
|
|
+ %negfma = fneg double %fma
|
||
|
|
+ ret double %negfma
|
||
|
|
}
|
||
|
|
|
||
|
|
define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind {
|
||
|
|
@@ -704,44 +872,87 @@ define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind {
|
||
|
|
; LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics:
|
||
|
|
; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %negc = fneg double %c
|
||
|
|
+ %fma = call double @llvm.fma.f64(double %a, double %b, double %negc)
|
||
|
|
+ %negfma = fneg double %fma
|
||
|
|
+ ret double %negfma
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+define double @fnmsub_d_nsz_intrinsics(double %a, double %b, double %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%nega = fneg double %a
|
||
|
|
- %fma = call double @llvm.fma.f64(double %nega, double %b, double %c)
|
||
|
|
+ %fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %c)
|
||
|
|
ret double %fma
|
||
|
|
}
|
||
|
|
|
||
|
|
-define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind {
|
||
|
|
-; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics:
|
||
|
|
+;; Check that fnmsub.d is not emitted.
|
||
|
|
+define double @not_fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics:
|
||
|
|
; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
-; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics:
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics:
|
||
|
|
; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
-; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics:
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics:
|
||
|
|
; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
-; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics:
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics:
|
||
|
|
; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
-; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics:
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics:
|
||
|
|
; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
-; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics:
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics:
|
||
|
|
; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
-; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
- %negb = fneg double %b
|
||
|
|
- %fma = call double @llvm.fma.f64(double %a, double %negb, double %c)
|
||
|
|
+ %nega = fneg double %a
|
||
|
|
+ %fma = call double @llvm.fma.f64(double %nega, double %b, double %c)
|
||
|
|
ret double %fma
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -882,6 +1093,8 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind {
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%mul = fmul contract double %a, %b
|
||
|
|
- %sub = fsub contract double %c, %mul
|
||
|
|
- ret double %sub
|
||
|
|
+ %negc = fneg contract double %c
|
||
|
|
+ %add = fadd contract double %negc, %mul
|
||
|
|
+ %negadd = fneg contract double %add
|
||
|
|
+ ret double %negadd
|
||
|
|
}
|
||
|
|
diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll
|
||
|
|
index 54dc56784006..c236255d971a 100644
|
||
|
|
--- a/llvm/test/CodeGen/LoongArch/float-fma.ll
|
||
|
|
+++ b/llvm/test/CodeGen/LoongArch/float-fma.ll
|
||
|
|
@@ -236,13 +236,15 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind {
|
||
|
|
; LA32-CONTRACT-ON-LABEL: fnmsub_s:
|
||
|
|
; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
-; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
|
||
|
|
; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
;
|
||
|
|
; LA32-CONTRACT-OFF-LABEL: fnmsub_s:
|
||
|
|
; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
-; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
|
||
|
|
; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
;
|
||
|
|
; LA64-CONTRACT-FAST-LABEL: fnmsub_s:
|
||
|
|
@@ -253,12 +255,98 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind {
|
||
|
|
; LA64-CONTRACT-ON-LABEL: fnmsub_s:
|
||
|
|
; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
-; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
|
||
|
|
; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
;
|
||
|
|
; LA64-CONTRACT-OFF-LABEL: fnmsub_s:
|
||
|
|
; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %negc = fneg float %c
|
||
|
|
+ %mul = fmul float %a, %b
|
||
|
|
+ %add = fadd float %mul, %negc
|
||
|
|
+ %neg = fneg float %add
|
||
|
|
+ ret float %neg
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+define float @fnmsub_s_nsz(float %a, float %b, float %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %nega = fneg nsz float %a
|
||
|
|
+ %mul = fmul nsz float %nega, %b
|
||
|
|
+ %add = fadd nsz float %mul, %c
|
||
|
|
+ ret float %add
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+;; Check that fnmsub.s is not emitted.
|
||
|
|
+define float @not_fnmsub_s(float %a, float %b, float %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: not_fnmsub_s:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: not_fnmsub_s:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%nega = fneg float %a
|
||
|
|
@@ -483,6 +571,86 @@ define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind {
|
||
|
|
; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s:
|
||
|
|
; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %negc = fneg contract float %c
|
||
|
|
+ %mul = fmul contract float %a, %b
|
||
|
|
+ %add = fadd contract float %mul, %negc
|
||
|
|
+ %neg = fneg contract float %add
|
||
|
|
+ ret float %neg
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+define float @contract_fnmsub_s_nsz(float %a, float %b, float %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %nega = fneg contract nsz float %a
|
||
|
|
+ %mul = fmul contract nsz float %nega, %b
|
||
|
|
+ %add = fadd contract nsz float %mul, %c
|
||
|
|
+ ret float %add
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+;; Check that fnmsub.s is not emitted.
|
||
|
|
+define float @not_contract_fnmsub_s(float %a, float %b, float %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_s:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_s:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_s:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_s:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_s:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_s:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%nega = fneg contract float %a
|
||
|
|
%mul = fmul contract float %nega, %b
|
||
|
|
@@ -592,8 +760,8 @@ define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind {
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%fma = call float @llvm.fma.f64(float %a, float %b, float %c)
|
||
|
|
- %neg = fneg float %fma
|
||
|
|
- ret float %neg
|
||
|
|
+ %negfma = fneg float %fma
|
||
|
|
+ ret float %negfma
|
||
|
|
}
|
||
|
|
|
||
|
|
define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind {
|
||
|
|
@@ -704,44 +872,87 @@ define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind {
|
||
|
|
; LA64-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics:
|
||
|
|
; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
+ %negc = fneg float %c
|
||
|
|
+ %fma = call float @llvm.fma.f64(float %a, float %b, float %negc)
|
||
|
|
+ %negfma = fneg float %fma
|
||
|
|
+ ret float %negfma
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+define float @fnmsub_s_nsz_intrinsics(float %a, float %b, float %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics:
|
||
|
|
+; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics:
|
||
|
|
+; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics:
|
||
|
|
+; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics:
|
||
|
|
+; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics:
|
||
|
|
+; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
+;
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics:
|
||
|
|
+; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%nega = fneg float %a
|
||
|
|
- %fma = call float @llvm.fma.f64(float %nega, float %b, float %c)
|
||
|
|
+ %fma = call nsz float @llvm.fma.f64(float %nega, float %b, float %c)
|
||
|
|
ret float %fma
|
||
|
|
}
|
||
|
|
|
||
|
|
-define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind {
|
||
|
|
-; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics:
|
||
|
|
+;; Check that fnmsub.s is not emitted.
|
||
|
|
+define float @not_fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind {
|
||
|
|
+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics:
|
||
|
|
; LA32-CONTRACT-FAST: # %bb.0:
|
||
|
|
-; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA32-CONTRACT-FAST-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics:
|
||
|
|
+; LA32-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics:
|
||
|
|
; LA32-CONTRACT-ON: # %bb.0:
|
||
|
|
-; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA32-CONTRACT-ON-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics:
|
||
|
|
+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics:
|
||
|
|
; LA32-CONTRACT-OFF: # %bb.0:
|
||
|
|
-; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA32-CONTRACT-OFF-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA64-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics:
|
||
|
|
+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics:
|
||
|
|
; LA64-CONTRACT-FAST: # %bb.0:
|
||
|
|
-; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-FAST-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics:
|
||
|
|
+; LA64-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics:
|
||
|
|
; LA64-CONTRACT-ON: # %bb.0:
|
||
|
|
-; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-ON-NEXT: ret
|
||
|
|
;
|
||
|
|
-; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics:
|
||
|
|
+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics:
|
||
|
|
; LA64-CONTRACT-OFF: # %bb.0:
|
||
|
|
-; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0
|
||
|
|
+; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
- %negb = fneg float %b
|
||
|
|
- %fma = call float @llvm.fma.f64(float %a, float %negb, float %c)
|
||
|
|
+ %nega = fneg float %a
|
||
|
|
+ %fma = call float @llvm.fma.f64(float %nega, float %b, float %c)
|
||
|
|
ret float %fma
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -882,6 +1093,8 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2
|
||
|
|
; LA64-CONTRACT-OFF-NEXT: ret
|
||
|
|
%mul = fmul contract float %a, %b
|
||
|
|
- %sub = fsub contract float %c, %mul
|
||
|
|
- ret float %sub
|
||
|
|
+ %negc = fneg contract float %c
|
||
|
|
+ %add = fadd contract float %negc, %mul
|
||
|
|
+ %negadd = fneg contract float %add
|
||
|
|
+ ret float %negadd
|
||
|
|
}
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From 7a3bd125d9c1d0265b265ce238a88d0d4550e5a0 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Weining Lu <luweining@loongson.cn>
|
||
|
|
Date: Wed, 3 Jan 2024 13:59:12 +0800
|
||
|
|
Subject: [PATCH 13/14] [LoongArch] Fix the processor series mask
|
||
|
|
|
||
|
|
Refer to the PRID_SERIES_MASK definition in the Linux kernel:
|
||
|
|
arch/loongarch/include/asm/cpu.h.
|
||
|
|
|
||
|
|
(cherry picked from commit 7e186d366d6c7def0543acc255931f617e76dff0)
|
||
|
|
---
|
||
|
|
llvm/lib/TargetParser/Host.cpp | 3 ++-
|
||
|
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
|
||
|
|
index 81309280a44b..d11dc605e188 100644
|
||
|
|
--- a/llvm/lib/TargetParser/Host.cpp
|
||
|
|
+++ b/llvm/lib/TargetParser/Host.cpp
|
||
|
|
@@ -1462,7 +1462,8 @@ StringRef sys::getHostCPUName() {
|
||
|
|
// Use processor id to detect cpu name.
|
||
|
|
uint32_t processor_id;
|
||
|
|
__asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
|
||
|
|
- switch (processor_id & 0xff00) {
|
||
|
|
+ // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h.
|
||
|
|
+ switch (processor_id & 0xf000) {
|
||
|
|
case 0xc000: // Loongson 64bit, 4-issue
|
||
|
|
return "la464";
|
||
|
|
// TODO: Others.
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|
||
|
|
|
||
|
|
From 3634ac4cbc475509c46521f5b8a3fcbeca6d06c7 Mon Sep 17 00:00:00 2001
|
||
|
|
From: wanglei <wanglei@loongson.cn>
|
||
|
|
Date: Mon, 11 Mar 2024 08:59:17 +0800
|
||
|
|
Subject: [PATCH 14/14] [LoongArch] Make sure that the LoongArchISD::BSTRINS
|
||
|
|
node uses the correct `MSB` value (#84454)
|
||
|
|
|
||
|
|
The `MSB` must be less than `GRLen`. Without this patch, newly
|
||
|
|
added test cases will crash with LoongArch32, resulting in a 'cannot
|
||
|
|
select' error.
|
||
|
|
|
||
|
|
(cherry picked from commit edd4c6c6dca4c556de22b2ab73d5bfc02d28e59b)
|
||
|
|
(cherry picked from commit d77c5c3830d925b3795e2f1535a6568399fe6626)
|
||
|
|
---
|
||
|
|
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 +++-
|
||
|
|
llvm/test/CodeGen/LoongArch/bstrins_w.ll | 13 +++++++++++++
|
||
|
|
2 files changed, 16 insertions(+), 1 deletion(-)
|
||
|
|
|
||
|
|
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
|
||
|
|
index ed106cb766bc..5affaf37ad5a 100644
|
||
|
|
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
|
||
|
|
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
|
||
|
|
@@ -2310,7 +2310,9 @@ Retry:
|
||
|
|
return DAG.getNode(
|
||
|
|
LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
|
||
|
|
DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
|
||
|
|
- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
|
||
|
|
+ DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
|
||
|
|
+ : (MaskIdx0 + MaskLen0 - 1),
|
||
|
|
+ DL, GRLenVT),
|
||
|
|
DAG.getConstant(MaskIdx0, DL, GRLenVT));
|
||
|
|
}
|
||
|
|
|
||
|
|
diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
|
||
|
|
index dfbe000841cd..e008caacad2a 100644
|
||
|
|
--- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll
|
||
|
|
+++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
|
||
|
|
@@ -145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind {
|
||
|
|
ret i32 %or
|
||
|
|
}
|
||
|
|
|
||
|
|
+;; The high bits of `const` are zero.
|
||
|
|
+define i32 @pat5_high_zeros(i32 %a) nounwind {
|
||
|
|
+; CHECK-LABEL: pat5_high_zeros:
|
||
|
|
+; CHECK: # %bb.0:
|
||
|
|
+; CHECK-NEXT: lu12i.w $a1, 1
|
||
|
|
+; CHECK-NEXT: ori $a1, $a1, 564
|
||
|
|
+; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16
|
||
|
|
+; CHECK-NEXT: ret
|
||
|
|
+ %and = and i32 %a, 65535 ; 0x0000ffff
|
||
|
|
+ %or = or i32 %and, 305397760 ; 0x12340000
|
||
|
|
+ ret i32 %or
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
;; Pattern 6: a = b | ((c & mask) << shamt)
|
||
|
|
;; In this testcase b is 0x10000002, but in fact we do not require b being a
|
||
|
|
;; constant. As long as all positions in b to be overwritten by the incoming
|
||
|
|
--
|
||
|
|
2.20.1
|
||
|
|
|