Revert "Support stack clash protection"
This reverts commit 4f4298791f15f26e0649f57c6edfd999af51ec41.
This commit is contained in:
parent
b2ca14af7a
commit
f9af047c9f
@ -1,315 +0,0 @@
|
|||||||
From 7aeecae6393d5c3333beec64ad343ed1cabe75e4 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Matt Arsenault <Matthew.Arsenault@amd.com>
|
|
||||||
Date: Sat, 29 Jul 2023 19:12:24 -0400
|
|
||||||
Subject: [PATCH 1/7] GlobalISel: Don't expand stacksave/stackrestore in
|
|
||||||
IRTranslator
|
|
||||||
|
|
||||||
In some (likely invalid edge cases anyway), it's not correct to
|
|
||||||
directly copy the stack pointer register.
|
|
||||||
---
|
|
||||||
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 +
|
|
||||||
llvm/include/llvm/Support/TargetOpcodes.def | 6 +++
|
|
||||||
llvm/include/llvm/Target/GenericOpcodes.td | 12 ++++++
|
|
||||||
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 25 ++----------
|
|
||||||
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 26 +++++++++++++
|
|
||||||
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 4 +-
|
|
||||||
llvm/lib/Target/X86/X86LegalizerInfo.cpp | 4 ++
|
|
||||||
.../AArch64/GlobalISel/arm64-irtranslator.ll | 4 +-
|
|
||||||
.../GlobalISel/legalizer-info-validation.mir | 10 ++++-
|
|
||||||
.../GlobalISel/stacksave-stackrestore.ll | 35 +++++++++++++++++
|
|
||||||
.../X86/GlobalISel/stacksave-stackrestore.ll | 39 +++++++++++++++++++
|
|
||||||
11 files changed, 141 insertions(+), 26 deletions(-)
|
|
||||||
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll
|
|
||||||
create mode 100644 llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll
|
|
||||||
|
|
||||||
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
|
|
||||||
index a568edd0e640..9288091874cf 100644
|
|
||||||
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
|
|
||||||
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
|
|
||||||
@@ -401,6 +401,8 @@ public:
|
|
||||||
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI);
|
|
||||||
LegalizeResult lowerShuffleVector(MachineInstr &MI);
|
|
||||||
LegalizeResult lowerDynStackAlloc(MachineInstr &MI);
|
|
||||||
+ LegalizeResult lowerStackSave(MachineInstr &MI);
|
|
||||||
+ LegalizeResult lowerStackRestore(MachineInstr &MI);
|
|
||||||
LegalizeResult lowerExtract(MachineInstr &MI);
|
|
||||||
LegalizeResult lowerInsert(MachineInstr &MI);
|
|
||||||
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI);
|
|
||||||
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
|
|
||||||
index 186bea75ae96..c92ce6dc701c 100644
|
|
||||||
--- a/llvm/include/llvm/Support/TargetOpcodes.def
|
|
||||||
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
|
|
||||||
@@ -763,6 +763,12 @@ HANDLE_TARGET_OPCODE(G_JUMP_TABLE)
|
|
||||||
/// Generic dynamic stack allocation.
|
|
||||||
HANDLE_TARGET_OPCODE(G_DYN_STACKALLOC)
|
|
||||||
|
|
||||||
+/// Generic stack pointer save.
|
|
||||||
+HANDLE_TARGET_OPCODE(G_STACKSAVE)
|
|
||||||
+
|
|
||||||
+/// Generic stack pointer restore.
|
|
||||||
+HANDLE_TARGET_OPCODE(G_STACKRESTORE)
|
|
||||||
+
|
|
||||||
/// Strict floating point instructions.
|
|
||||||
HANDLE_TARGET_OPCODE(G_STRICT_FADD)
|
|
||||||
HANDLE_TARGET_OPCODE(G_STRICT_FSUB)
|
|
||||||
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
|
|
||||||
index 00d56d1c4bd5..e8cfaeab3cd8 100644
|
|
||||||
--- a/llvm/include/llvm/Target/GenericOpcodes.td
|
|
||||||
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
|
|
||||||
@@ -225,6 +225,18 @@ def G_DYN_STACKALLOC : GenericInstruction {
|
|
||||||
let hasSideEffects = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
+def G_STACKSAVE : GenericInstruction {
|
|
||||||
+ let OutOperandList = (outs ptype0:$dst);
|
|
||||||
+ let InOperandList = (ins);
|
|
||||||
+ let hasSideEffects = true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+def G_STACKRESTORE : GenericInstruction {
|
|
||||||
+ let OutOperandList = (outs);
|
|
||||||
+ let InOperandList = (ins ptype0:$src);
|
|
||||||
+ let hasSideEffects = true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
def G_FREEZE : GenericInstruction {
|
|
||||||
let OutOperandList = (outs type0:$dst);
|
|
||||||
let InOperandList = (ins type0:$src);
|
|
||||||
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
|
|
||||||
index 9a67a8d05a4d..e4b837c6b8ce 100644
|
|
||||||
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
|
|
||||||
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
|
|
||||||
@@ -2229,31 +2229,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
case Intrinsic::stacksave: {
|
|
||||||
- // Save the stack pointer to the location provided by the intrinsic.
|
|
||||||
- Register Reg = getOrCreateVReg(CI);
|
|
||||||
- Register StackPtr = MF->getSubtarget()
|
|
||||||
- .getTargetLowering()
|
|
||||||
- ->getStackPointerRegisterToSaveRestore();
|
|
||||||
-
|
|
||||||
- // If the target doesn't specify a stack pointer, then fall back.
|
|
||||||
- if (!StackPtr)
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- MIRBuilder.buildCopy(Reg, StackPtr);
|
|
||||||
+ MIRBuilder.buildInstr(TargetOpcode::G_STACKSAVE, {getOrCreateVReg(CI)}, {});
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
case Intrinsic::stackrestore: {
|
|
||||||
- // Restore the stack pointer from the location provided by the intrinsic.
|
|
||||||
- Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
|
|
||||||
- Register StackPtr = MF->getSubtarget()
|
|
||||||
- .getTargetLowering()
|
|
||||||
- ->getStackPointerRegisterToSaveRestore();
|
|
||||||
-
|
|
||||||
- // If the target doesn't specify a stack pointer, then fall back.
|
|
||||||
- if (!StackPtr)
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- MIRBuilder.buildCopy(StackPtr, Reg);
|
|
||||||
+ MIRBuilder.buildInstr(TargetOpcode::G_STACKRESTORE, {},
|
|
||||||
+ {getOrCreateVReg(*CI.getArgOperand(0))});
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
case Intrinsic::cttz:
|
|
||||||
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
|
|
||||||
index f0da0d88140f..75d9789be4d0 100644
|
|
||||||
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
|
|
||||||
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
|
|
||||||
@@ -3503,6 +3503,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
|
|
||||||
return lowerShuffleVector(MI);
|
|
||||||
case G_DYN_STACKALLOC:
|
|
||||||
return lowerDynStackAlloc(MI);
|
|
||||||
+ case G_STACKSAVE:
|
|
||||||
+ return lowerStackSave(MI);
|
|
||||||
+ case G_STACKRESTORE:
|
|
||||||
+ return lowerStackRestore(MI);
|
|
||||||
case G_EXTRACT:
|
|
||||||
return lowerExtract(MI);
|
|
||||||
case G_INSERT:
|
|
||||||
@@ -6810,6 +6814,28 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
|
|
||||||
return Legalized;
|
|
||||||
}
|
|
||||||
|
|
||||||
+LegalizerHelper::LegalizeResult
|
|
||||||
+LegalizerHelper::lowerStackSave(MachineInstr &MI) {
|
|
||||||
+ Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
|
|
||||||
+ if (!StackPtr)
|
|
||||||
+ return UnableToLegalize;
|
|
||||||
+
|
|
||||||
+ MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
|
|
||||||
+ MI.eraseFromParent();
|
|
||||||
+ return Legalized;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+LegalizerHelper::LegalizeResult
|
|
||||||
+LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
|
|
||||||
+ Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
|
|
||||||
+ if (!StackPtr)
|
|
||||||
+ return UnableToLegalize;
|
|
||||||
+
|
|
||||||
+ MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
|
|
||||||
+ MI.eraseFromParent();
|
|
||||||
+ return Legalized;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
LegalizerHelper::LegalizeResult
|
|
||||||
LegalizerHelper::lowerExtract(MachineInstr &MI) {
|
|
||||||
auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
|
|
||||||
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
|
|
||||||
index d905da4eaec3..f0130a0be29d 100644
|
|
||||||
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
|
|
||||||
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
|
|
||||||
@@ -797,7 +797,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
|
|
||||||
return Query.Types[0] == p0 && Query.Types[1] == s64;
|
|
||||||
});
|
|
||||||
|
|
||||||
- getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
|
|
||||||
+ getActionDefinitionsBuilder({G_DYN_STACKALLOC,
|
|
||||||
+ G_STACKSAVE,
|
|
||||||
+ G_STACKRESTORE}).lower();
|
|
||||||
|
|
||||||
if (ST.hasMOPS()) {
|
|
||||||
// G_BZERO is not supported. Currently it is only emitted by
|
|
||||||
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
|
|
||||||
index a4a247f85f3d..104461cff0a9 100644
|
|
||||||
--- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp
|
|
||||||
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
|
|
||||||
@@ -528,6 +528,10 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
|
|
||||||
// memory intrinsics
|
|
||||||
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
|
|
||||||
|
|
||||||
+ getActionDefinitionsBuilder({G_DYN_STACKALLOC,
|
|
||||||
+ G_STACKSAVE,
|
|
||||||
+ G_STACKRESTORE}).lower();
|
|
||||||
+
|
|
||||||
// fp intrinsics
|
|
||||||
getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
|
|
||||||
.scalarize(0)
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
|
|
||||||
index 5f3544add398..575cd6b874e3 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
|
|
||||||
@@ -2392,8 +2392,8 @@ declare ptr @llvm.stacksave()
|
|
||||||
declare void @llvm.stackrestore(ptr)
|
|
||||||
define void @test_stacksaverestore() {
|
|
||||||
; CHECK-LABEL: name: test_stacksaverestore
|
|
||||||
- ; CHECK: [[SAVE:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
- ; CHECK-NEXT: $sp = COPY [[SAVE]](p0)
|
|
||||||
+ ; CHECK: [[SAVE:%[0-9]+]]:_(p0) = G_STACKSAVE
|
|
||||||
+ ; CHECK-NEXT: G_STACKRESTORE [[SAVE]]
|
|
||||||
; CHECK-NEXT: RET_ReallyLR
|
|
||||||
%sp = call ptr @llvm.stacksave()
|
|
||||||
call void @llvm.stackrestore(ptr %sp)
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
|
|
||||||
index b4fe73d29fa6..461161f5b338 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
|
|
||||||
@@ -641,7 +641,15 @@
|
|
||||||
# DEBUG-NEXT: G_JUMP_TABLE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
|
|
||||||
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
|
|
||||||
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
|
|
||||||
-# DEBUG-NEXT: G_DYN_STACKALLOC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
|
|
||||||
+# DEBUG-NEXT: G_DYN_STACKALLOC (opcode [[DYN_STACKALLOC:[0-9]+]]): 2 type indices, 0 imm indices
|
|
||||||
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
|
|
||||||
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
|
|
||||||
+# DEBUG-NEXT: G_STACKSAVE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
|
|
||||||
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to [[DYN_STACKALLOC]]
|
|
||||||
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
|
|
||||||
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
|
|
||||||
+# DEBUG-NEXT: G_STACKRESTORE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
|
|
||||||
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to [[DYN_STACKALLOC]]
|
|
||||||
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
|
|
||||||
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
|
|
||||||
# DEBUG-NEXT: G_STRICT_FADD (opcode {{[0-9]+}}): 1 type index, 0 imm indices
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000000..16bf85af9c17
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll
|
|
||||||
@@ -0,0 +1,35 @@
|
|
||||||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
||||||
+; RUN: llc -global-isel=1 -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
|
|
||||||
+
|
|
||||||
+declare void @use_addr(ptr)
|
|
||||||
+declare ptr @llvm.stacksave.p0()
|
|
||||||
+declare void @llvm.stackrestore.p0(ptr)
|
|
||||||
+
|
|
||||||
+define void @test_scoped_alloca(i64 %n) {
|
|
||||||
+; CHECK-LABEL: test_scoped_alloca:
|
|
||||||
+; CHECK: // %bb.0:
|
|
||||||
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: mov x29, sp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa w29, 32
|
|
||||||
+; CHECK-NEXT: .cfi_offset w19, -16
|
|
||||||
+; CHECK-NEXT: .cfi_offset w30, -24
|
|
||||||
+; CHECK-NEXT: .cfi_offset w29, -32
|
|
||||||
+; CHECK-NEXT: add x9, x0, #15
|
|
||||||
+; CHECK-NEXT: mov x8, sp
|
|
||||||
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
|
|
||||||
+; CHECK-NEXT: mov x19, sp
|
|
||||||
+; CHECK-NEXT: sub x0, x8, x9
|
|
||||||
+; CHECK-NEXT: mov sp, x0
|
|
||||||
+; CHECK-NEXT: bl use_addr
|
|
||||||
+; CHECK-NEXT: mov sp, x19
|
|
||||||
+; CHECK-NEXT: mov sp, x29
|
|
||||||
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: ret
|
|
||||||
+ %sp = call ptr @llvm.stacksave.p0()
|
|
||||||
+ %addr = alloca i8, i64 %n
|
|
||||||
+ call void @use_addr(ptr %addr)
|
|
||||||
+ call void @llvm.stackrestore.p0(ptr %sp)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
diff --git a/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000000..e86c04ee22db
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll
|
|
||||||
@@ -0,0 +1,39 @@
|
|
||||||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
||||||
+; RUN: llc -global-isel=1 -mtriple=x86_64-linux-gnu -o - %s | FileCheck %s
|
|
||||||
+
|
|
||||||
+declare void @use_addr(ptr)
|
|
||||||
+declare ptr @llvm.stacksave.p0()
|
|
||||||
+declare void @llvm.stackrestore.p0(ptr)
|
|
||||||
+
|
|
||||||
+define void @test_scoped_alloca(i64 %n) {
|
|
||||||
+; CHECK-LABEL: test_scoped_alloca:
|
|
||||||
+; CHECK: # %bb.0:
|
|
||||||
+; CHECK-NEXT: pushq %rbp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
||||||
+; CHECK-NEXT: .cfi_offset %rbp, -16
|
|
||||||
+; CHECK-NEXT: movq %rsp, %rbp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
|
||||||
+; CHECK-NEXT: pushq %rbx
|
|
||||||
+; CHECK-NEXT: pushq %rax
|
|
||||||
+; CHECK-NEXT: .cfi_offset %rbx, -24
|
|
||||||
+; CHECK-NEXT: movq %rsp, %rbx
|
|
||||||
+; CHECK-NEXT: movq %rsp, %rax
|
|
||||||
+; CHECK-NEXT: imulq $1, %rdi, %rcx
|
|
||||||
+; CHECK-NEXT: addq $15, %rcx
|
|
||||||
+; CHECK-NEXT: andq $-16, %rcx
|
|
||||||
+; CHECK-NEXT: subq %rcx, %rax
|
|
||||||
+; CHECK-NEXT: movq %rax, %rsp
|
|
||||||
+; CHECK-NEXT: movq %rax, %rdi
|
|
||||||
+; CHECK-NEXT: callq use_addr
|
|
||||||
+; CHECK-NEXT: movq %rbx, %rsp
|
|
||||||
+; CHECK-NEXT: leaq -8(%rbp), %rsp
|
|
||||||
+; CHECK-NEXT: popq %rbx
|
|
||||||
+; CHECK-NEXT: popq %rbp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
|
||||||
+; CHECK-NEXT: retq
|
|
||||||
+ %sp = call ptr @llvm.stacksave.p0()
|
|
||||||
+ %addr = alloca i8, i64 %n
|
|
||||||
+ call void @use_addr(ptr %addr)
|
|
||||||
+ call void @llvm.stackrestore.p0(ptr %sp)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
--
|
|
||||||
2.42.0.windows.2
|
|
||||||
|
|
||||||
@ -1,546 +0,0 @@
|
|||||||
From 8db377e2a22d83637171008b6c8723f1869a2926 Mon Sep 17 00:00:00 2001
|
|
||||||
From: rickyleung <leung.wing.chung@huawei.com>
|
|
||||||
Date: Tue, 7 May 2024 21:24:49 +0800
|
|
||||||
Subject: [PATCH 3/7] [backport][AArch64] Refactor allocation of locals and
|
|
||||||
stack realignment
|
|
||||||
|
|
||||||
Reference: https://github.com/wc00862805aj/llvm-project/commit/dedf2c6bb5193652f6ad7d9ff9e676624c2485b7?
|
|
||||||
|
|
||||||
Factor out some stack allocation in a separate function. This patch
|
|
||||||
splits out the generic portion of a larger refactoring done as a part of
|
|
||||||
stack clash protection support.
|
|
||||||
|
|
||||||
The patch is almost, but not quite NFC. The only difference should
|
|
||||||
be that where we have adjacent allocation of stack space
|
|
||||||
for local SVE objects and non-local SVE objects the order
|
|
||||||
of `sub sp, ...` and `addvl sp, ...` instructions is reversed, because now
|
|
||||||
it's done with a single call to `emitFrameOffset` and it happens
|
|
||||||
add/subtract the fixed part before the scalable part, e.g.
|
|
||||||
|
|
||||||
addvl sp, sp, #-2
|
|
||||||
sub sp, sp, llvm#16, lsl llvm#12
|
|
||||||
sub sp, sp, llvm#16
|
|
||||||
|
|
||||||
becomes
|
|
||||||
|
|
||||||
sub sp, sp, llvm#16, lsl llvm#12
|
|
||||||
sub sp, sp, llvm#16
|
|
||||||
addvl sp, sp, #-2
|
|
||||||
---
|
|
||||||
.../Target/AArch64/AArch64FrameLowering.cpp | 114 +++++++++---------
|
|
||||||
.../lib/Target/AArch64/AArch64FrameLowering.h | 5 +
|
|
||||||
.../AArch64/framelayout-sve-basepointer.mir | 4 +-
|
|
||||||
.../framelayout-sve-fixed-width-access.mir | 2 +-
|
|
||||||
.../framelayout-sve-scavengingslot.mir | 4 +-
|
|
||||||
llvm/test/CodeGen/AArch64/framelayout-sve.mir | 54 ++++-----
|
|
||||||
.../AArch64/spill-stack-realignment.mir | 2 +-
|
|
||||||
llvm/test/CodeGen/AArch64/stack-guard-sve.ll | 4 +-
|
|
||||||
.../AArch64/sve-calling-convention-mixed.ll | 4 +-
|
|
||||||
.../CodeGen/AArch64/sve-fixed-length-fp128.ll | 4 +-
|
|
||||||
10 files changed, 103 insertions(+), 94 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
|
|
||||||
index 4d5676f34101..eeb6185fa36d 100644
|
|
||||||
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
|
|
||||||
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
|
|
||||||
@@ -300,6 +300,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
|
|
||||||
static bool produceCompactUnwindFrame(MachineFunction &MF);
|
|
||||||
static bool needsWinCFI(const MachineFunction &MF);
|
|
||||||
static StackOffset getSVEStackSize(const MachineFunction &MF);
|
|
||||||
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
|
|
||||||
static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF);
|
|
||||||
|
|
||||||
/// Returns true if a homogeneous prolog or epilog code can be emitted
|
|
||||||
@@ -671,6 +672,44 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
|
|
||||||
emitCalleeSavedRestores(MBB, MBBI, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
+void AArch64FrameLowering::allocateStackSpace(
|
|
||||||
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
|
||||||
+ bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI,
|
|
||||||
+ bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const {
|
|
||||||
+
|
|
||||||
+ if (!AllocSize)
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ DebugLoc DL;
|
|
||||||
+ MachineFunction &MF = *MBB.getParent();
|
|
||||||
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
|
|
||||||
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
|
|
||||||
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
|
|
||||||
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
||||||
+
|
|
||||||
+ Register TargetReg =
|
|
||||||
+ NeedsRealignment ? findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP;
|
|
||||||
+ // SUB Xd/SP, SP, AllocSize
|
|
||||||
+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
|
|
||||||
+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
|
|
||||||
+ EmitCFI, InitialOffset);
|
|
||||||
+
|
|
||||||
+ if (NeedsRealignment) {
|
|
||||||
+ const int64_t MaxAlign = MFI.getMaxAlign().value();
|
|
||||||
+ const uint64_t AndMask = ~(MaxAlign - 1);
|
|
||||||
+ // AND SP, Xd, 0b11111...0000
|
|
||||||
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
|
|
||||||
+ .addReg(TargetReg, RegState::Kill)
|
|
||||||
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
|
|
||||||
+ .setMIFlags(MachineInstr::FrameSetup);
|
|
||||||
+ AFI.setStackRealigned(true);
|
|
||||||
+
|
|
||||||
+ // No need for SEH instructions here; if we're realigning the stack,
|
|
||||||
+ // we've set a frame pointer and already finished the SEH prologue.
|
|
||||||
+ assert(!NeedsWinCFI);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
|
|
||||||
switch (Reg.id()) {
|
|
||||||
default:
|
|
||||||
@@ -1769,7 +1808,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
- StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
|
|
||||||
+ StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
|
|
||||||
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
|
|
||||||
|
|
||||||
// Process the SVE callee-saves to determine what space needs to be
|
|
||||||
@@ -1782,67 +1821,32 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
|
||||||
++MBBI;
|
|
||||||
CalleeSavesEnd = MBBI;
|
|
||||||
|
|
||||||
- AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
|
|
||||||
- AllocateAfter = SVEStackSize - AllocateBefore;
|
|
||||||
+ SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
|
|
||||||
+ SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate space for the callee saves (if any).
|
|
||||||
- emitFrameOffset(
|
|
||||||
- MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
|
|
||||||
- MachineInstr::FrameSetup, false, false, nullptr,
|
|
||||||
- EmitAsyncCFI && !HasFP && AllocateBefore,
|
|
||||||
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
|
|
||||||
+ StackOffset CFAOffset =
|
|
||||||
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
|
|
||||||
+ allocateStackSpace(MBB, CalleeSavesBegin, false, SVECalleeSavesSize, false,
|
|
||||||
+ nullptr, EmitAsyncCFI && !HasFP, CFAOffset);
|
|
||||||
+ CFAOffset += SVECalleeSavesSize;
|
|
||||||
|
|
||||||
if (EmitAsyncCFI)
|
|
||||||
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
|
|
||||||
|
|
||||||
- // Finally allocate remaining SVE stack space.
|
|
||||||
- emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
|
|
||||||
- -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
|
|
||||||
- nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
|
|
||||||
- AllocateBefore + StackOffset::getFixed(
|
|
||||||
- (int64_t)MFI.getStackSize() - NumBytes));
|
|
||||||
-
|
|
||||||
- // Allocate space for the rest of the frame.
|
|
||||||
- if (NumBytes) {
|
|
||||||
- unsigned scratchSPReg = AArch64::SP;
|
|
||||||
-
|
|
||||||
- if (NeedsRealignment) {
|
|
||||||
- scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
|
|
||||||
- assert(scratchSPReg != AArch64::NoRegister);
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- // If we're a leaf function, try using the red zone.
|
|
||||||
- if (!canUseRedZone(MF)) {
|
|
||||||
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
|
|
||||||
- // the correct value here, as NumBytes also includes padding bytes,
|
|
||||||
- // which shouldn't be counted here.
|
|
||||||
- emitFrameOffset(
|
|
||||||
- MBB, MBBI, DL, scratchSPReg, AArch64::SP,
|
|
||||||
- StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
|
|
||||||
- false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
|
|
||||||
- SVEStackSize +
|
|
||||||
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
|
|
||||||
- }
|
|
||||||
- if (NeedsRealignment) {
|
|
||||||
- assert(MFI.getMaxAlign() > Align(1));
|
|
||||||
- assert(scratchSPReg != AArch64::SP);
|
|
||||||
-
|
|
||||||
- // SUB X9, SP, NumBytes
|
|
||||||
- // -- X9 is temporary register, so shouldn't contain any live data here,
|
|
||||||
- // -- free to use. This is already produced by emitFrameOffset above.
|
|
||||||
- // AND SP, X9, 0b11111...0000
|
|
||||||
- uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
|
|
||||||
-
|
|
||||||
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
|
|
||||||
- .addReg(scratchSPReg, RegState::Kill)
|
|
||||||
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
|
|
||||||
- AFI->setStackRealigned(true);
|
|
||||||
-
|
|
||||||
- // No need for SEH instructions here; if we're realigning the stack,
|
|
||||||
- // we've set a frame pointer and already finished the SEH prologue.
|
|
||||||
- assert(!NeedsWinCFI);
|
|
||||||
- }
|
|
||||||
+ // Allocate space for the rest of the frame including SVE locals. Align the
|
|
||||||
+ // stack as necessary.
|
|
||||||
+ assert(!(canUseRedZone(MF) && NeedsRealignment) &&
|
|
||||||
+ "Cannot use redzone with stack realignment");
|
|
||||||
+ if (!canUseRedZone(MF)) {
|
|
||||||
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
|
|
||||||
+ // the correct value here, as NumBytes also includes padding bytes,
|
|
||||||
+ // which shouldn't be counted here.
|
|
||||||
+ allocateStackSpace(MBB, CalleeSavesEnd, NeedsRealignment,
|
|
||||||
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
|
|
||||||
+ NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
|
|
||||||
+ CFAOffset);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we need a base pointer, set it up here. It's whatever the value of the
|
|
||||||
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
|
|
||||||
index 147b5c181be5..f3313f3b53ff 100644
|
|
||||||
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
|
|
||||||
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
|
|
||||||
@@ -150,6 +150,11 @@ private:
|
|
||||||
MachineBasicBlock::iterator MBBI) const;
|
|
||||||
void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MBBI) const;
|
|
||||||
+ void allocateStackSpace(MachineBasicBlock &MBB,
|
|
||||||
+ MachineBasicBlock::iterator MBBI,
|
|
||||||
+ bool NeedsRealignment, StackOffset AllocSize,
|
|
||||||
+ bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
|
|
||||||
+ StackOffset InitialOffset) const;
|
|
||||||
|
|
||||||
/// Emit target zero call-used regs.
|
|
||||||
void emitZeroCallUsedRegs(BitVector RegsToZero,
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
|
|
||||||
index 623c0f240be4..265c474fbc5d 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
|
|
||||||
@@ -4,8 +4,8 @@
|
|
||||||
name: hasBasepointer
|
|
||||||
# CHECK-LABEL: name: hasBasepointer
|
|
||||||
# CHECK: bb.0:
|
|
||||||
-# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
|
||||||
+# CHECK: $sp = frame-setup SUBXri $sp, 16, 0
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
# CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
|
|
||||||
# CHECK: STRXui $x0, $x19, 0
|
|
||||||
tracksRegLiveness: true
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
|
|
||||||
index e367a380f8ba..35fd7ca77d5c 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
|
|
||||||
@@ -7,9 +7,9 @@
|
|
||||||
; CHECK: // %bb.0: // %entry
|
|
||||||
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
|
||||||
; CHECK-NEXT: mov x29, sp
|
|
||||||
+ ; CHECK-NEXT: sub sp, sp, #2064
|
|
||||||
; CHECK-NEXT: addvl sp, sp, #-32
|
|
||||||
; CHECK-NEXT: addvl sp, sp, #-28
|
|
||||||
- ; CHECK-NEXT: sub sp, sp, #2064
|
|
||||||
; CHECK-NEXT: ldr x8, [sp, #2048]
|
|
||||||
; CHECK-NEXT: addvl sp, sp, #31
|
|
||||||
; CHECK-NEXT: addvl sp, sp, #29
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
|
|
||||||
index d54f67634d02..680f9c335c25 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
|
|
||||||
@@ -4,9 +4,9 @@
|
|
||||||
name: LateScavengingSlot
|
|
||||||
# CHECK-LABEL: name: LateScavengingSlot
|
|
||||||
# CHECK: bb.0:
|
|
||||||
-# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12
|
|
||||||
+# CHECK: $sp = frame-setup SUBXri $sp, 8, 12
|
|
||||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0
|
|
||||||
# CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1
|
|
||||||
# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
|
|
||||||
index 7c87587c6dc4..8b657c95bfc7 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
|
|
||||||
@@ -60,10 +60,10 @@
|
|
||||||
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
|
|
||||||
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
|
||||||
-# CHECK-NEXT: CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
|
|
||||||
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
|
|
||||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
|
|
||||||
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32
|
|
||||||
@@ -77,7 +77,7 @@
|
|
||||||
# ASM-LABEL: test_allocate_sve:
|
|
||||||
# ASM: .cfi_def_cfa_offset 16
|
|
||||||
# ASM-NEXT: .cfi_offset w29, -16
|
|
||||||
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
|
|
||||||
+# ASM: .cfi_def_cfa_offset 32
|
|
||||||
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
|
|
||||||
# ASM: .cfi_def_cfa wsp, 32
|
|
||||||
# ASM: .cfi_def_cfa_offset 16
|
|
||||||
@@ -87,7 +87,7 @@
|
|
||||||
#
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
|
|
||||||
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
|
|
||||||
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
+# UNWINDINFO: DW_CFA_def_cfa_offset: +32
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa: reg31 +32
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
|
|
||||||
@@ -125,9 +125,9 @@ body: |
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w21, -16
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
|
|
||||||
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
|
||||||
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 48
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
#
|
|
||||||
# CHECK-NEXT: $x20 = IMPLICIT_DEF
|
|
||||||
@@ -149,7 +149,7 @@ body: |
|
|
||||||
# ASM: .cfi_offset w20, -8
|
|
||||||
# ASM-NEXT: .cfi_offset w21, -16
|
|
||||||
# ASM-NEXT: .cfi_offset w29, -32
|
|
||||||
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
|
|
||||||
+# ASM: .cfi_def_cfa_offset 48
|
|
||||||
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG
|
|
||||||
#
|
|
||||||
# ASM: .cfi_def_cfa wsp, 48
|
|
||||||
@@ -164,7 +164,7 @@ body: |
|
|
||||||
# UNWINDINFO: DW_CFA_offset: reg20 -8
|
|
||||||
# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16
|
|
||||||
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32
|
|
||||||
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
+# UNWINDINFO: DW_CFA_def_cfa_offset: +48
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
#
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa: reg31 +48
|
|
||||||
@@ -205,9 +205,9 @@ body: |
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
|
|
||||||
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
|
|
||||||
-# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
|
|
||||||
+# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]]
|
|
||||||
# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
|
|
||||||
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
|
|
||||||
# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
|
|
||||||
@@ -267,9 +267,9 @@ body: |
|
|
||||||
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
|
|
||||||
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
|
||||||
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
|
|
||||||
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
|
|
||||||
@@ -292,7 +292,7 @@ body: |
|
|
||||||
# ASM-LABEL: test_address_sve:
|
|
||||||
# ASM: .cfi_def_cfa_offset 16
|
|
||||||
# ASM-NEXT: .cfi_offset w29, -16
|
|
||||||
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
|
|
||||||
+# ASM: .cfi_def_cfa_offset 32
|
|
||||||
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG
|
|
||||||
#
|
|
||||||
# ASM: .cfi_def_cfa wsp, 32
|
|
||||||
@@ -302,7 +302,7 @@ body: |
|
|
||||||
#
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
|
|
||||||
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
|
|
||||||
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
+# UNWINDINFO: DW_CFA_def_cfa_offset: +32
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
#
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa: reg31 +32
|
|
||||||
@@ -353,8 +353,8 @@ body: |
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
|
|
||||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
|
|
||||||
|
|
||||||
# CHECK-NEXT: STR_ZXI $z0, $fp, -1
|
|
||||||
# CHECK-NEXT: STR_ZXI $z1, $fp, -2
|
|
||||||
@@ -429,9 +429,9 @@ body: |
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
|
||||||
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
|
||||||
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
|
|
||||||
# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
|
|
||||||
@@ -448,7 +448,7 @@ body: |
|
|
||||||
# ASM-LABEL: test_stack_arg_sve:
|
|
||||||
# ASM: .cfi_def_cfa_offset 16
|
|
||||||
# ASM-NEXT: .cfi_offset w29, -16
|
|
||||||
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
|
|
||||||
+# ASM: .cfi_def_cfa_offset 32
|
|
||||||
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
|
|
||||||
#
|
|
||||||
# ASM: .cfi_def_cfa wsp, 32
|
|
||||||
@@ -458,7 +458,7 @@ body: |
|
|
||||||
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
|
|
||||||
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
|
|
||||||
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
+# UNWINDINFO: DW_CFA_def_cfa_offset: +32
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
#
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa: reg31 +32
|
|
||||||
@@ -640,8 +640,8 @@ body: |
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -16
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
# CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
|
|
||||||
# CHECK-NEXT: STRXui $xzr, $x19, 0
|
|
||||||
# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
|
|
||||||
@@ -863,9 +863,9 @@ body: |
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
-# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
|
|
||||||
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
+# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
|
|
||||||
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
|
|
||||||
@@ -916,7 +916,7 @@ body: |
|
|
||||||
# ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
|
|
||||||
# ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
|
|
||||||
# ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
|
|
||||||
-# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG
|
|
||||||
+# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 144 * VG
|
|
||||||
# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG
|
|
||||||
#
|
|
||||||
# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG
|
|
||||||
@@ -950,7 +950,7 @@ body: |
|
|
||||||
# UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
# UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
# UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +144, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
#
|
|
||||||
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
|
|
||||||
@@ -1031,9 +1031,9 @@ body: |
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
|
||||||
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
|
|
||||||
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
|
|
||||||
-# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
|
|
||||||
+# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -1
|
|
||||||
+# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]]
|
|
||||||
|
|
||||||
# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18
|
|
||||||
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir
|
|
||||||
index 1b9411d07f43..f6fc627ac2d3 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir
|
|
||||||
@@ -21,7 +21,7 @@ stack:
|
|
||||||
- { id: 1, size: 4, alignment: 4, local-offset: -68 }
|
|
||||||
|
|
||||||
# CHECK: body:
|
|
||||||
-# CHECK: $sp = ANDXri killed ${{x[0-9]+}}, 7865
|
|
||||||
+# CHECK: $sp = frame-setup ANDXri killed ${{x[0-9]+}}, 7865
|
|
||||||
# CHECK: STRSui $s0, $sp, 0
|
|
||||||
# CHECK: STRSui $s0, $fp, 7
|
|
||||||
body: |
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
|
|
||||||
index 1672a7eb8739..5acbb22bf1ab 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
|
|
||||||
@@ -148,9 +148,9 @@ entry:
|
|
||||||
|
|
||||||
; CHECK-LABEL: local_stack_alloc:
|
|
||||||
; CHECK: mov x29, sp
|
|
||||||
-; CHECK: addvl sp, sp, #-2
|
|
||||||
; CHECK: sub sp, sp, #16, lsl #12
|
|
||||||
; CHECK: sub sp, sp, #16
|
|
||||||
+; CHECK: addvl sp, sp, #-2
|
|
||||||
|
|
||||||
; Stack guard is placed below the SVE stack area (and above all fixed-width objects)
|
|
||||||
; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC:x[0-9]+]], sp, #8, lsl #12
|
|
||||||
@@ -198,9 +198,9 @@ entry:
|
|
||||||
|
|
||||||
; CHECK-LABEL: local_stack_alloc_strong:
|
|
||||||
; CHECK: mov x29, sp
|
|
||||||
-; CHECK: addvl sp, sp, #-3
|
|
||||||
; CHECK: sub sp, sp, #16, lsl #12
|
|
||||||
; CHECK: sub sp, sp, #16
|
|
||||||
+; CHECK: addvl sp, sp, #-3
|
|
||||||
|
|
||||||
; Stack guard is placed at the top of the SVE stack area
|
|
||||||
; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
|
|
||||||
index a97649523565..235364ac2321 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
|
|
||||||
@@ -56,8 +56,8 @@ define float @foo2(ptr %x0, ptr %x1) nounwind {
|
|
||||||
; CHECK-LABEL: foo2:
|
|
||||||
; CHECK: // %bb.0: // %entry
|
|
||||||
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
|
||||||
-; CHECK-NEXT: addvl sp, sp, #-4
|
|
||||||
; CHECK-NEXT: sub sp, sp, #16
|
|
||||||
+; CHECK-NEXT: addvl sp, sp, #-4
|
|
||||||
; CHECK-NEXT: ptrue p0.b
|
|
||||||
; CHECK-NEXT: add x8, sp, #16
|
|
||||||
; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0]
|
|
||||||
@@ -699,8 +699,8 @@ define void @verify_all_operands_are_initialised() {
|
|
||||||
; CHECK-LABEL: verify_all_operands_are_initialised:
|
|
||||||
; CHECK: // %bb.0:
|
|
||||||
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
|
||||||
-; CHECK-NEXT: addvl sp, sp, #-1
|
|
||||||
; CHECK-NEXT: sub sp, sp, #16
|
|
||||||
+; CHECK-NEXT: addvl sp, sp, #-1
|
|
||||||
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
|
|
||||||
; CHECK-NEXT: .cfi_offset w30, -8
|
|
||||||
; CHECK-NEXT: .cfi_offset w29, -16
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
|
|
||||||
index 31ff9287046c..b3529549c22b 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
|
|
||||||
@@ -9,8 +9,8 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
||||||
; CHECK: // %bb.0:
|
|
||||||
; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
|
|
||||||
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
|
|
||||||
-; CHECK-NEXT: addvl sp, sp, #-2
|
|
||||||
; CHECK-NEXT: sub sp, sp, #48
|
|
||||||
+; CHECK-NEXT: addvl sp, sp, #-2
|
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
|
||||||
; CHECK-NEXT: add x8, sp, #48
|
|
||||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
@@ -59,8 +59,8 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
||||||
; CHECK: // %bb.0:
|
|
||||||
; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
|
|
||||||
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
|
|
||||||
-; CHECK-NEXT: addvl sp, sp, #-2
|
|
||||||
; CHECK-NEXT: sub sp, sp, #128
|
|
||||||
+; CHECK-NEXT: addvl sp, sp, #-2
|
|
||||||
; CHECK-NEXT: ldr q1, [x0, #64]
|
|
||||||
; CHECK-NEXT: mov x19, x1
|
|
||||||
; CHECK-NEXT: ldr q0, [x0, #80]
|
|
||||||
--
|
|
||||||
2.42.0.windows.2
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,744 +0,0 @@
|
|||||||
From e433199a7dbe87324a671299f6509f19d295382f Mon Sep 17 00:00:00 2001
|
|
||||||
From: rickyleung <leung.wing.chung@huawei.com>
|
|
||||||
Date: Fri, 26 Apr 2024 16:59:48 +0800
|
|
||||||
Subject: [PATCH 5/7] [backport][AArch64] Stack probing for dynamic allocas in
|
|
||||||
SelectionDAG
|
|
||||||
|
|
||||||
Reference: https://github.com/llvm/llvm-project/commit/b1806e6a1f0589acc88499419531c4eb82488f1a
|
|
||||||
|
|
||||||
Add support for probing for dynamic allocas (variable-size objects and
|
|
||||||
outgoing stack arguments).
|
|
||||||
|
|
||||||
Co-authored-by: Oliver Stannard <oliver.stannard@linaro.org>
|
|
||||||
---
|
|
||||||
.../Target/AArch64/AArch64FrameLowering.cpp | 26 ++
|
|
||||||
.../Target/AArch64/AArch64ISelLowering.cpp | 152 +++++---
|
|
||||||
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 13 +-
|
|
||||||
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 +
|
|
||||||
.../stack-probing-dynamic-no-frame-setup.ll | 14 +
|
|
||||||
.../CodeGen/AArch64/stack-probing-dynamic.ll | 362 ++++++++++++++++++
|
|
||||||
6 files changed, 526 insertions(+), 55 deletions(-)
|
|
||||||
create mode 100644 llvm/test/CodeGen/AArch64/stack-probing-dynamic-no-frame-setup.ll
|
|
||||||
create mode 100644 llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
|
|
||||||
|
|
||||||
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
|
|
||||||
index af019ab23770..fe21173f531f 100644
|
|
||||||
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
|
|
||||||
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
|
|
||||||
@@ -462,6 +462,11 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
|
|
||||||
/// included as part of the stack frame.
|
|
||||||
bool
|
|
||||||
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
|
|
||||||
+ // The stack probing code for the dynamically allocated outgoing arguments
|
|
||||||
+ // area assumes that the stack is probed at the top - either by the prologue
|
|
||||||
+ // code, which issues a probe if `hasVarSizedObjects` return true, or by the
|
|
||||||
+ // most recent variable-sized object allocation. Changing the condition here
|
|
||||||
+ // may need to be followed up by changes to the probe issuing logic.
|
|
||||||
return !MF.getFrameInfo().hasVarSizedObjects();
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -470,6 +475,9 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
|
|
||||||
MachineBasicBlock::iterator I) const {
|
|
||||||
const AArch64InstrInfo *TII =
|
|
||||||
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
|
|
||||||
+ const AArch64TargetLowering *TLI =
|
|
||||||
+ MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
|
|
||||||
+ MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
||||||
DebugLoc DL = I->getDebugLoc();
|
|
||||||
unsigned Opc = I->getOpcode();
|
|
||||||
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
|
|
||||||
@@ -496,6 +504,24 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
|
|
||||||
// Most call frames will be allocated at the start of a function so
|
|
||||||
// this is OK, but it is a limitation that needs dealing with.
|
|
||||||
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
|
|
||||||
+
|
|
||||||
+ if (TLI->hasInlineStackProbe(MF) &&
|
|
||||||
+ -Amount >= AArch64::StackProbeMaxUnprobedStack) {
|
|
||||||
+ // When stack probing is enabled, the decrement of SP may need to be
|
|
||||||
+ // probed. We only need to do this if the call site needs 1024 bytes of
|
|
||||||
+ // space or more, because a region smaller than that is allowed to be
|
|
||||||
+ // unprobed at an ABI boundary. We rely on the fact that SP has been
|
|
||||||
+ // probed exactly at this point, either by the prologue or most recent
|
|
||||||
+ // dynamic allocation.
|
|
||||||
+ assert(MFI.hasVarSizedObjects() &&
|
|
||||||
+ "non-reserved call frame without var sized objects?");
|
|
||||||
+ Register ScratchReg =
|
|
||||||
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
|
|
||||||
+ inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
|
|
||||||
+ } else {
|
|
||||||
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
|
|
||||||
+ StackOffset::getFixed(Amount), TII);
|
|
||||||
+ }
|
|
||||||
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
|
|
||||||
StackOffset::getFixed(Amount), TII);
|
|
||||||
}
|
|
||||||
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 082043420fb9..eff0722e1c77 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -556,10 +556,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

- if (Subtarget->isTargetWindows())
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
- else
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
@@ -2288,6 +2285,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
+ MAKE_CASE(AArch64ISD::PROBED_ALLOCA)
MAKE_CASE(AArch64ISD::ABDS_PRED)
MAKE_CASE(AArch64ISD::ABDU_PRED)
MAKE_CASE(AArch64ISD::HADDS_PRED)
@@ -2646,6 +2644,22 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
return BB;
}

+MachineBasicBlock *
+AArch64TargetLowering::EmitDynamicProbedAlloc(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ MachineFunction &MF = *MBB->getParent();
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ DebugLoc DL = MBB->findDebugLoc(MBBI);
+ const AArch64InstrInfo &TII =
+ *MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ Register TargetReg = MI.getOperand(0).getReg();
+ MachineBasicBlock::iterator NextInst =
+ TII.probedStackAlloc(MBBI, TargetReg, false);
+
+ MI.eraseFromParent();
+ return NextInst->getParent();
+}
+
MachineBasicBlock *
AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
@@ -2774,6 +2788,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(

case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
+ case AArch64::PROBED_STACKALLOC_DYN:
+ return EmitDynamicProbedAlloc(MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_B:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_H:
@@ -13666,9 +13682,34 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
AN->getMemOperand());
}

-SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
- SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
+SDValue
+AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+
SDLoc dl(Op);
+ // Get the inputs.
+ SDNode *Node = Op.getNode();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ MaybeAlign Align =
+ cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
+ EVT VT = Node->getValueType(0);
+
+ if (DAG.getMachineFunction().getFunction().hasFnAttribute(
+ "no-stack-arg-probe")) {
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
+ Chain = SP.getValue(1);
+ SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
+ if (Align)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
+ SDValue Ops[2] = {SP, Chain};
+ return DAG.getMergeValues(Ops, dl);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
+
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
PtrVT, 0);
@@ -13692,7 +13733,59 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(

Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
- return Chain;
+
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
+ Chain = SP.getValue(1);
+ SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
+ if (Align)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
+
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
+
+ SDValue Ops[2] = {SP, Chain};
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue
+AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Get the inputs.
+ SDNode *Node = Op.getNode();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+
+ MaybeAlign Align =
+ cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
+ SDLoc dl(Op);
+ EVT VT = Node->getValueType(0);
+
+ // Construct the new SP value in a GPR.
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
+ Chain = SP.getValue(1);
+ SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
+ if (Align)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
+
+ // Set the real SP to the new value with a probing loop.
+ Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
+ SDValue Ops[2] = {SP, Chain};
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue
+AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ if (Subtarget->isTargetWindows())
+ return LowerWindowsDYNAMIC_STACKALLOC(Op, DAG);
+ else if (hasInlineStackProbe(MF))
+ return LowerInlineDYNAMIC_STACKALLOC(Op, DAG);
+ else
+ return SDValue();
}

// When x and y are extended, lower:
@@ -13746,51 +13839,6 @@ SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, dl, VT, Add, tmp);
}

-SDValue
-AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Subtarget->isTargetWindows() &&
- "Only Windows alloca probing supported");
- SDLoc dl(Op);
- // Get the inputs.
- SDNode *Node = Op.getNode();
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- MaybeAlign Align =
- cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
- EVT VT = Node->getValueType(0);
-
- if (DAG.getMachineFunction().getFunction().hasFnAttribute(
- "no-stack-arg-probe")) {
- SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
- Chain = SP.getValue(1);
- SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
- if (Align)
- SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
- DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
- Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
- SDValue Ops[2] = {SP, Chain};
- return DAG.getMergeValues(Ops, dl);
- }
-
- Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
-
- Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
-
- SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
- Chain = SP.getValue(1);
- SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
- if (Align)
- SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
- DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
- Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
-
- Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
-
- SDValue Ops[2] = {SP, Chain};
- return DAG.getMergeValues(Ops, dl);
-}
-
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 643d363e234a..9b388c7f8668 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -90,6 +90,10 @@ enum NodeType : unsigned {
ADC,
SBC, // adc, sbc instructions

+ // To avoid stack clash, allocation is performed by block and each block is
+ // probed.
+ PROBED_ALLOCA,
+
// Predicated instructions where inactive lanes produce undefined results.
ABDS_PRED,
ABDU_PRED,
@@ -610,6 +614,9 @@ public:
MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const;

+ MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
+
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
@@ -1113,10 +1120,10 @@ private:
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
- SDValue &Size,
- SelectionDAG &DAG) const;
+
SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 09980c2f45e6..9b9103e01d67 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -818,6 +818,12 @@ def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain,
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
+
+def AArch64probedalloca
+ : SDNode<"AArch64ISD::PROBED_ALLOCA",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPMayStore]>;
+
def AArch64mrs : SDNode<"AArch64ISD::MRS",
SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
[SDNPHasChain, SDNPOutGlue]>;
@@ -908,6 +914,14 @@ def PROBED_STACKALLOC_VAR : Pseudo<(outs),
[]>,
Sched<[]>;

+// Probed stack allocations of a variable size, used for allocas of unknown size
+// when stack-clash protection is enabled.
+let usesCustomInserter = 1 in
+def PROBED_STACKALLOC_DYN : Pseudo<(outs),
+ (ins GPR64common:$target),
+ [(AArch64probedalloca GPR64common:$target)]>,
+ Sched<[]>;
+
} // Defs = [SP, NZCV], Uses = [SP] in
} // hasSideEffects = 1, isCodeGenOnly = 1

diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic-no-frame-setup.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic-no-frame-setup.ll
new file mode 100644
index 000000000000..673f9038a35f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic-no-frame-setup.ll
@@ -0,0 +1,14 @@
+; RUN: llc --stop-after=finalize-isel -o - | FileCheck %s
+target triple = "aarch64-linux"
+
+; Check dynamic stack allocation and probing instructions do not have
+; the FrameSetup flag.
+
+; CHECK-NOT: frame-setup
+define void @no_frame_setup(i64 %size, ptr %out) #0 {
+ %v = alloca i8, i64 %size, align 1
+ store ptr %v, ptr %out, align 8
+ ret void
+}
+
+attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000000..4d9ef77f7a0d
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
|
|
||||||
@@ -0,0 +1,362 @@
|
|
||||||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
||||||
+; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs | FileCheck %s
|
|
||||||
+
|
|
||||||
+; Dynamically-sized allocation, needs a loop which can handle any size at
|
|
||||||
+; runtime. The final iteration of the loop will temporarily put SP below the
|
|
||||||
+; target address, but this doesn't break any of the ABI constraints on the
|
|
||||||
+; stack, and also doesn't probe below the target SP value.
|
|
||||||
+define void @dynamic(i64 %size, ptr %out) #0 {
|
|
||||||
+; CHECK-LABEL: dynamic:
|
|
||||||
+; CHECK: // %bb.0:
|
|
||||||
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
||||||
+; CHECK-NEXT: mov x29, sp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa w29, 16
|
|
||||||
+; CHECK-NEXT: .cfi_offset w30, -8
|
|
||||||
+; CHECK-NEXT: .cfi_offset w29, -16
|
|
||||||
+; CHECK-NEXT: add x9, x0, #15
|
|
||||||
+; CHECK-NEXT: mov x8, sp
|
|
||||||
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
|
|
||||||
+; CHECK-NEXT: sub x8, x8, x9
|
|
||||||
+; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
|
|
||||||
+; CHECK-NEXT: cmp sp, x8
|
|
||||||
+; CHECK-NEXT: b.le .LBB0_3
|
|
||||||
+; CHECK-NEXT: // %bb.2: // in Loop: Header=BB0_1 Depth=1
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: b .LBB0_1
|
|
||||||
+; CHECK-NEXT: .LBB0_3:
|
|
||||||
+; CHECK-NEXT: mov sp, x8
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: str x8, [x1]
|
|
||||||
+; CHECK-NEXT: mov sp, x29
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa wsp, 16
|
|
||||||
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 0
|
|
||||||
+; CHECK-NEXT: .cfi_restore w30
|
|
||||||
+; CHECK-NEXT: .cfi_restore w29
|
|
||||||
+; CHECK-NEXT: ret
|
|
||||||
+ %v = alloca i8, i64 %size, align 1
|
|
||||||
+ store ptr %v, ptr %out, align 8
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+; This function has a fixed-size stack slot and a dynamic one. The fixed size
|
|
||||||
+; slot isn't large enough that we would normally probe it, but we need to do so
|
|
||||||
+; here otherwise the gap between the CSR save and the first probe of the
|
|
||||||
+; dynamic allocation could be too far apart when the size of the dynamic
|
|
||||||
+; allocation is close to the guard size.
|
|
||||||
+define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
|
|
||||||
+; CHECK-LABEL: dynamic_fixed:
|
|
||||||
+; CHECK: // %bb.0:
|
|
||||||
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
||||||
+; CHECK-NEXT: mov x29, sp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa w29, 16
|
|
||||||
+; CHECK-NEXT: .cfi_offset w30, -8
|
|
||||||
+; CHECK-NEXT: .cfi_offset w29, -16
|
|
||||||
+; CHECK-NEXT: str xzr, [sp, #-64]!
|
|
||||||
+; CHECK-NEXT: add x9, x0, #15
|
|
||||||
+; CHECK-NEXT: mov x8, sp
|
|
||||||
+; CHECK-NEXT: sub x10, x29, #64
|
|
||||||
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
|
|
||||||
+; CHECK-NEXT: str x10, [x1]
|
|
||||||
+; CHECK-NEXT: sub x8, x8, x9
|
|
||||||
+; CHECK-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
|
|
||||||
+; CHECK-NEXT: cmp sp, x8
|
|
||||||
+; CHECK-NEXT: b.le .LBB1_3
|
|
||||||
+; CHECK-NEXT: // %bb.2: // in Loop: Header=BB1_1 Depth=1
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: b .LBB1_1
|
|
||||||
+; CHECK-NEXT: .LBB1_3:
|
|
||||||
+; CHECK-NEXT: mov sp, x8
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: str x8, [x2]
|
|
||||||
+; CHECK-NEXT: mov sp, x29
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa wsp, 16
|
|
||||||
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 0
|
|
||||||
+; CHECK-NEXT: .cfi_restore w30
|
|
||||||
+; CHECK-NEXT: .cfi_restore w29
|
|
||||||
+; CHECK-NEXT: ret
|
|
||||||
+ %v1 = alloca i8, i64 64, align 1
|
|
||||||
+ store ptr %v1, ptr %out1, align 8
|
|
||||||
+ %v2 = alloca i8, i64 %size, align 1
|
|
||||||
+ store ptr %v2, ptr %out2, align 8
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+; Dynamic allocation, with an alignment requirement greater than the alignment
|
|
||||||
+; of SP. Done by ANDing the target SP with a constant to align it down, then
|
|
||||||
+; doing the loop as normal. Note that we also re-align the stack in the prolog,
|
|
||||||
+; which isn't actually needed because the only aligned allocations are dynamic,
|
|
||||||
+; this is done even without stack probing.
|
|
||||||
+define void @dynamic_align_64(i64 %size, ptr %out) #0 {
|
|
||||||
+; CHECK-LABEL: dynamic_align_64:
|
|
||||||
+; CHECK: // %bb.0:
|
|
||||||
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
||||||
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: mov x29, sp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa w29, 32
|
|
||||||
+; CHECK-NEXT: .cfi_offset w19, -16
|
|
||||||
+; CHECK-NEXT: .cfi_offset w30, -24
|
|
||||||
+; CHECK-NEXT: .cfi_offset w29, -32
|
|
||||||
+; CHECK-NEXT: sub x9, sp, #32
|
|
||||||
+; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0
|
|
||||||
+; CHECK-NEXT: add x9, x0, #15
|
|
||||||
+; CHECK-NEXT: mov x8, sp
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
|
|
||||||
+; CHECK-NEXT: mov x19, sp
|
|
||||||
+; CHECK-NEXT: sub x8, x8, x9
|
|
||||||
+; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0
|
|
||||||
+; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
|
|
||||||
+; CHECK-NEXT: cmp sp, x8
|
|
||||||
+; CHECK-NEXT: b.le .LBB2_3
|
|
||||||
+; CHECK-NEXT: // %bb.2: // in Loop: Header=BB2_1 Depth=1
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: b .LBB2_1
|
|
||||||
+; CHECK-NEXT: .LBB2_3:
|
|
||||||
+; CHECK-NEXT: mov sp, x8
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: str x8, [x1]
|
|
||||||
+; CHECK-NEXT: mov sp, x29
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa wsp, 32
|
|
||||||
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 0
|
|
||||||
+; CHECK-NEXT: .cfi_restore w19
|
|
||||||
+; CHECK-NEXT: .cfi_restore w30
|
|
||||||
+; CHECK-NEXT: .cfi_restore w29
|
|
||||||
+; CHECK-NEXT: ret
|
|
||||||
+ %v = alloca i8, i64 %size, align 64
|
|
||||||
+ store ptr %v, ptr %out, align 8
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+; Dynamic allocation, with an alignment greater than the stack guard size. The
|
|
||||||
+; only difference to the dynamic allocation is the constant used for aligning
|
|
||||||
+; the target SP, the loop will probe the whole allocation without needing to
|
|
||||||
+; know about the alignment padding.
|
|
||||||
+define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
|
|
||||||
+; CHECK-LABEL: dynamic_align_8192:
|
|
||||||
+; CHECK: // %bb.0:
|
|
||||||
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
||||||
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: mov x29, sp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa w29, 32
|
|
||||||
+; CHECK-NEXT: .cfi_offset w19, -16
|
|
||||||
+; CHECK-NEXT: .cfi_offset w30, -24
|
|
||||||
+; CHECK-NEXT: .cfi_offset w29, -32
|
|
||||||
+; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096
|
|
||||||
+; CHECK-NEXT: sub x9, x9, #4064
|
|
||||||
+; CHECK-NEXT: and x9, x9, #0xffffffffffffe000
|
|
||||||
+; CHECK-NEXT: .LBB3_1: // =>This Inner Loop Header: Depth=1
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
|
|
||||||
+; CHECK-NEXT: cmp sp, x9
|
|
||||||
+; CHECK-NEXT: b.le .LBB3_3
|
|
||||||
+; CHECK-NEXT: // %bb.2: // in Loop: Header=BB3_1 Depth=1
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: b .LBB3_1
|
|
||||||
+; CHECK-NEXT: .LBB3_3:
|
|
||||||
+; CHECK-NEXT: mov sp, x9
|
|
||||||
+; CHECK-NEXT: add x9, x0, #15
|
|
||||||
+; CHECK-NEXT: mov x8, sp
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
|
|
||||||
+; CHECK-NEXT: mov x19, sp
|
|
||||||
+; CHECK-NEXT: sub x8, x8, x9
|
|
||||||
+; CHECK-NEXT: and x8, x8, #0xffffffffffffe000
|
|
||||||
+; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
|
|
||||||
+; CHECK-NEXT: cmp sp, x8
|
|
||||||
+; CHECK-NEXT: b.le .LBB3_6
|
|
||||||
+; CHECK-NEXT: // %bb.5: // in Loop: Header=BB3_4 Depth=1
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: b .LBB3_4
|
|
||||||
+; CHECK-NEXT: .LBB3_6:
|
|
||||||
+; CHECK-NEXT: mov sp, x8
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: str x8, [x1]
|
|
||||||
+; CHECK-NEXT: mov sp, x29
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa wsp, 32
|
|
||||||
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 0
|
|
||||||
+; CHECK-NEXT: .cfi_restore w19
|
|
||||||
+; CHECK-NEXT: .cfi_restore w30
|
|
||||||
+; CHECK-NEXT: .cfi_restore w29
|
|
||||||
+; CHECK-NEXT: ret
|
|
||||||
+ %v = alloca i8, i64 %size, align 8192
|
|
||||||
+ store ptr %v, ptr %out, align 8
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+; For 64k guard pages, the only difference is the constant subtracted from SP
|
|
||||||
+; in the loop.
|
|
||||||
+define void @dynamic_64k_guard(i64 %size, ptr %out) #0 "stack-probe-size"="65536" {
|
|
||||||
+; CHECK-LABEL: dynamic_64k_guard:
|
|
||||||
+; CHECK: // %bb.0:
|
|
||||||
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
||||||
+; CHECK-NEXT: mov x29, sp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa w29, 16
|
|
||||||
+; CHECK-NEXT: .cfi_offset w30, -8
|
|
||||||
+; CHECK-NEXT: .cfi_offset w29, -16
|
|
||||||
+; CHECK-NEXT: add x9, x0, #15
|
|
||||||
+; CHECK-NEXT: mov x8, sp
|
|
||||||
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
|
|
||||||
+; CHECK-NEXT: sub x8, x8, x9
|
|
||||||
+; CHECK-NEXT: .LBB4_1: // =>This Inner Loop Header: Depth=1
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
|
|
||||||
+; CHECK-NEXT: cmp sp, x8
|
|
||||||
+; CHECK-NEXT: b.le .LBB4_3
|
|
||||||
+; CHECK-NEXT: // %bb.2: // in Loop: Header=BB4_1 Depth=1
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: b .LBB4_1
|
|
||||||
+; CHECK-NEXT: .LBB4_3:
|
|
||||||
+; CHECK-NEXT: mov sp, x8
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: str x8, [x1]
|
|
||||||
+; CHECK-NEXT: mov sp, x29
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa wsp, 16
|
|
||||||
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 0
|
|
||||||
+; CHECK-NEXT: .cfi_restore w30
|
|
||||||
+; CHECK-NEXT: .cfi_restore w29
|
|
||||||
+; CHECK-NEXT: ret
|
|
||||||
+ %v = alloca i8, i64 %size, align 1
|
|
||||||
+ store ptr %v, ptr %out, align 8
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+; If a function has variable-sized stack objects, then any function calls which
|
|
||||||
+; need to pass arguments on the stack must allocate the stack space for them
|
|
||||||
+; dynamically, to ensure they are at the bottom of the frame. We need to probe
|
|
||||||
+; that space when it is larger than the unprobed space allowed by the ABI (1024
|
|
||||||
+; bytes), so this needs a very large number of arguments.
|
|
||||||
+define void @no_reserved_call_frame(i64 %n) #0 {
|
|
||||||
+; CHECK-LABEL: no_reserved_call_frame:
|
|
||||||
+; CHECK: // %bb.0: // %entry
|
|
||||||
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
||||||
+; CHECK-NEXT: mov x29, sp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa w29, 16
|
|
||||||
+; CHECK-NEXT: .cfi_offset w30, -8
|
|
||||||
+; CHECK-NEXT: .cfi_offset w29, -16
|
|
||||||
+; CHECK-NEXT: lsl x9, x0, #2
|
|
||||||
+; CHECK-NEXT: mov x8, sp
|
|
||||||
+; CHECK-NEXT: add x9, x9, #15
|
|
||||||
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
|
|
||||||
+; CHECK-NEXT: sub x0, x8, x9
|
|
||||||
+; CHECK-NEXT: .LBB5_1: // %entry
|
|
||||||
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
|
|
||||||
+; CHECK-NEXT: cmp sp, x0
|
|
||||||
+; CHECK-NEXT: b.le .LBB5_3
|
|
||||||
+; CHECK-NEXT: // %bb.2: // %entry
|
|
||||||
+; CHECK-NEXT: // in Loop: Header=BB5_1 Depth=1
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: b .LBB5_1
|
|
||||||
+; CHECK-NEXT: .LBB5_3: // %entry
|
|
||||||
+; CHECK-NEXT: mov sp, x0
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #1104
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: bl callee_stack_args
|
|
||||||
+; CHECK-NEXT: add sp, sp, #1104
|
|
||||||
+; CHECK-NEXT: mov sp, x29
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa wsp, 16
|
|
||||||
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 0
|
|
||||||
+; CHECK-NEXT: .cfi_restore w30
|
|
||||||
+; CHECK-NEXT: .cfi_restore w29
|
|
||||||
+; CHECK-NEXT: ret
|
|
||||||
+entry:
|
|
||||||
+ %v = alloca i32, i64 %n
|
|
||||||
+ call void @callee_stack_args(ptr %v, [138 x i64] undef)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+; Same as above but without a variable-sized allocation, so the reserved call
|
|
||||||
+; frame can be folded into the fixed-size allocation in the prologue.
|
|
||||||
+define void @reserved_call_frame(i64 %n) #0 {
|
|
||||||
+; CHECK-LABEL: reserved_call_frame:
|
|
||||||
+; CHECK: // %bb.0: // %entry
|
|
||||||
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
||||||
+; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: mov x29, sp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa w29, 32
|
|
||||||
+; CHECK-NEXT: .cfi_offset w28, -16
|
|
||||||
+; CHECK-NEXT: .cfi_offset w30, -24
|
|
||||||
+; CHECK-NEXT: .cfi_offset w29, -32
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #1504
|
|
||||||
+; CHECK-NEXT: add x0, sp, #1104
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: bl callee_stack_args
|
|
||||||
+; CHECK-NEXT: add sp, sp, #1504
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa wsp, 32
|
|
||||||
+; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 0
|
|
||||||
+; CHECK-NEXT: .cfi_restore w28
|
|
||||||
+; CHECK-NEXT: .cfi_restore w30
|
|
||||||
+; CHECK-NEXT: .cfi_restore w29
|
|
||||||
+; CHECK-NEXT: ret
|
|
||||||
+entry:
|
|
||||||
+ %v = alloca i32, i64 100
|
|
||||||
+ call void @callee_stack_args(ptr %v, [138 x i64] undef)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+declare void @callee_stack_args(ptr, [138 x i64])
|
|
||||||
+
|
|
||||||
+; Dynamic allocation of SVE vectors
|
|
||||||
+define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" {
|
|
||||||
+; CHECK-LABEL: dynamic_sve:
|
|
||||||
+; CHECK: // %bb.0:
|
|
||||||
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
||||||
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
|
|
||||||
+; CHECK-NEXT: mov x29, sp
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa w29, 32
|
|
||||||
+; CHECK-NEXT: .cfi_offset w19, -16
|
|
||||||
+; CHECK-NEXT: .cfi_offset w30, -24
|
|
||||||
+; CHECK-NEXT: .cfi_offset w29, -32
|
|
||||||
+; CHECK-NEXT: rdvl x9, #1
|
|
||||||
+; CHECK-NEXT: mov x10, #15 // =0xf
|
|
||||||
+; CHECK-NEXT: mov x8, sp
|
|
||||||
+; CHECK-NEXT: madd x9, x0, x9, x10
|
|
||||||
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
|
|
||||||
+; CHECK-NEXT: sub x8, x8, x9
|
|
||||||
+; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
|
|
||||||
+; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
|
|
||||||
+; CHECK-NEXT: cmp sp, x8
|
|
||||||
+; CHECK-NEXT: b.le .LBB7_3
|
|
||||||
+; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: b .LBB7_1
|
|
||||||
+; CHECK-NEXT: .LBB7_3:
|
|
||||||
+; CHECK-NEXT: mov sp, x8
|
|
||||||
+; CHECK-NEXT: str xzr, [sp]
|
|
||||||
+; CHECK-NEXT: str x8, [x1]
|
|
||||||
+; CHECK-NEXT: mov sp, x29
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa wsp, 32
|
|
||||||
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
|
|
||||||
+; CHECK-NEXT: .cfi_def_cfa_offset 0
|
|
||||||
+; CHECK-NEXT: .cfi_restore w19
|
|
||||||
+; CHECK-NEXT: .cfi_restore w30
|
|
||||||
+; CHECK-NEXT: .cfi_restore w29
|
|
||||||
+; CHECK-NEXT: ret
|
|
||||||
+ %v = alloca <vscale x 4 x float>, i64 %size, align 16
|
|
||||||
+ store ptr %v, ptr %out, align 8
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }
|
|
||||||
\ No newline at end of file
--
2.42.0.windows.2

@ -1,496 +0,0 @@
From dbca022577e0da1f411ee84143d59c6c9d941969 Mon Sep 17 00:00:00 2001
From: rickyleung <leung.wing.chung@huawei.com>
Date: Fri, 26 Apr 2024 17:29:18 +0800
Subject: [PATCH 6/7] [backport][AArch64] Stack probing for dynamic allocas in
GlobalISel

Reference: https://github.com/llvm/llvm-project/commit/c1140d49ec3363bf903e4c1dbf7a3f5e8c1b6523

Co-authored-by: Oliver Stannard <oliver.stannard@linaro.org>
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 37 ++-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 47 +++-
.../AArch64/GISel/AArch64LegalizerInfo.h | 1 +
.../GlobalISel/legalize-dyn-alloca.mir | 255 ++++++++++++++----
.../GlobalISel/legalizer-info-validation.mir | 7 +
.../CodeGen/AArch64/stack-probing-dynamic.ll | 3 +-
7 files changed, 284 insertions(+), 68 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
|
|
||||||
index 9288091874cf..7abbd1f03f16 100644
|
|
||||||
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
|
|
||||||
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
|
|
||||||
@@ -400,6 +400,8 @@ public:
|
|
||||||
LegalizeResult lowerUnmergeValues(MachineInstr &MI);
|
|
||||||
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI);
|
|
||||||
LegalizeResult lowerShuffleVector(MachineInstr &MI);
|
|
||||||
+ Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize,
|
|
||||||
+ Align Alignment, LLT PtrTy);
|
|
||||||
LegalizeResult lowerDynStackAlloc(MachineInstr &MI);
|
|
||||||
LegalizeResult lowerStackSave(MachineInstr &MI);
|
|
||||||
LegalizeResult lowerStackRestore(MachineInstr &MI);
|
|
||||||
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
|
|
||||||
index 75d9789be4d0..5557456e706d 100644
|
|
||||||
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
|
|
||||||
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
|
|
||||||
@@ -6777,21 +6777,12 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
|
|
||||||
return Legalized;
|
|
||||||
}
|
|
||||||
|
|
||||||
-LegalizerHelper::LegalizeResult
|
|
||||||
-LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
|
|
||||||
- const auto &MF = *MI.getMF();
|
|
||||||
- const auto &TFI = *MF.getSubtarget().getFrameLowering();
|
|
||||||
- if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
|
|
||||||
- return UnableToLegalize;
|
|
||||||
-
|
|
||||||
- Register Dst = MI.getOperand(0).getReg();
|
|
||||||
- Register AllocSize = MI.getOperand(1).getReg();
|
|
||||||
- Align Alignment = assumeAligned(MI.getOperand(2).getImm());
|
|
||||||
-
|
|
||||||
- LLT PtrTy = MRI.getType(Dst);
|
|
||||||
+Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
|
|
||||||
+ Register AllocSize,
|
|
||||||
+ Align Alignment,
|
|
||||||
+ LLT PtrTy) {
|
|
||||||
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
|
|
||||||
|
|
||||||
- Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
|
|
||||||
auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
|
|
||||||
SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
|
|
||||||
|
|
||||||
@@ -6806,7 +6797,25 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
|
|
||||||
Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
|
|
||||||
}
|
|
||||||
|
|
||||||
- SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
|
|
||||||
+ return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+LegalizerHelper::LegalizeResult
|
|
||||||
+LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
|
|
||||||
+ const auto &MF = *MI.getMF();
|
|
||||||
+ const auto &TFI = *MF.getSubtarget().getFrameLowering();
|
|
||||||
+ if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
|
|
||||||
+ return UnableToLegalize;
|
|
||||||
+
|
|
||||||
+ Register Dst = MI.getOperand(0).getReg();
|
|
||||||
+ Register AllocSize = MI.getOperand(1).getReg();
|
|
||||||
+ Align Alignment = assumeAligned(MI.getOperand(2).getImm());
|
|
||||||
+
|
|
||||||
+ LLT PtrTy = MRI.getType(Dst);
|
|
||||||
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
|
|
||||||
+ Register SPTmp =
|
|
||||||
+ getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
|
|
||||||
+
|
|
||||||
MIRBuilder.buildCopy(SPReg, SPTmp);
|
|
||||||
MIRBuilder.buildCopy(Dst, SPTmp);
|
|
||||||
|
|
||||||
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
|
|
||||||
index f0130a0be29d..0dd2b4d48dd6 100644
|
|
||||||
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
|
|
||||||
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
|
|
||||||
@@ -797,9 +797,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
|
|
||||||
return Query.Types[0] == p0 && Query.Types[1] == s64;
|
|
||||||
});
|
|
||||||
|
|
||||||
- getActionDefinitionsBuilder({G_DYN_STACKALLOC,
|
|
||||||
- G_STACKSAVE,
|
|
||||||
- G_STACKRESTORE}).lower();
|
|
||||||
+ getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
|
|
||||||
+
|
|
||||||
+ getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
|
|
||||||
|
|
||||||
if (ST.hasMOPS()) {
|
|
||||||
// G_BZERO is not supported. Currently it is only emitted by
|
|
||||||
@@ -993,6 +993,8 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
|
|
||||||
return legalizeMemOps(MI, Helper);
|
|
||||||
case TargetOpcode::G_FCOPYSIGN:
|
|
||||||
return legalizeFCopySign(MI, Helper);
|
|
||||||
+ case TargetOpcode::G_DYN_STACKALLOC:
|
|
||||||
+ return legalizeDynStackAlloc(MI, Helper);
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm_unreachable("expected switch to return");
|
|
||||||
@@ -1689,3 +1691,42 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
|
|
||||||
MI.eraseFromParent();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+bool AArch64LegalizerInfo::legalizeDynStackAlloc(
|
|
||||||
+ MachineInstr &MI, LegalizerHelper &Helper) const {
|
|
||||||
+ MachineFunction &MF = *MI.getParent()->getParent();
|
|
||||||
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
|
|
||||||
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
|
|
||||||
+
|
|
||||||
+ // If stack probing is not enabled for this function, use the default
|
|
||||||
+ // lowering.
|
|
||||||
+ if (!MF.getFunction().hasFnAttribute("probe-stack") ||
|
|
||||||
+ MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
|
|
||||||
+ "inline-asm") {
|
|
||||||
+ Helper.lowerDynStackAlloc(MI);
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ Register Dst = MI.getOperand(0).getReg();
|
|
||||||
+ Register AllocSize = MI.getOperand(1).getReg();
|
|
||||||
+ Align Alignment = assumeAligned(MI.getOperand(2).getImm());
|
|
||||||
+
|
|
||||||
+ assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
|
|
||||||
+ "Unexpected type for dynamic alloca");
|
|
||||||
+ assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
|
|
||||||
+ "Unexpected type for dynamic alloca");
|
|
||||||
+
|
|
||||||
+ LLT PtrTy = MRI.getType(Dst);
|
|
||||||
+ Register SPReg =
|
|
||||||
+ Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
|
|
||||||
+ Register SPTmp =
|
|
||||||
+ Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
|
|
||||||
+ auto NewMI =
|
|
||||||
+ MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
|
|
||||||
+ MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
|
|
||||||
+ MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
|
|
||||||
+ MIRBuilder.buildCopy(Dst, SPTmp);
|
|
||||||
+
|
|
||||||
+ MI.eraseFromParent();
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
|
|
||||||
index c10f6e071ed4..94484ea59d15 100644
|
|
||||||
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
|
|
||||||
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
|
|
||||||
@@ -58,6 +58,7 @@ private:
|
|
||||||
bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
|
|
||||||
bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
|
|
||||||
bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const;
|
|
||||||
+ bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
|
|
||||||
const AArch64Subtarget *ST;
|
|
||||||
};
|
|
||||||
} // End llvm namespace.
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir
|
|
||||||
index e9188fb89f69..882c7468e70f 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir
|
|
||||||
@@ -19,6 +19,21 @@
|
|
||||||
ret i128* %addr
|
|
||||||
}
|
|
||||||
|
|
||||||
+ define i8* @test_simple_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" {
|
|
||||||
+ %addr = alloca i8, i32 %numelts
|
|
||||||
+ ret i8* %addr
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ define i8* @test_aligned_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" {
|
|
||||||
+ %addr = alloca i8, i32 %numelts, align 32
|
|
||||||
+ ret i8* %addr
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ define i128* @test_natural_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" {
|
|
||||||
+ %addr = alloca i128, i32 %numelts
|
|
||||||
+ ret i128* %addr
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
...
|
|
||||||
---
|
|
||||||
name: test_simple_alloca
|
|
||||||
@@ -37,22 +52,23 @@ body: |
|
|
||||||
|
|
||||||
; CHECK-LABEL: name: test_simple_alloca
|
|
||||||
; CHECK: liveins: $w0
|
|
||||||
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
|
||||||
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
|
||||||
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
|
|
||||||
- ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
|
|
||||||
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
|
|
||||||
- ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
|
|
||||||
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
|
|
||||||
- ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
|
|
||||||
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
- ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
|
|
||||||
- ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
|
|
||||||
- ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[SUB]](s64)
|
|
||||||
- ; CHECK: $sp = COPY [[INTTOPTR]](p0)
|
|
||||||
- ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
|
|
||||||
- ; CHECK: $x0 = COPY [[COPY2]](p0)
|
|
||||||
- ; CHECK: RET_ReallyLR implicit $x0
|
|
||||||
+ ; CHECK-NEXT: {{ $}}
|
|
||||||
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
|
||||||
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
|
||||||
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
|
|
||||||
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
|
|
||||||
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
|
|
||||||
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
|
|
||||||
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
|
|
||||||
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
|
|
||||||
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
|
|
||||||
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
|
|
||||||
+ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[SUB]](s64)
|
|
||||||
+ ; CHECK-NEXT: $sp = COPY [[INTTOPTR]](p0)
|
|
||||||
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
|
|
||||||
+ ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0)
|
|
||||||
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
|
|
||||||
%0:_(s32) = COPY $w0
|
|
||||||
%3:_(s64) = G_CONSTANT i64 1
|
|
||||||
%1:_(s64) = G_ZEXT %0(s32)
|
|
||||||
@@ -83,24 +99,25 @@ body: |
|
|
||||||
|
|
||||||
; CHECK-LABEL: name: test_aligned_alloca
|
|
||||||
; CHECK: liveins: $w0
|
|
||||||
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
|
||||||
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
|
||||||
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
|
|
||||||
- ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
|
|
||||||
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
|
|
||||||
- ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
|
|
||||||
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
|
|
||||||
- ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
|
|
||||||
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
- ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
|
|
||||||
- ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
|
|
||||||
- ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -32
|
|
||||||
- ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]]
|
|
||||||
- ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND1]](s64)
|
|
||||||
- ; CHECK: $sp = COPY [[INTTOPTR]](p0)
|
|
||||||
- ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
|
|
||||||
- ; CHECK: $x0 = COPY [[COPY2]](p0)
|
|
||||||
- ; CHECK: RET_ReallyLR implicit $x0
|
|
||||||
+ ; CHECK-NEXT: {{ $}}
|
|
||||||
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
|
||||||
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
|
||||||
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
|
|
||||||
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
|
|
||||||
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
|
|
||||||
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
|
|
||||||
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
|
|
||||||
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
|
|
||||||
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
|
|
||||||
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
|
|
||||||
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -32
|
|
||||||
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]]
|
|
||||||
+ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND1]](s64)
|
|
||||||
+ ; CHECK-NEXT: $sp = COPY [[INTTOPTR]](p0)
|
|
||||||
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
|
|
||||||
+ ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0)
|
|
||||||
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
|
|
||||||
%0:_(s32) = COPY $w0
|
|
||||||
%3:_(s64) = G_CONSTANT i64 1
|
|
||||||
%1:_(s64) = G_ZEXT %0(s32)
|
|
||||||
@@ -131,22 +148,23 @@ body: |
|
|
||||||
|
|
||||||
; CHECK-LABEL: name: test_natural_alloca
|
|
||||||
; CHECK: liveins: $w0
|
|
||||||
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
|
||||||
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
|
||||||
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
|
|
||||||
- ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
|
|
||||||
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
|
|
||||||
- ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
|
|
||||||
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
|
|
||||||
- ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
|
|
||||||
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
- ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
|
|
||||||
- ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
|
|
||||||
- ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[SUB]](s64)
|
|
||||||
- ; CHECK: $sp = COPY [[INTTOPTR]](p0)
|
|
||||||
- ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
|
|
||||||
- ; CHECK: $x0 = COPY [[COPY2]](p0)
|
|
||||||
- ; CHECK: RET_ReallyLR implicit $x0
|
|
||||||
+ ; CHECK-NEXT: {{ $}}
|
|
||||||
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
|
||||||
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
|
||||||
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
|
|
||||||
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
|
|
||||||
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
|
|
||||||
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
|
|
||||||
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
|
|
||||||
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
|
|
||||||
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
|
|
||||||
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
|
|
||||||
+ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[SUB]](s64)
|
|
||||||
+ ; CHECK-NEXT: $sp = COPY [[INTTOPTR]](p0)
|
|
||||||
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
|
|
||||||
+ ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0)
|
|
||||||
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
|
|
||||||
%0:_(s32) = COPY $w0
|
|
||||||
%3:_(s64) = G_CONSTANT i64 16
|
|
||||||
%1:_(s64) = G_ZEXT %0(s32)
|
|
||||||
@@ -160,3 +178,140 @@ body: |
|
|
||||||
RET_ReallyLR implicit $x0
|
|
||||||
|
|
||||||
...
|
|
||||||
+---
|
|
||||||
+name: test_simple_alloca_stack_probing
|
|
||||||
+alignment: 4
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$w0' }
|
|
||||||
+frameInfo:
|
|
||||||
+ maxAlignment: 1
|
|
||||||
+stack:
|
|
||||||
+ - { id: 0, name: addr, type: variable-sized, alignment: 1 }
|
|
||||||
+machineFunctionInfo: {}
|
|
||||||
+body: |
|
|
||||||
+ bb.1 (%ir-block.0):
|
|
||||||
+ liveins: $w0
|
|
||||||
+ ; CHECK-LABEL: name: test_simple_alloca_stack_probing
|
|
||||||
+ ; CHECK: liveins: $w0
|
|
||||||
+ ; CHECK-NEXT: {{ $}}
|
|
||||||
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
|
||||||
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
|
|
||||||
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
|
||||||
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s64)
|
|
||||||
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
|
|
||||||
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[SHL]], [[C1]]
|
|
||||||
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
|
|
||||||
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
|
|
||||||
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
|
|
||||||
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
|
|
||||||
+ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:gpr64common(p0) = G_INTTOPTR [[SUB]](s64)
|
|
||||||
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
|
|
||||||
+ ; CHECK-NEXT: PROBED_STACKALLOC_DYN [[INTTOPTR]](p0), implicit-def $sp, implicit-def $nzcv, implicit $sp
|
|
||||||
+ ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0)
|
|
||||||
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
|
|
||||||
+ %0:_(s32) = COPY $w0
|
|
||||||
+ %1:_(s64) = G_ZEXT %0(s32)
|
|
||||||
+ %9:_(s64) = G_CONSTANT i64 0
|
|
||||||
+ %2:_(s64) = G_SHL %1, %9(s64)
|
|
||||||
+ %4:_(s64) = G_CONSTANT i64 15
|
|
||||||
+ %5:_(s64) = nuw G_ADD %2, %4
|
|
||||||
+ %6:_(s64) = G_CONSTANT i64 -16
|
|
||||||
+ %7:_(s64) = G_AND %5, %6
|
|
||||||
+ %8:_(p0) = G_DYN_STACKALLOC %7(s64), 1
|
|
||||||
+ $x0 = COPY %8(p0)
|
|
||||||
+ RET_ReallyLR implicit $x0
|
|
||||||
+...
|
|
||||||
+---
|
|
||||||
+name: test_aligned_alloca_stack_probing
|
|
||||||
+alignment: 4
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$w0' }
|
|
||||||
+frameInfo:
|
|
||||||
+ maxAlignment: 32
|
|
||||||
+stack:
|
|
||||||
+ - { id: 0, name: addr, type: variable-sized, alignment: 32 }
|
|
||||||
+machineFunctionInfo: {}
|
|
||||||
+body: |
|
|
||||||
+ bb.1 (%ir-block.0):
|
|
||||||
+ liveins: $w0
|
|
||||||
+ ; CHECK-LABEL: name: test_aligned_alloca_stack_probing
|
|
||||||
+ ; CHECK: liveins: $w0
|
|
||||||
+ ; CHECK-NEXT: {{ $}}
|
|
||||||
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
|
||||||
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
|
|
||||||
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
|
||||||
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s64)
|
|
||||||
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
|
|
||||||
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[SHL]], [[C1]]
|
|
||||||
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
|
|
||||||
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
|
|
||||||
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
|
|
||||||
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
|
|
||||||
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -32
|
|
||||||
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]]
|
|
||||||
+ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:gpr64common(p0) = G_INTTOPTR [[AND1]](s64)
|
|
||||||
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
|
|
||||||
+ ; CHECK-NEXT: PROBED_STACKALLOC_DYN [[INTTOPTR]](p0), implicit-def $sp, implicit-def $nzcv, implicit $sp
|
|
||||||
+ ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0)
|
|
||||||
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
|
|
||||||
+ %0:_(s32) = COPY $w0
|
|
||||||
+ %1:_(s64) = G_ZEXT %0(s32)
|
|
||||||
+ %9:_(s64) = G_CONSTANT i64 0
|
|
||||||
+ %2:_(s64) = G_SHL %1, %9(s64)
|
|
||||||
+ %4:_(s64) = G_CONSTANT i64 15
|
|
||||||
+ %5:_(s64) = nuw G_ADD %2, %4
|
|
||||||
+ %6:_(s64) = G_CONSTANT i64 -16
|
|
||||||
+ %7:_(s64) = G_AND %5, %6
|
|
||||||
+ %8:_(p0) = G_DYN_STACKALLOC %7(s64), 32
|
|
||||||
+ $x0 = COPY %8(p0)
|
|
||||||
+ RET_ReallyLR implicit $x0
|
|
||||||
+...
|
|
||||||
+---
|
|
||||||
+name: test_natural_alloca_stack_probing
|
|
||||||
+alignment: 4
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$w0' }
|
|
||||||
+frameInfo:
|
|
||||||
+ maxAlignment: 1
|
|
||||||
+stack:
|
|
||||||
+ - { id: 0, name: addr, type: variable-sized, alignment: 1 }
|
|
||||||
+machineFunctionInfo: {}
|
|
||||||
+body: |
|
|
||||||
+ bb.1 (%ir-block.0):
|
|
||||||
+ liveins: $w0
|
|
||||||
+ ; CHECK-LABEL: name: test_natural_alloca_stack_probing
|
|
||||||
+ ; CHECK: liveins: $w0
|
|
||||||
+ ; CHECK-NEXT: {{ $}}
|
|
||||||
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
|
||||||
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
|
|
||||||
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
|
|
||||||
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s64)
|
|
||||||
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
|
|
||||||
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[SHL]], [[C1]]
|
|
||||||
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
|
|
||||||
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
|
|
||||||
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
|
|
||||||
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
|
|
||||||
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
|
|
||||||
+ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:gpr64common(p0) = G_INTTOPTR [[SUB]](s64)
|
|
||||||
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
|
|
||||||
+ ; CHECK-NEXT: PROBED_STACKALLOC_DYN [[INTTOPTR]](p0), implicit-def $sp, implicit-def $nzcv, implicit $sp
|
|
||||||
+ ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0)
|
|
||||||
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
|
|
||||||
+ %0:_(s32) = COPY $w0
|
|
||||||
+ %1:_(s64) = G_ZEXT %0(s32)
|
|
||||||
+ %9:_(s64) = G_CONSTANT i64 4
|
|
||||||
+ %2:_(s64) = G_SHL %1, %9(s64)
|
|
||||||
+ %4:_(s64) = G_CONSTANT i64 15
|
|
||||||
+ %5:_(s64) = nuw G_ADD %2, %4
|
|
||||||
+ %6:_(s64) = G_CONSTANT i64 -16
|
|
||||||
+ %7:_(s64) = G_AND %5, %6
|
|
||||||
+ %8:_(p0) = G_DYN_STACKALLOC %7(s64), 1
|
|
||||||
+ $x0 = COPY %8(p0)
|
|
||||||
+ RET_ReallyLR implicit $x0
|
|
||||||
+...
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 461161f5b338..efae9b66b53d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -652,6 +652,13 @@
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to [[DYN_STACKALLOC]]
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_STACKSAVE (opcode [[STACKSAVE:[0-9]+]]): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_STACKRESTORE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to [[STACKSAVE]]
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_STRICT_FADD (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
|
|
||||||
index 4d9ef77f7a0d..ad9cdbe92b23 100644
|
|
||||||
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
|
|
||||||
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
|
|
||||||
@@ -1,5 +1,6 @@
|
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
||||||
-; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs | FileCheck %s
|
|
||||||
+; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs | FileCheck %s
|
|
||||||
+; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s
|
|
||||||
|
|
||||||
; Dynamically-sized allocation, needs a loop which can handle any size at
|
|
||||||
; runtime. The final iteration of the loop will temporarily put SP below the
|
|
||||||
--
|
|
||||||
2.42.0.windows.2
|
|
||||||
|
|
||||||
@ -1,177 +0,0 @@
From 9425ee5f8608ff8611628d83386f61950d7fff85 Mon Sep 17 00:00:00 2001
From: rickyleung <leung.wing.chung@huawei.com>
Date: Tue, 7 May 2024 21:37:03 +0800
Subject: [PATCH 7/7] Update testcase for stack clash protection backport

---
.../GlobalISel/legalize-dyn-alloca.mir | 3 +-
.../GlobalISel/stacksave-stackrestore.ll | 14 ++++++----
.../CodeGen/AArch64/stack-probing-dynamic.ll | 16 ++++++-----
.../AArch64/stack-probing-last-in-block.mir | 4 +--
.../X86/GlobalISel/stacksave-stackrestore.ll | 28 +++++++++++--------
5 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir
index 882c7468e70f..82781cebc55a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir
@@ -313,5 +313,4 @@ body: |
%7:_(s64) = G_AND %5, %6
%8:_(p0) = G_DYN_STACKALLOC %7(s64), 1
$x0 = COPY %8(p0)
- RET_ReallyLR implicit $x0
-...
\ No newline at end of file
+ RET_ReallyLR implicit $x0
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll
index 16bf85af9c17..97ecca0bd77b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll
@@ -15,14 +15,18 @@ define void @test_scoped_alloca(i64 %n) {
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
-; CHECK-NEXT: add x9, x0, #15
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: bl llvm.stacksave.p0
+; CHECK-NEXT: add x9, x19, #15
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
-; CHECK-NEXT: mov x19, sp
-; CHECK-NEXT: sub x0, x8, x9
-; CHECK-NEXT: mov sp, x0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: mov sp, x8
+; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: bl use_addr
-; CHECK-NEXT: mov sp, x19
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: bl llvm.stackrestore.p0
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
index ad9cdbe92b23..3cbcf7749b2a 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -59,10 +59,10 @@ define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
; CHECK-NEXT: str xzr, [sp, #-64]!
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: sub x10, x29, #64
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
-; CHECK-NEXT: str x10, [x1]
+; CHECK-NEXT: sub x10, x29, #64
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: str x10, [x1]
; CHECK-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
@@ -108,10 +108,10 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
-; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0
; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -167,10 +167,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
-; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: and x8, x8, #0xffffffffffffe000
; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -268,8 +268,10 @@ define void @no_reserved_call_frame(i64 %n) #0 {
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1104
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: sub sp, sp, #1104
; CHECK-NEXT: bl callee_stack_args
; CHECK-NEXT: add sp, sp, #1104
+; CHECK-NEXT: add sp, sp, #1104
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -331,8 +333,8 @@ define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" {
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: rdvl x9, #1
; CHECK-NEXT: mov x10, #15 // =0xf
-; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: madd x9, x0, x9, x10
+; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir b/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir
index a8a21ab330ba..9a173be5857e 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir
+++ b/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir
@@ -141,6 +141,4 @@ body: |
B %bb.2

bb.2.exit:
- RET_ReallyLR
-
-...
\ No newline at end of file
+ RET_ReallyLR
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll
index e86c04ee22db..8f665924577f 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll
@@ -13,21 +13,25 @@ define void @test_scoped_alloca(i64 %n) {
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_offset %rbx, -24
-; CHECK-NEXT: movq %rsp, %rbx
-; CHECK-NEXT: movq %rsp, %rax
-; CHECK-NEXT: imulq $1, %rdi, %rcx
-; CHECK-NEXT: addq $15, %rcx
-; CHECK-NEXT: andq $-16, %rcx
-; CHECK-NEXT: subq %rcx, %rax
-; CHECK-NEXT: movq %rax, %rsp
-; CHECK-NEXT: movq %rax, %rdi
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: callq llvm.stacksave.p0
+; CHECK-NEXT: movq %rax, %r14
+; CHECK-NEXT: movq %rsp, %rdi
+; CHECK-NEXT: imulq $1, %rbx, %rax
+; CHECK-NEXT: addq $15, %rax
+; CHECK-NEXT: andq $-16, %rax
+; CHECK-NEXT: subq %rax, %rdi
+; CHECK-NEXT: movq %rdi, %rsp
; CHECK-NEXT: callq use_addr
-; CHECK-NEXT: movq %rbx, %rsp
-; CHECK-NEXT: leaq -8(%rbp), %rsp
+; CHECK-NEXT: movq %r14, %rdi
+; CHECK-NEXT: callq llvm.stackrestore.p0
+; CHECK-NEXT: leaq -16(%rbp), %rsp
; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
--
2.42.0.windows.2

llvm.spec
@ -38,7 +38,7 @@

Name: %{pkg_name}
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
-Release: 11
+Release: 10
Summary: The Low Level Virtual Machine

License: NCSA
@ -70,13 +70,6 @@ Patch18: 0018-Fix-declaration-definition-mismatch-for-classic-flang.patch
Patch19: 0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch
Patch20: 0020-Update-llvm-lit-config-to-support-build_for_openeule.patch

-Patch21: 0021-Backport-GlobalISel-Don-t-expand-stacksave-stackrestore-in-IRTranslator.patch
-Patch22: 0022-Backport-AArch64-Refactor-allocation-of-locals-and-stack-realignment.patch
-Patch23: 0023-Backport-AArch64-Stack-probing-for-function-prologues.patch
-Patch24: 0024-Backport-AArch64-Stack-probing-for-dynamic-allocas-in-SelectionDAG.patch
-Patch25: 0025-Backport-AArch64-Stack-probing-for-dynamic-allocas-in-GlobalISel.patch
-Patch26: 0026-Update-testcase-for-stack-clash-protection-backport.patch
-
BuildRequires: binutils-devel
BuildRequires: cmake
BuildRequires: gcc
@ -360,9 +353,6 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C ./_build
%{install_includedir}/llvm-gmock

%changelog
-* Fri May 10 2024 rickyleung <leung.wing.chung@huawei.com> - 17.0.6-11
-- Backport the patches to support stack clash protection
-
* Mon Apr 29 2024 wangqiang <wangqiang1@kylinos.cn> - 17.0.6-10
- Update llvm-lit config to support macro `build_for_openeuler`
