[AArch64] Support HiSilicon's HIP09 Processor
This commit is contained in:
parent
c2d1ff775e
commit
95487e968f
517
0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
Normal file
517
0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
Normal file
@ -0,0 +1,517 @@
|
|||||||
|
From cac43828d26b178807d194b4bd7c5df69603df29 Mon Sep 17 00:00:00 2001
|
||||||
|
From: xiajingze <xiajingze1@huawei.com>
|
||||||
|
Date: Wed, 31 Jul 2024 18:37:29 +0800
|
||||||
|
Subject: [PATCH] [AArch64] Support HiSilicon's HIP09 Processor
|
||||||
|
|
||||||
|
Signed-off-by: xiajingze <xiajingze1@huawei.com>
|
||||||
|
---
|
||||||
|
llvm/cmake/modules/HandleLLVMOptions.cmake | 8 ++
|
||||||
|
.../llvm/TargetParser/AArch64TargetParser.h | 7 ++
|
||||||
|
llvm/lib/Target/AArch64/AArch64.td | 36 +++++++
|
||||||
|
.../lib/Target/AArch64/AArch64MacroFusion.cpp | 55 +++++++++++
|
||||||
|
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 9 ++
|
||||||
|
llvm/lib/Target/AArch64/AArch64Subtarget.h | 9 +-
|
||||||
|
llvm/lib/Target/CMakeLists.txt | 4 +
|
||||||
|
llvm/lib/TargetParser/Host.cpp | 3 +
|
||||||
|
llvm/test/CodeGen/AArch64/cpus-hip09.ll | 11 +++
|
||||||
|
.../CodeGen/AArch64/macro-fusion-mvnclz.mir | 20 ++++
|
||||||
|
.../AArch64/misched-fusion-lit-hip09.ll | 73 ++++++++++++++
|
||||||
|
llvm/test/CodeGen/AArch64/remat-hip09.ll | 18 ++++
|
||||||
|
llvm/test/lit.site.cfg.py.in | 4 +
|
||||||
|
llvm/unittests/TargetParser/Host.cpp | 5 +
|
||||||
|
.../TargetParser/TargetParserTest.cpp | 16 +++
|
||||||
|
15 files changed, 277 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll
|
||||||
|
create mode 100644 llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
|
||||||
|
create mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
|
||||||
|
create mode 100644 llvm/test/CodeGen/AArch64/remat-hip09.ll
|
||||||
|
|
||||||
|
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||||
|
index 8be5d4ba5..74e68e25d 100644
|
||||||
|
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||||
|
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||||
|
@@ -112,6 +112,14 @@ else()
|
||||||
|
set(LLVM_ENABLE_AUTOTUNER 0)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
+option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON)
|
||||||
|
+if(LLVM_ENABLE_AARCH64_HIP09)
|
||||||
|
+ set(LLVM_ENABLE_AARCH64_HIP09 1)
|
||||||
|
+ add_definitions( -DENABLE_AARCH64_HIP09 )
|
||||||
|
+else()
|
||||||
|
+ set(LLVM_ENABLE_AARCH64_HIP09 0)
|
||||||
|
+endif()
|
||||||
|
+
|
||||||
|
if(LLVM_ENABLE_EXPENSIVE_CHECKS)
|
||||||
|
add_compile_definitions(EXPENSIVE_CHECKS)
|
||||||
|
|
||||||
|
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
|
||||||
|
index dc4cdfa8e..07cd2fcbb 100644
|
||||||
|
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
|
||||||
|
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
|
||||||
|
@@ -542,6 +542,13 @@ inline constexpr CpuInfo CpuInfos[] = {
|
||||||
|
(AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 |
|
||||||
|
AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES |
|
||||||
|
AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)},
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ {"hip09", ARMV8_5A,
|
||||||
|
+ (AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 |
|
||||||
|
+ AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
|
||||||
|
+ AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
|
||||||
|
+ AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)},
|
||||||
|
+#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
// An alias for a CPU.
|
||||||
|
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
|
||||||
|
index 8f50af4b7..c8bfd770f 100644
|
||||||
|
--- a/llvm/lib/Target/AArch64/AArch64.td
|
||||||
|
+++ b/llvm/lib/Target/AArch64/AArch64.td
|
||||||
|
@@ -296,6 +296,12 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
|
||||||
|
"fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
|
||||||
|
"CPU fuses (a + b + 1) and (a - b - 1)">;
|
||||||
|
|
||||||
|
+#ifdef ENABLE_AARCH64_HIP09
|
||||||
|
+def FeatureFuseMvnClz : SubtargetFeature<
|
||||||
|
+ "fuse-mvn-clz", "HasFuseMvnClz", "true",
|
||||||
|
+ "CPU fuses mvn+clz operations">;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
|
||||||
|
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
|
||||||
|
"Disable latency scheduling heuristic">;
|
||||||
|
@@ -1205,6 +1211,21 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
|
||||||
|
FeatureFuseAES,
|
||||||
|
FeaturePostRAScheduler]>;
|
||||||
|
|
||||||
|
+#ifdef ENABLE_AARCH64_HIP09
|
||||||
|
+def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09",
|
||||||
|
+ "HiSilicon HIP-09 processors", [
|
||||||
|
+ FeatureCustomCheapAsMoveHandling,
|
||||||
|
+ FeatureExperimentalZeroingPseudos,
|
||||||
|
+ FeatureFuseAES,
|
||||||
|
+ FeatureLSLFast,
|
||||||
|
+ FeatureAscendStoreAddress,
|
||||||
|
+ FeatureCmpBccFusion,
|
||||||
|
+ FeatureArithmeticBccFusion,
|
||||||
|
+ FeatureFuseLiterals,
|
||||||
|
+ FeatureFuseMvnClz,
|
||||||
|
+ FeaturePostRAScheduler]>;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
|
||||||
|
"Ampere Computing Ampere-1 processors", [
|
||||||
|
FeaturePostRAScheduler,
|
||||||
|
@@ -1359,6 +1380,14 @@ def ProcessorFeatures {
|
||||||
|
list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeaturePerfMon, FeatureSPE,
|
||||||
|
FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
|
||||||
|
+#ifdef ENABLE_AARCH64_HIP09
|
||||||
|
+ list<SubtargetFeature> HIP09 = [HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
+ FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64,
|
||||||
|
+ FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE,
|
||||||
|
+ FeatureFullFP16, FeatureFP16FML, FeatureDotProd,
|
||||||
|
+ FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4,
|
||||||
|
+ FeatureSVE];
|
||||||
|
+#endif
|
||||||
|
list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
|
||||||
|
FeatureSSBS, FeatureRandGen, FeatureSB,
|
||||||
|
FeatureSHA2, FeatureSHA3, FeatureAES];
|
||||||
|
@@ -1464,8 +1493,15 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model,
|
||||||
|
// Marvell ThunderX3T110 Processors.
|
||||||
|
def : ProcessorModel<"thunderx3t110", ThunderX3T110Model,
|
||||||
|
ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>;
|
||||||
|
+
|
||||||
|
+// HiSilicon Processors.
|
||||||
|
def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110,
|
||||||
|
[TuneTSV110]>;
|
||||||
|
+#ifdef ENABLE_AARCH64_HIP09
|
||||||
|
+// FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57.
|
||||||
|
+def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09,
|
||||||
|
+ [TuneHIP09]>;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
|
||||||
|
def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7,
|
||||||
|
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
|
||||||
|
index 05d60872b..4963ec350 100644
|
||||||
|
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
|
||||||
|
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
|
||||||
|
@@ -51,6 +51,12 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
|
||||||
|
case AArch64::SUBSXrr:
|
||||||
|
case AArch64::BICSWrr:
|
||||||
|
case AArch64::BICSXrr:
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ case AArch64::ADCSWr:
|
||||||
|
+ case AArch64::ADCSXr:
|
||||||
|
+ case AArch64::SBCSWr:
|
||||||
|
+ case AArch64::SBCSXr:
|
||||||
|
+#endif
|
||||||
|
return true;
|
||||||
|
case AArch64::ADDSWrs:
|
||||||
|
case AArch64::ADDSXrs:
|
||||||
|
@@ -183,6 +189,20 @@ static bool isLiteralsPair(const MachineInstr *FirstMI,
|
||||||
|
SecondMI.getOperand(3).getImm() == 16))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ // 32 bit immediate.
|
||||||
|
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) &&
|
||||||
|
+ (SecondMI.getOpcode() == AArch64::MOVKWi &&
|
||||||
|
+ SecondMI.getOperand(3).getImm() == 16))
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
+ // Lower half of 64 bit immediate: MOVN Xd paired with MOVK Xd, lsl #16.
|
||||||
|
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNXi) &&
|
||||||
|
+ (SecondMI.getOpcode() == AArch64::MOVKXi &&
|
||||||
|
+ SecondMI.getOperand(3).getImm() == 16))
|
||||||
|
+ return true;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
// Upper half of 64 bit immediate.
|
||||||
|
if ((FirstMI == nullptr ||
|
||||||
|
(FirstMI->getOpcode() == AArch64::MOVKXi &&
|
||||||
|
@@ -437,6 +457,37 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI,
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+static bool isMvnClzPair(const MachineInstr *FirstMI,
|
||||||
|
+ const MachineInstr &SecondMI) {
|
||||||
|
+ // HIP09 supports fusion of an MVN (FirstMI) with a CLZ (SecondMI).
|
||||||
|
+ // When the pair is fused, the CLZ executes faster.
|
||||||
|
+ // The shifted-register forms of MVN are not eligible for fusion.
|
||||||
|
+ //
|
||||||
|
+ // MVN is an alias of ORN with the zero register as the first source:
|
||||||
|
+ // 1. MVN <Wd>, <Wm>{, <shift> #<amount>} is equivalent to
|
||||||
|
+ // ORN <Wd>, WZR, <Wm>{, <shift> #<amount>}
|
||||||
|
+ // 2. MVN <Xd>, <Xm>{, <shift> #<amount>} is equivalent to
|
||||||
|
+ // ORN <Xd>, XZR, <Xm>{, <shift> #<amount>}
|
||||||
|
+ // A null FirstMI is a wildcard; only SecondMI is checked in that case.
|
||||||
|
+ if ((FirstMI == nullptr ||
|
||||||
|
+ ((FirstMI->getOpcode() == AArch64::ORNWrs) &&
|
||||||
|
+ (FirstMI->getOperand(1).getReg() == AArch64::WZR) &&
|
||||||
|
+ (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) &&
|
||||||
|
+ (SecondMI.getOpcode() == AArch64::CLZWr))
|
||||||
|
+ return true;
|
||||||
|
+ // Same check for the 64-bit pair: ORNXrs reading XZR, then CLZXr.
|
||||||
|
+ if ((FirstMI == nullptr ||
|
||||||
|
+ ((FirstMI->getOpcode() == AArch64::ORNXrs) &&
|
||||||
|
+ (FirstMI->getOperand(1).getReg() == AArch64::XZR) &&
|
||||||
|
+ (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) &&
|
||||||
|
+ (SecondMI.getOpcode() == AArch64::CLZXr))
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
|
||||||
|
/// together. Given SecondMI, when FirstMI is unspecified, then check if
|
||||||
|
/// SecondMI may be part of a fused pair at all.
|
||||||
|
@@ -472,6 +523,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
|
||||||
|
if (ST.hasFuseAddSub2RegAndConstOne() &&
|
||||||
|
isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
|
||||||
|
return true;
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ if (ST.hasFuseMvnClz() && isMvnClzPair(FirstMI, SecondMI))
|
||||||
|
+ return true;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
|
||||||
|
index 450e27b8a..ddf22364c 100644
|
||||||
|
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
|
||||||
|
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
|
||||||
|
@@ -266,6 +266,15 @@ void AArch64Subtarget::initializeProperties() {
|
||||||
|
PrefFunctionAlignment = Align(16);
|
||||||
|
PrefLoopAlignment = Align(4);
|
||||||
|
break;
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ case HIP09:
|
||||||
|
+ CacheLineSize = 64;
|
||||||
|
+ PrefFunctionAlignment = Align(16);
|
||||||
|
+ PrefLoopAlignment = Align(4);
|
||||||
|
+ VScaleForTuning = 2;
|
||||||
|
+ DefaultSVETFOpts = TailFoldingOpts::Simple;
|
||||||
|
+ break;
|
||||||
|
+#endif
|
||||||
|
case ThunderX3T110:
|
||||||
|
CacheLineSize = 64;
|
||||||
|
PrefFunctionAlignment = Align(16);
|
||||||
|
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
|
||||||
|
index 5e20d1646..5f481f4f9 100644
|
||||||
|
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
|
||||||
|
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
|
||||||
|
@@ -87,7 +87,10 @@ public:
|
||||||
|
ThunderXT83,
|
||||||
|
ThunderXT88,
|
||||||
|
ThunderX3T110,
|
||||||
|
- TSV110
|
||||||
|
+ TSV110,
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ HIP09
|
||||||
|
+#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
protected:
|
||||||
|
@@ -239,7 +242,11 @@ public:
|
||||||
|
bool hasFusion() const {
|
||||||
|
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
|
||||||
|
hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseMvnClz();
|
||||||
|
+#else
|
||||||
|
hasFuseAdrpAdd() || hasFuseLiterals();
|
||||||
|
+#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
|
||||||
|
diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
|
||||||
|
index 2739233f9..501ce1f2f 100644
|
||||||
|
--- a/llvm/lib/Target/CMakeLists.txt
|
||||||
|
+++ b/llvm/lib/Target/CMakeLists.txt
|
||||||
|
@@ -2,6 +2,10 @@ list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen)
|
||||||
|
|
||||||
|
list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target)
|
||||||
|
|
||||||
|
+if(LLVM_ENABLE_AARCH64_HIP09)
|
||||||
|
+ list(APPEND LLVM_TABLEGEN_FLAGS "-DENABLE_AARCH64_HIP09")
|
||||||
|
+endif()
|
||||||
|
+
|
||||||
|
add_llvm_component_library(LLVMTarget
|
||||||
|
Target.cpp
|
||||||
|
TargetIntrinsicInfo.cpp
|
||||||
|
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
|
||||||
|
index d11dc605e..8b23be02e 100644
|
||||||
|
--- a/llvm/lib/TargetParser/Host.cpp
|
||||||
|
+++ b/llvm/lib/TargetParser/Host.cpp
|
||||||
|
@@ -257,6 +257,9 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
|
||||||
|
// contents are specified in the various processor manuals.
|
||||||
|
return StringSwitch<const char *>(Part)
|
||||||
|
.Case("0xd01", "tsv110")
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ .Case("0xd02", "hip09")
|
||||||
|
+#endif
|
||||||
|
.Default("generic");
|
||||||
|
|
||||||
|
if (Implementer == "0x51") // Qualcomm Technologies, Inc.
|
||||||
|
diff --git a/llvm/test/CodeGen/AArch64/cpus-hip09.ll b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..dcf32e4dc
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
|
||||||
|
@@ -0,0 +1,11 @@
|
||||||
|
+; REQUIRES: enable_enable_aarch64_hip09
|
||||||
|
+; This tests that llc accepts all valid AArch64 CPUs
|
||||||
|
+
|
||||||
|
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s
|
||||||
|
+
|
||||||
|
+; CHECK-NOT: {{.*}} is not a recognized processor for this target
|
||||||
|
+; INVALID: {{.*}} is not a recognized processor for this target
|
||||||
|
+
|
||||||
|
+define i32 @f(i64 %z) {
|
||||||
|
+ ret i32 0
|
||||||
|
+}
|
||||||
|
diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..64bf15937
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
|
||||||
|
@@ -0,0 +1,20 @@
|
||||||
|
+# REQUIRES: enable_enable_aarch64_hip09
|
||||||
|
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
|
||||||
|
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
|
||||||
|
+---
|
||||||
|
+# CHECK-LABEL: name: fuse-mvn-clz
|
||||||
|
+# CHECK: $w2 = ORNWrs $wzr, $w1, 0
|
||||||
|
+# FUSION: $w0 = CLZWr killed renamable $w2
|
||||||
|
+# CHECK: $w3 = ADDWri killed renamable $w1, 1, 0
|
||||||
|
+# NOFUSION: $w0 = CLZWr killed renamable $w2
|
||||||
|
+name: fuse-mvn-clz
|
||||||
|
+tracksRegLiveness: true
|
||||||
|
+body: |
|
||||||
|
+ bb.0:
|
||||||
|
+ liveins: $w0, $w1, $w2, $w3
|
||||||
|
+
|
||||||
|
+ $w2 = ORNWrs $wzr, $w1, 0
|
||||||
|
+ $w3 = ADDWri killed renamable $w1, 1, 0
|
||||||
|
+ $w0 = CLZWr killed renamable $w2
|
||||||
|
+ RET undef $lr, implicit $w0
|
||||||
|
+...
|
||||||
|
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..d67fa5b43
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
|
||||||
|
@@ -0,0 +1,73 @@
|
||||||
|
+; REQUIRES: enable_enable_aarch64_hip09
|
||||||
|
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09
|
||||||
|
+
|
||||||
|
+@g = common local_unnamed_addr global ptr null, align 8
|
||||||
|
+
|
||||||
|
+define dso_local ptr @litp(i32 %a, i32 %b) {
|
||||||
|
+entry:
|
||||||
|
+ %add = add nsw i32 %b, %a
|
||||||
|
+ %idx.ext = sext i32 %add to i64
|
||||||
|
+ %add.ptr = getelementptr i8, ptr @litp, i64 %idx.ext
|
||||||
|
+ store ptr %add.ptr, ptr @g, align 8
|
||||||
|
+ ret ptr %add.ptr
|
||||||
|
+
|
||||||
|
+; CHECK-LABEL: litp:
|
||||||
|
+; CHECK: adrp [[R:x[0-9]+]], litp
|
||||||
|
+; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+define dso_local ptr @litp_tune_generic(i32 %a, i32 %b) "tune-cpu"="generic" {
|
||||||
|
+entry:
|
||||||
|
+ %add = add nsw i32 %b, %a
|
||||||
|
+ %idx.ext = sext i32 %add to i64
|
||||||
|
+ %add.ptr = getelementptr i8, ptr @litp_tune_generic, i64 %idx.ext
|
||||||
|
+ store ptr %add.ptr, ptr @g, align 8
|
||||||
|
+ ret ptr %add.ptr
|
||||||
|
+
|
||||||
|
+; CHECK-LABEL: litp_tune_generic:
|
||||||
|
+; CHECK: adrp [[R:x[0-9]+]], litp_tune_generic
|
||||||
|
+; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+define dso_local i32 @liti(i32 %a, i32 %b) {
|
||||||
|
+entry:
|
||||||
|
+ %add = add i32 %a, -262095121
|
||||||
|
+ %add1 = add i32 %add, %b
|
||||||
|
+ ret i32 %add1
|
||||||
|
+
|
||||||
|
+; CHECK-LABEL: liti:
|
||||||
|
+; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}}
|
||||||
|
+; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
|
||||||
|
+; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||||
|
+; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+; Function Attrs: norecurse nounwind readnone
|
||||||
|
+define dso_local i64 @litl(i64 %a, i64 %b) {
|
||||||
|
+entry:
|
||||||
|
+ %add = add i64 %a, 2208998440489107183
|
||||||
|
+ %add1 = add i64 %add, %b
|
||||||
|
+ ret i64 %add1
|
||||||
|
+
|
||||||
|
+; CHECK-LABEL: litl:
|
||||||
|
+; CHECK: mov [[R:x[0-9]+]], {{#[0-9]+}}
|
||||||
|
+; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
|
||||||
|
+; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||||
|
+; CHECK: movk [[R]], {{#[0-9]+}}, lsl #32
|
||||||
|
+; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+; Function Attrs: norecurse nounwind readnone
|
||||||
|
+define dso_local double @litf() {
|
||||||
|
+entry:
|
||||||
|
+ ret double 0x400921FB54442D18
|
||||||
|
+
|
||||||
|
+; CHECK-LABEL: litf:
|
||||||
|
+; CHECK-DONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
|
||||||
|
+; CHECK-DONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
|
||||||
|
+; CHECKFUSE-HIP09: mov [[R:x[0-9]+]], #11544
|
||||||
|
+; CHECKFUSE-HIP09: movk [[R]], #21572, lsl #16
|
||||||
|
+; CHECKFUSE-HIP09: movk [[R]], #8699, lsl #32
|
||||||
|
+; CHECKFUSE-HIP09: movk [[R]], #16393, lsl #48
|
||||||
|
+; CHECKFUSE-HIP09: fmov {{d[0-9]+}}, [[R]]
|
||||||
|
+}
|
||||||
|
diff --git a/llvm/test/CodeGen/AArch64/remat-hip09.ll b/llvm/test/CodeGen/AArch64/remat-hip09.ll
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..aec0d18ae
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/llvm/test/CodeGen/AArch64/remat-hip09.ll
|
||||||
|
@@ -0,0 +1,18 @@
|
||||||
|
+; REQUIRES: enable_enable_aarch64_hip09
|
||||||
|
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s
|
||||||
|
+
|
||||||
|
+%X = type { i64, i64, i64 }
|
||||||
|
+declare void @f(ptr)
|
||||||
|
+define void @t() {
|
||||||
|
+entry:
|
||||||
|
+ %tmp = alloca %X
|
||||||
|
+ call void @f(ptr %tmp)
|
||||||
|
+; CHECK: add x0, sp, #8
|
||||||
|
+; CHECK-NOT: mov
|
||||||
|
+; CHECK-NEXT: bl f
|
||||||
|
+ call void @f(ptr %tmp)
|
||||||
|
+; CHECK: add x0, sp, #8
|
||||||
|
+; CHECK-NOT: mov
|
||||||
|
+; CHECK-NEXT: bl f
|
||||||
|
+ ret void
|
||||||
|
+}
|
||||||
|
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
|
||||||
|
index 20c1ecca1..6145a514f 100644
|
||||||
|
--- a/llvm/test/lit.site.cfg.py.in
|
||||||
|
+++ b/llvm/test/lit.site.cfg.py.in
|
||||||
|
@@ -64,9 +64,13 @@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
|
||||||
|
config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
|
||||||
|
config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@
|
||||||
|
+config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@
|
||||||
|
|
||||||
|
import lit.llvm
|
||||||
|
lit.llvm.initialize(lit_config, config)
|
||||||
|
|
||||||
|
+if config.enable_enable_aarch64_hip09:
|
||||||
|
+ config.available_features.add("enable_enable_aarch64_hip09")
|
||||||
|
+
|
||||||
|
# Let the main config do the real work.
|
||||||
|
lit_config.load_config(
|
||||||
|
config, os.path.join(config.llvm_src_root, "test/lit.cfg.py"))
|
||||||
|
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
|
||||||
|
index 452d0326c..4b4c81514 100644
|
||||||
|
--- a/llvm/unittests/TargetParser/Host.cpp
|
||||||
|
+++ b/llvm/unittests/TargetParser/Host.cpp
|
||||||
|
@@ -250,6 +250,11 @@ CPU part : 0x0a1
|
||||||
|
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
|
||||||
|
"CPU part : 0xd01"),
|
||||||
|
"tsv110");
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
|
||||||
|
+ "CPU part : 0xd02"),
|
||||||
|
+ "hip09");
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
// Verify A64FX.
|
||||||
|
const std::string A64FXProcCpuInfo = R"(
|
||||||
|
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
|
||||||
|
index 741d5a2d4..94e0047e5 100644
|
||||||
|
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
|
||||||
|
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
|
||||||
|
@@ -1421,6 +1421,18 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
|
AArch64::AEK_PROFILE | AArch64::AEK_FP16 |
|
||||||
|
AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD,
|
||||||
|
"8.2-A"),
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+ ARMCPUTestParams(
|
||||||
|
+ "hip09", "armv8.5-a", "crypto-neon-fp-armv8",
|
||||||
|
+ AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD |
|
||||||
|
+ AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM |
|
||||||
|
+ AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_AES |
|
||||||
|
+ AArch64::AEK_SM4 | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
|
||||||
|
+ AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
|
||||||
|
+ AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
|
||||||
|
+ AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16,
|
||||||
|
+ "8.5-A"),
|
||||||
|
+#endif
|
||||||
|
ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8",
|
||||||
|
AArch64::AEK_CRC | AArch64::AEK_AES |
|
||||||
|
AArch64::AEK_SHA2 | AArch64::AEK_FP |
|
||||||
|
@@ -1437,7 +1449,11 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
|
"8.2-A")));
|
||||||
|
|
||||||
|
// Note: number of CPUs includes aliases.
|
||||||
|
+#if defined(ENABLE_AARCH64_HIP09)
|
||||||
|
+static constexpr unsigned NumAArch64CPUArchs = 63;
|
||||||
|
+#else
|
||||||
|
static constexpr unsigned NumAArch64CPUArchs = 62;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
TEST(TargetParserTest, testAArch64CPUArchList) {
|
||||||
|
SmallVector<StringRef, NumAArch64CPUArchs> List;
|
||||||
|
--
|
||||||
|
2.19.1
|
||||||
|
|
||||||
@ -44,7 +44,7 @@
|
|||||||
|
|
||||||
Name: %{pkg_name}
|
Name: %{pkg_name}
|
||||||
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
|
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
|
||||||
Release: 17
|
Release: 18
|
||||||
Summary: The Low Level Virtual Machine
|
Summary: The Low Level Virtual Machine
|
||||||
|
|
||||||
License: NCSA
|
License: NCSA
|
||||||
@ -77,6 +77,7 @@ Patch19: 0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache
|
|||||||
Patch20: 0020-Update-llvm-lit-config-to-support-build_for_openeule.patch
|
Patch20: 0020-Update-llvm-lit-config-to-support-build_for_openeule.patch
|
||||||
Patch21: 0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
|
Patch21: 0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
|
||||||
Patch22: 0022-Prevent-environment-variables-from-exceeding-NAME_MA.patch
|
Patch22: 0022-Prevent-environment-variables-from-exceeding-NAME_MA.patch
|
||||||
|
Patch23: 0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
|
||||||
|
|
||||||
BuildRequires: binutils-devel
|
BuildRequires: binutils-devel
|
||||||
BuildRequires: cmake
|
BuildRequires: cmake
|
||||||
@ -373,6 +374,9 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C ./_build
|
|||||||
%{install_includedir}/llvm-gmock
|
%{install_includedir}/llvm-gmock
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Sep 11 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-18
|
||||||
|
- [AArch64] Support HiSilicon's HIP09 Processor
|
||||||
|
|
||||||
* Wed Sep 11 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-17
|
* Wed Sep 11 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-17
|
||||||
- doc add Provides llvm-help
|
- doc add Provides llvm-help
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user