!415 LoongArch: add 3A6000 support
From: @ticat-fp Reviewed-by: @xiongzhou4 Signed-off-by: @xiongzhou4
This commit is contained in:
commit
4c1af91efa
332
LoongArch-Add-LA664-support.patch
Normal file
332
LoongArch-Add-LA664-support.patch
Normal file
@ -0,0 +1,332 @@
|
|||||||
|
From c68463abbab98aa7f5a9b91e71ed6f6834c723df Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Thu, 16 Nov 2023 20:43:53 +0800
|
||||||
|
Subject: [PATCH] LoongArch: Add LA664 support.
|
||||||
|
|
||||||
|
Define ISA_BASE_LA64V110, which represents the base instruction set defined in LoongArch1.1.
|
||||||
|
Support the configure setting --with-arch =la664, and support -march=la664,-mtune=la664.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config.gcc: Support LA664.
|
||||||
|
* config/loongarch/genopts/loongarch-strings: Likewise.
|
||||||
|
* config/loongarch/genopts/loongarch.opt.in: Likewise.
|
||||||
|
* config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): Likewise.
|
||||||
|
* config/loongarch/loongarch-def.c: Likewise.
|
||||||
|
* config/loongarch/loongarch-def.h (N_ISA_BASE_TYPES): Likewise.
|
||||||
|
(ISA_BASE_LA64V110): Define macro.
|
||||||
|
(N_ARCH_TYPES): Update value.
|
||||||
|
(N_TUNE_TYPES): Update value.
|
||||||
|
(CPU_LA664): New macro.
|
||||||
|
* config/loongarch/loongarch-opts.cc (isa_default_abi): Likewise.
|
||||||
|
(isa_base_compat_p): Likewise.
|
||||||
|
* config/loongarch/loongarch-opts.h (TARGET_64BIT): This parameter is enabled
|
||||||
|
when la_target.isa.base is equal to ISA_BASE_LA64V100 or ISA_BASE_LA64V110.
|
||||||
|
(TARGET_uARCH_LA664): Define macro.
|
||||||
|
* config/loongarch/loongarch-str.h (STR_CPU_LA664): Likewise.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width):
|
||||||
|
Add LA664 support.
|
||||||
|
* config/loongarch/loongarch.opt: Regenerate.
|
||||||
|
|
||||||
|
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||||
|
---
|
||||||
|
gcc/config.gcc | 10 ++++-----
|
||||||
|
.../loongarch/genopts/loongarch-strings | 1 +
|
||||||
|
gcc/config/loongarch/genopts/loongarch.opt.in | 3 +++
|
||||||
|
gcc/config/loongarch/loongarch-cpu.cc | 4 ++++
|
||||||
|
gcc/config/loongarch/loongarch-def.c | 21 +++++++++++++++++++
|
||||||
|
gcc/config/loongarch/loongarch-def.h | 8 ++++---
|
||||||
|
gcc/config/loongarch/loongarch-opts.cc | 8 +++----
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 4 +++-
|
||||||
|
gcc/config/loongarch/loongarch-str.h | 1 +
|
||||||
|
gcc/config/loongarch/loongarch.cc | 1 +
|
||||||
|
gcc/config/loongarch/loongarch.opt | 3 +++
|
||||||
|
11 files changed, 51 insertions(+), 13 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||||
|
index 6d51bd93f3f..b88591b6fd8 100644
|
||||||
|
--- a/gcc/config.gcc
|
||||||
|
+++ b/gcc/config.gcc
|
||||||
|
@@ -5039,7 +5039,7 @@ case "${target}" in
|
||||||
|
|
||||||
|
# Perform initial sanity checks on --with-* options.
|
||||||
|
case ${with_arch} in
|
||||||
|
- "" | abi-default | loongarch64 | la464) ;; # OK, append here.
|
||||||
|
+ "" | abi-default | loongarch64 | la[46]64) ;; # OK, append here.
|
||||||
|
native)
|
||||||
|
if test x${host} != x${target}; then
|
||||||
|
echo "--with-arch=native is illegal for cross-compiler." 1>&2
|
||||||
|
@@ -5088,7 +5088,7 @@ case "${target}" in
|
||||||
|
case ${abi_base}/${abi_ext} in
|
||||||
|
lp64*/base)
|
||||||
|
# architectures that support lp64* ABI
|
||||||
|
- arch_pattern="native|abi-default|loongarch64|la464"
|
||||||
|
+ arch_pattern="native|abi-default|loongarch64|la[46]64"
|
||||||
|
# default architecture for lp64* ABI
|
||||||
|
arch_default="abi-default"
|
||||||
|
;;
|
||||||
|
@@ -5163,7 +5163,7 @@ case "${target}" in
|
||||||
|
# Check default with_tune configuration using with_arch.
|
||||||
|
case ${with_arch} in
|
||||||
|
loongarch64)
|
||||||
|
- tune_pattern="native|abi-default|loongarch64|la464"
|
||||||
|
+ tune_pattern="native|abi-default|loongarch64|la[46]64"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
# By default, $with_tune == $with_arch
|
||||||
|
@@ -5219,7 +5219,7 @@ case "${target}" in
|
||||||
|
# Fixed: use the default gcc configuration for all multilib
|
||||||
|
# builds by default.
|
||||||
|
with_multilib_default="" ;;
|
||||||
|
- arch,native|arch,loongarch64|arch,la464) # OK, append here.
|
||||||
|
+ arch,native|arch,loongarch64|arch,la[46]64) # OK, append here.
|
||||||
|
with_multilib_default="/march=${component}" ;;
|
||||||
|
arch,*)
|
||||||
|
with_multilib_default="/march=abi-default"
|
||||||
|
@@ -5307,7 +5307,7 @@ case "${target}" in
|
||||||
|
if test x${parse_state} = x"arch"; then
|
||||||
|
# -march option
|
||||||
|
case ${component} in
|
||||||
|
- native | abi-default | loongarch64 | la464) # OK, append here.
|
||||||
|
+ native | abi-default | loongarch64 | la[46]64) # OK, append here.
|
||||||
|
# Append -march spec for each multilib variant.
|
||||||
|
loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}"
|
||||||
|
parse_state="opts"
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
index 8e412f7536e..7bc4824007e 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
@@ -26,6 +26,7 @@ STR_CPU_NATIVE native
|
||||||
|
STR_CPU_ABI_DEFAULT abi-default
|
||||||
|
STR_CPU_LOONGARCH64 loongarch64
|
||||||
|
STR_CPU_LA464 la464
|
||||||
|
+STR_CPU_LA664 la664
|
||||||
|
|
||||||
|
# Base architecture
|
||||||
|
STR_ISA_BASE_LA64V100 la64
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
index 158701d327a..00b4733d75b 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
@@ -107,6 +107,9 @@ Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64)
|
||||||
|
EnumValue
|
||||||
|
Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464)
|
||||||
|
|
||||||
|
+EnumValue
|
||||||
|
+Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664)
|
||||||
|
+
|
||||||
|
m@@OPTSTR_ARCH@@=
|
||||||
|
Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET)
|
||||||
|
-m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA.
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
index 7a2866f60f9..f3a13414143 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
@@ -106,6 +106,10 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
native_cpu_type = CPU_LA464;
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ case 0x0014d000: /* LA664 */
|
||||||
|
+ native_cpu_type = CPU_LA664;
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
default:
|
||||||
|
/* Unknown PRID. */
|
||||||
|
if (tune_native_p)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
|
||||||
|
index 430ef8b2d95..067629141b6 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.c
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.c
|
||||||
|
@@ -28,6 +28,7 @@ loongarch_cpu_strings[N_TUNE_TYPES] = {
|
||||||
|
[CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT,
|
||||||
|
[CPU_LOONGARCH64] = STR_CPU_LOONGARCH64,
|
||||||
|
[CPU_LA464] = STR_CPU_LA464,
|
||||||
|
+ [CPU_LA664] = STR_CPU_LA664,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct loongarch_isa
|
||||||
|
@@ -42,6 +43,11 @@ loongarch_cpu_default_isa[N_ARCH_TYPES] = {
|
||||||
|
.fpu = ISA_EXT_FPU64,
|
||||||
|
.simd = ISA_EXT_SIMD_LASX,
|
||||||
|
},
|
||||||
|
+ [CPU_LA664] = {
|
||||||
|
+ .base = ISA_BASE_LA64V110,
|
||||||
|
+ .fpu = ISA_EXT_FPU64,
|
||||||
|
+ .simd = ISA_EXT_SIMD_LASX,
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
struct loongarch_cache
|
||||||
|
@@ -58,6 +64,12 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
|
||||||
|
.l2d_size = 256,
|
||||||
|
.simultaneous_prefetches = 4,
|
||||||
|
},
|
||||||
|
+ [CPU_LA664] = {
|
||||||
|
+ .l1d_line_size = 64,
|
||||||
|
+ .l1d_size = 64,
|
||||||
|
+ .l2d_size = 256,
|
||||||
|
+ .simultaneous_prefetches = 4,
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
struct loongarch_align
|
||||||
|
@@ -70,6 +82,10 @@ loongarch_cpu_align[N_TUNE_TYPES] = {
|
||||||
|
.function = "32",
|
||||||
|
.label = "16",
|
||||||
|
},
|
||||||
|
+ [CPU_LA664] = {
|
||||||
|
+ .function = "32",
|
||||||
|
+ .label = "16",
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@@ -104,6 +120,9 @@ loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = {
|
||||||
|
[CPU_LA464] = {
|
||||||
|
DEFAULT_COSTS
|
||||||
|
},
|
||||||
|
+ [CPU_LA664] = {
|
||||||
|
+ DEFAULT_COSTS
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
/* RTX costs to use when optimizing for size. */
|
||||||
|
@@ -127,6 +146,7 @@ loongarch_cpu_issue_rate[N_TUNE_TYPES] = {
|
||||||
|
[CPU_NATIVE] = 4,
|
||||||
|
[CPU_LOONGARCH64] = 4,
|
||||||
|
[CPU_LA464] = 4,
|
||||||
|
+ [CPU_LA664] = 6,
|
||||||
|
};
|
||||||
|
|
||||||
|
int
|
||||||
|
@@ -134,6 +154,7 @@ loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = {
|
||||||
|
[CPU_NATIVE] = 4,
|
||||||
|
[CPU_LOONGARCH64] = 4,
|
||||||
|
[CPU_LA464] = 4,
|
||||||
|
+ [CPU_LA664] = 6,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Wiring string definitions from loongarch-str.h to global arrays
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
index 6e2a6987910..db497f3ffe2 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
@@ -55,7 +55,8 @@ extern "C" {
|
||||||
|
/* enum isa_base */
|
||||||
|
extern const char* loongarch_isa_base_strings[];
|
||||||
|
#define ISA_BASE_LA64V100 0
|
||||||
|
-#define N_ISA_BASE_TYPES 1
|
||||||
|
+#define ISA_BASE_LA64V110 1
|
||||||
|
+#define N_ISA_BASE_TYPES 2
|
||||||
|
|
||||||
|
/* enum isa_ext_* */
|
||||||
|
extern const char* loongarch_isa_ext_strings[];
|
||||||
|
@@ -141,8 +142,9 @@ struct loongarch_target
|
||||||
|
#define CPU_ABI_DEFAULT 1
|
||||||
|
#define CPU_LOONGARCH64 2
|
||||||
|
#define CPU_LA464 3
|
||||||
|
-#define N_ARCH_TYPES 4
|
||||||
|
-#define N_TUNE_TYPES 4
|
||||||
|
+#define CPU_LA664 4
|
||||||
|
+#define N_ARCH_TYPES 5
|
||||||
|
+#define N_TUNE_TYPES 5
|
||||||
|
|
||||||
|
/* parallel tables. */
|
||||||
|
extern const char* loongarch_cpu_strings[];
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
index e5921189a06..67a59152a01 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
@@ -552,17 +552,17 @@ isa_default_abi (const struct loongarch_isa *isa)
|
||||||
|
switch (isa->fpu)
|
||||||
|
{
|
||||||
|
case ISA_EXT_FPU64:
|
||||||
|
- if (isa->base == ISA_BASE_LA64V100)
|
||||||
|
+ if (isa->base >= ISA_BASE_LA64V100)
|
||||||
|
abi.base = ABI_BASE_LP64D;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ISA_EXT_FPU32:
|
||||||
|
- if (isa->base == ISA_BASE_LA64V100)
|
||||||
|
+ if (isa->base >= ISA_BASE_LA64V100)
|
||||||
|
abi.base = ABI_BASE_LP64F;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ISA_EXT_NONE:
|
||||||
|
- if (isa->base == ISA_BASE_LA64V100)
|
||||||
|
+ if (isa->base >= ISA_BASE_LA64V100)
|
||||||
|
abi.base = ABI_BASE_LP64S;
|
||||||
|
break;
|
||||||
|
|
||||||
|
@@ -582,7 +582,7 @@ isa_base_compat_p (const struct loongarch_isa *set1,
|
||||||
|
switch (set2->base)
|
||||||
|
{
|
||||||
|
case ISA_BASE_LA64V100:
|
||||||
|
- return (set1->base == ISA_BASE_LA64V100);
|
||||||
|
+ return (set1->base >= ISA_BASE_LA64V100);
|
||||||
|
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index 6dd309aad96..0e1b3e528a1 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -76,7 +76,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
#define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64)
|
||||||
|
#define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D)
|
||||||
|
|
||||||
|
-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100)
|
||||||
|
+#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100 \
|
||||||
|
+ || la_target.isa.base == ISA_BASE_LA64V110)
|
||||||
|
#define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \
|
||||||
|
|| la_target.abi.base == ABI_BASE_LP64F \
|
||||||
|
|| la_target.abi.base == ABI_BASE_LP64S)
|
||||||
|
@@ -88,6 +89,7 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
|
||||||
|
/* TARGET_ macros for use in *.md template conditionals */
|
||||||
|
#define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464)
|
||||||
|
+#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664)
|
||||||
|
|
||||||
|
/* Note: optimize_size may vary across functions,
|
||||||
|
while -m[no]-memcpy imposes a global constraint. */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
index 072558c28f1..fc4f41bfc1e 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-str.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#define STR_CPU_ABI_DEFAULT "abi-default"
|
||||||
|
#define STR_CPU_LOONGARCH64 "loongarch64"
|
||||||
|
#define STR_CPU_LA464 "la464"
|
||||||
|
+#define STR_CPU_LA664 "la664"
|
||||||
|
|
||||||
|
#define STR_ISA_BASE_LA64V100 "la64"
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 22ca24a1878..4cd509f11c6 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -10177,6 +10177,7 @@ loongarch_cpu_sched_reassociation_width (struct loongarch_target *target,
|
||||||
|
{
|
||||||
|
case CPU_LOONGARCH64:
|
||||||
|
case CPU_LA464:
|
||||||
|
+ case CPU_LA664:
|
||||||
|
/* Vector part. */
|
||||||
|
if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
|
||||||
|
{
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index a5988411fbb..7f129e53ba5 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -114,6 +114,9 @@ Enum(cpu_type) String(loongarch64) Value(CPU_LOONGARCH64)
|
||||||
|
EnumValue
|
||||||
|
Enum(cpu_type) String(la464) Value(CPU_LA464)
|
||||||
|
|
||||||
|
+EnumValue
|
||||||
|
+Enum(cpu_type) String(la664) Value(CPU_LA664)
|
||||||
|
+
|
||||||
|
march=
|
||||||
|
Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET)
|
||||||
|
-march=PROCESSOR Generate code for the given PROCESSOR ISA.
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
106
LoongArch-Fix-internal-error-running-gcc-march-nativ.patch
Normal file
106
LoongArch-Fix-internal-error-running-gcc-march-nativ.patch
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
From 56752a6bbfb3d3501d0899b23020c3e2eb58882c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Fri, 17 Nov 2023 20:44:17 +0800
|
||||||
|
Subject: [PATCH] LoongArch: Fix internal error running "gcc -march=native" on
|
||||||
|
LA664
|
||||||
|
|
||||||
|
On LA664, the PRID preset is ISA_BASE_LA64V110 but the base architecture
|
||||||
|
is guessed ISA_BASE_LA64V100. This causes a warning to be outputed:
|
||||||
|
|
||||||
|
cc1: warning: base architecture 'la64' differs from PRID preset '?'
|
||||||
|
|
||||||
|
But we've not set the "?" above in loongarch_isa_base_strings, thus it's
|
||||||
|
a nullptr and then an ICE is triggered.
|
||||||
|
|
||||||
|
Add ISA_BASE_LA64V110 to genopts and initialize
|
||||||
|
loongarch_isa_base_strings[ISA_BASE_LA64V110] correctly to fix the ICE.
|
||||||
|
The warning itself will be fixed later.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/genopts/loongarch-strings:
|
||||||
|
(STR_ISA_BASE_LA64V110): Add.
|
||||||
|
* config/loongarch/genopts/loongarch.opt.in:
|
||||||
|
(ISA_BASE_LA64V110): Add.
|
||||||
|
* config/loongarch/loongarch-def.c
|
||||||
|
(loongarch_isa_base_strings): Initialize [ISA_BASE_LA64V110]
|
||||||
|
to STR_ISA_BASE_LA64V110.
|
||||||
|
* config/loongarch/loongarch.opt: Regenerate.
|
||||||
|
* config/loongarch/loongarch-str.h: Regenerate.
|
||||||
|
|
||||||
|
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/genopts/loongarch-strings | 1 +
|
||||||
|
gcc/config/loongarch/genopts/loongarch.opt.in | 3 +++
|
||||||
|
gcc/config/loongarch/loongarch-def.c | 1 +
|
||||||
|
gcc/config/loongarch/loongarch-str.h | 1 +
|
||||||
|
gcc/config/loongarch/loongarch.opt | 3 +++
|
||||||
|
5 files changed, 9 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
index 7bc4824007e..b2070c83ed0 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
@@ -30,6 +30,7 @@ STR_CPU_LA664 la664
|
||||||
|
|
||||||
|
# Base architecture
|
||||||
|
STR_ISA_BASE_LA64V100 la64
|
||||||
|
+STR_ISA_BASE_LA64V110 la64v1.1
|
||||||
|
|
||||||
|
# -mfpu
|
||||||
|
OPTSTR_ISA_EXT_FPU fpu
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
index 00b4733d75b..b274b3fb21e 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
@@ -32,6 +32,9 @@ Basic ISAs of LoongArch:
|
||||||
|
EnumValue
|
||||||
|
Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100)
|
||||||
|
|
||||||
|
+EnumValue
|
||||||
|
+Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110)
|
||||||
|
+
|
||||||
|
;; ISA extensions / adjustments
|
||||||
|
Enum
|
||||||
|
Name(isa_ext_fpu) Type(int)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
|
||||||
|
index 067629141b6..f22d488acb2 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.c
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.c
|
||||||
|
@@ -165,6 +165,7 @@ loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = {
|
||||||
|
const char*
|
||||||
|
loongarch_isa_base_strings[N_ISA_BASE_TYPES] = {
|
||||||
|
[ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100,
|
||||||
|
+ [ISA_BASE_LA64V110] = STR_ISA_BASE_LA64V110,
|
||||||
|
};
|
||||||
|
|
||||||
|
const char*
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
index fc4f41bfc1e..114dbc692d7 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-str.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#define STR_CPU_LA664 "la664"
|
||||||
|
|
||||||
|
#define STR_ISA_BASE_LA64V100 "la64"
|
||||||
|
+#define STR_ISA_BASE_LA64V110 "la64v1.1"
|
||||||
|
|
||||||
|
#define OPTSTR_ISA_EXT_FPU "fpu"
|
||||||
|
#define STR_NONE "none"
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index 7f129e53ba5..350ca30d232 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -39,6 +39,9 @@ Basic ISAs of LoongArch:
|
||||||
|
EnumValue
|
||||||
|
Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100)
|
||||||
|
|
||||||
|
+EnumValue
|
||||||
|
+Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110)
|
||||||
|
+
|
||||||
|
;; ISA extensions / adjustments
|
||||||
|
Enum
|
||||||
|
Name(isa_ext_fpu) Type(int)
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
907
LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
Normal file
907
LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
Normal file
@ -0,0 +1,907 @@
|
|||||||
|
From 40366b89e9c8e727af70ecf7007cba6c51e4b7d2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Wed, 29 Nov 2023 11:16:59 +0800
|
||||||
|
Subject: [PATCH] LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests fail on
|
||||||
|
LA664 [PR112611]
|
||||||
|
|
||||||
|
For [x]vshuf instructions, if the index value in the selector exceeds 63, it triggers
|
||||||
|
undefined behavior on LA464, but not on LA664. To ensure compatibility of these two
|
||||||
|
tests on both LA464 and LA664, we have modified both tests to ensure that the index
|
||||||
|
value in the selector does not exceed 63.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
PR target/112611
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto.
|
||||||
|
|
||||||
|
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||||
|
---
|
||||||
|
.../loongarch/vector/lasx/lasx-xvshuf_b.c | 343 ++++++------------
|
||||||
|
.../loongarch/vector/lsx/lsx-vshuf.c | 162 +++------
|
||||||
|
2 files changed, 164 insertions(+), 341 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
|
||||||
|
index d8a29dbd225..b8ab387118a 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
|
||||||
|
@@ -43,9 +43,9 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0xfffffefefffffefe;
|
||||||
|
*((unsigned long *)&__m256i_op1[0]) = 0xfffffefefffffefe;
|
||||||
|
*((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0xfffffff8fffffff8;
|
||||||
|
+ *((unsigned long *)&__m256i_op2[2]) = 0x3f3f3f383f3f3f38;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[0]) = 0xfffffff8fc000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op2[0]) = 0x3f3f3f383c000000;
|
||||||
|
*((unsigned long *)&__m256i_result[3]) = 0xfafafafafafafafa;
|
||||||
|
*((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_result[1]) = 0xfefefefefefefefe;
|
||||||
|
@@ -137,33 +137,14 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[0]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op2[3]) = 0x0000ffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0x0000ffff0000ffff;
|
||||||
|
- *((unsigned long *)&__m256i_op2[1]) = 0x0000ffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op2[0]) = 0x0000ffff0000ffff;
|
||||||
|
+ *((unsigned long *)&__m256i_op2[3]) = 0x0000111111111111;
|
||||||
|
+ *((unsigned long *)&__m256i_op2[2]) = 0x0000222200002222;
|
||||||
|
+ *((unsigned long *)&__m256i_op2[1]) = 0x0000111111111111;
|
||||||
|
+ *((unsigned long *)&__m256i_op2[0]) = 0x0000222200002222;
|
||||||
|
*((unsigned long *)&__m256i_result[3]) = 0xffff000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0xffff0000ffff0000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[2]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_result[1]) = 0xffff000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0xffff0000ffff0000;
|
||||||
|
- __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[3]) = 0x000000000000ffff;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0x000000000000ffff;
|
||||||
|
- *((unsigned long *)&__m256i_op2[1]) = 0x000000000000ffff;
|
||||||
|
- *((unsigned long *)&__m256i_op2[0]) = 0x000000000000ffff;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0xffffffffffffffff;
|
||||||
|
__m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
@@ -176,7 +157,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000077fff;
|
||||||
|
+ *((unsigned long *)&__m256i_op2[2]) = 0x0000000000032f1f;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_result[3]) = 0xffffffffffffffff;
|
||||||
|
@@ -186,9 +167,9 @@ main ()
|
||||||
|
__m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0xfffffffffffffefe;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000101;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0xfffffffffffffefe;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x0011001100110011;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000001;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0011001100110011;
|
||||||
|
*((unsigned long *)&__m256i_op0[0]) = 0x0000000000000101;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x67eee33567eee435;
|
||||||
|
@@ -198,35 +179,16 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x00000000ffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_result[2]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_result[0]) = 0xffffffffffffffff;
|
||||||
|
__m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x0022002200000000;
|
||||||
|
*((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
- __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0xffffffff80000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0xffffffff80000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x001f001f00000000;
|
||||||
|
*((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0xffffffffffffffff;
|
||||||
|
@@ -243,10 +205,10 @@ main ()
|
||||||
|
__m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0xffffffffffffffff;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x0011001100110011;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0011001100110011;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0011001100110011;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0011001100110011;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0xffffffffffffffff;
|
||||||
|
@@ -255,17 +217,17 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0xffffffffffffffff;
|
||||||
|
+ *((unsigned long *)&__m256i_result[2]) = 0xffffffffffffffff;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0xffffffffffffffff;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0xffffffffffffffff;
|
||||||
|
__m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0xffffffffffffffff;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x003f003f003f003f;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x003f003f003f003f;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x003f003f003f003f;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x003f003f003f003f;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0xefdfefdf00000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0xefdfefdfefdfefdf;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0xefdfefdf00000000;
|
||||||
|
@@ -274,36 +236,17 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
- __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0xefdfefdfefdfefdf;
|
||||||
|
+ *((unsigned long *)&__m256i_result[2]) = 0xefdfefdfefdfefdf;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0xefdfefdfefdfefdf;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0xefdfefdfefdfefdf;
|
||||||
|
__m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x7575ffff75757595;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x7575ffff7575f575;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x7575ffff75757595;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x7575ffff7575f575;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x0035000000350005;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0035000000350015;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0035000000350025;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0035000000350035;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x0000000000000003;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x0000000000010001;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000000003;
|
||||||
|
@@ -312,10 +255,10 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0x7575757575757575;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x7575757575757575;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x7575757575757575;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0x7575757575757575;
|
||||||
|
+ *((unsigned long *)&__m256i_result[2]) = 0x7575757575757575;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0x7575757575757575;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0x7575757575757575;
|
||||||
|
__m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
@@ -357,29 +300,10 @@ main ()
|
||||||
|
__m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
- __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x000000000000fffe;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x00000000000000f0;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x000000000000fffe;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x00000000000000f0;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x000000000000003e;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000010;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x000000000000003e;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000010;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
@@ -389,16 +313,16 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x8000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x000000ffff88ff88;
|
||||||
|
*((unsigned long *)&__m256i_result[3]) = 0xff88ff88ff880000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0xff88ff88ff880000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[2]) = 0xff88ff88ff88ff88;
|
||||||
|
*((unsigned long *)&__m256i_result[1]) = 0xff88ff88ff880000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0xff88ff88ff880000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0xff88ff88ff88ff88;
|
||||||
|
__m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x000000010000ffe1;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000101001e18;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x000000010000ffe1;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000101001e18;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x0000000100000011;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000000100000018;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0000000100000001;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000000100000008;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x98111cca98111cca;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x98111cca98111cca;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x98111cca98111cca;
|
||||||
|
@@ -407,17 +331,17 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0x0000000101001e18;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x000000010000ffe1;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x0000000101001e18;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000100000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0x0000000100000001;
|
||||||
|
*((unsigned long *)&__m256i_result[2]) = 0x0000000101001e18;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000100000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0x0000000100000001;
|
||||||
|
*((unsigned long *)&__m256i_result[0]) = 0x0000000101001e18;
|
||||||
|
__m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0001000100010001;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x80008000b3e8fef1;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0001000100010001;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x80008000802ea100;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x000000010000001a;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000001100000001;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0000002100000010;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x000000310000001f;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
@@ -426,17 +350,17 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0x0000000000000002;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x0000000000000001;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x00000000012e2110;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0x0000000000000001;
|
||||||
|
*((unsigned long *)&__m256i_result[2]) = 0x0000000200000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x012e2110012e2110;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0x00000000012e2110;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
__m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000000082a54290;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x00000000028aa700;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000082a54290;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000002a54287;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x0000002f00000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000001a00000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x000000010000001c;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000000e0000000c;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x00000000002a542a;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
@@ -447,8 +371,8 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0x00000000002a542a;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0x00000000002a542a;
|
||||||
|
__m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
@@ -471,10 +395,10 @@ main ()
|
||||||
|
__m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0xffffffffffffffff;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x0000000100000031;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000000100000031;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0000000100000031;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000000100000031;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
@@ -490,10 +414,10 @@ main ()
|
||||||
|
__m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0001000100010001;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0001000100010001;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0001000100010001;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0001000100010001;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x0000000200000001;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000000400000003;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0000000600000005;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000000800000007;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x000000007fc00000;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x000000007fc00000;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x000000007fc00000;
|
||||||
|
@@ -503,7 +427,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0xdfffffffdfffffff;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x8000000080000000;
|
||||||
|
*((unsigned long *)&__m256i_result[3]) = 0x8000000080000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x8000000080000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[2]) = 0x7fc00000dfffffff;
|
||||||
|
*((unsigned long *)&__m256i_result[1]) = 0x8000000080000000;
|
||||||
|
*((unsigned long *)&__m256i_result[0]) = 0x8000000080000000;
|
||||||
|
__m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
@@ -529,9 +453,9 @@ main ()
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
*((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0001000104000200;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000002000000030;
|
||||||
|
*((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0001000104000200;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000001000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0xffff0000ffff0000;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0xffffffffffffffff;
|
||||||
|
@@ -585,10 +509,10 @@ main ()
|
||||||
|
__m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000fffffe01fe52;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x00000000ff01ff02;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000fffffe01fe52;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x00000000ff01ff02;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000001;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000002;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000003;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x0000800000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x0000000080008001;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000800000000000;
|
||||||
|
@@ -597,36 +521,17 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0x000000000000ffff;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x000000000000ffff;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000080008001;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000080008001;
|
||||||
|
- __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0x000000000000ffff;
|
||||||
|
*((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0x0000000080008001;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0x0000800000000000;
|
||||||
|
__m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
*((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000011;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000022;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000033;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
@@ -642,44 +547,6 @@ main ()
|
||||||
|
__m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
- __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[2]) = 0x0008000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op1[0]) = 0x0008000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
- __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
-
|
||||||
|
*((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
@@ -700,9 +567,9 @@ main ()
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
*((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000002000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000002000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000010;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000020;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000030;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000;
|
||||||
|
@@ -718,10 +585,10 @@ main ()
|
||||||
|
__m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0xfffeb6839ffffd80;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0xfffeb8649d0d6250;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0xfffeb6839ffffd80;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0xfffeb8649d0d6250;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x000000000000000a;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x000000000000001b;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x000000000000002c;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x000000000000003d;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0xfffeb6839ffffd80;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0xfffe97c020010001;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0xfffeb6839ffffd80;
|
||||||
|
@@ -730,17 +597,17 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0xfffe97c020010001;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0xfffeb6839ffffd80;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0xfffe97c020010001;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0xfffe97c020010001;
|
||||||
|
+ *((unsigned long *)&__m256i_result[2]) = 0xfffeb6839ffffd80;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0xfffe97c020010001;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0xfffeb6839ffffd80;
|
||||||
|
__m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[3]) = 0x000000000000001a;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[2]) = 0x000000000000001b;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000002;
|
||||||
|
+ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000007;
|
||||||
|
*((unsigned long *)&__m256i_op1[3]) = 0x0000000000010001;
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x0000000000010001;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x0000000000010001;
|
||||||
|
@@ -749,10 +616,10 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0x0000000000010001;
|
||||||
|
+ *((unsigned long *)&__m256i_result[2]) = 0x0000000000010001;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0x0000000000010001;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0x0000000000010001;
|
||||||
|
__m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c
|
||||||
|
index 8153964cf1d..f3b800f8804 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c
|
||||||
|
@@ -20,7 +20,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000401000001;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0001000100000004;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[0]) = 0x00000000007f0000;
|
||||||
|
+ *((unsigned long *)&__m128i_op2[0]) = 0x00000000003f0000;
|
||||||
|
*((unsigned long *)&__m128i_result[1]) = 0x0404040404040404;
|
||||||
|
*((unsigned long *)&__m128i_result[0]) = 0x0404040404000404;
|
||||||
|
__m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
@@ -31,7 +31,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[0]) = 0xffffffff00000000;
|
||||||
|
+ *((unsigned long *)&__m128i_op2[0]) = 0x3f2f1f0f00000000;
|
||||||
|
*((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
__m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
@@ -63,10 +63,10 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x52525252adadadad;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x52525252adadadad;
|
||||||
|
- *((unsigned long *)&__m128i_op2[1]) = 0x800000007fffffff;
|
||||||
|
- *((unsigned long *)&__m128i_op2[0]) = 0x800000007fffffff;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x00adadad00000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x00adadad00000000;
|
||||||
|
+ *((unsigned long *)&__m128i_op2[1]) = 0x2000000004030201;
|
||||||
|
+ *((unsigned long *)&__m128i_op2[0]) = 0x2000000014131211;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0xadadadad52adadad;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0xadadadadffffffff;
|
||||||
|
__m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
@@ -96,10 +96,10 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op0[0]) = 0x0000000000000100;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x04040403fafafafc;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x000000000000ff80;
|
||||||
|
- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x8080808080808080;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x8080808080808080;
|
||||||
|
+ *((unsigned long *)&__m128i_op2[1]) = 0x00101a1b1c1d1e1f;
|
||||||
|
+ *((unsigned long *)&__m128i_op2[0]) = 0x0807060504030201;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0x8000020202000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0xfc000000000000ff;
|
||||||
|
__m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
@@ -118,10 +118,10 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op0[0]) = 0xffd7ff8dffa4ff7a;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x34947b4b11684f92;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0xee297a731e5c5f86;
|
||||||
|
- *((unsigned long *)&__m128i_op2[1]) = 0x7fffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m128i_op2[0]) = 0xffc0000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000868686868686;
|
||||||
|
+ *((unsigned long *)&__m128i_op2[1]) = 0x1f0710301a2b332d;
|
||||||
|
+ *((unsigned long *)&__m128i_op2[0]) = 0x1f20000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0xffee7a7a9811ff7b;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0xff86868686868686;
|
||||||
|
__m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
@@ -136,19 +136,19 @@ main ()
|
||||||
|
__m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x001f002f003f000f;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x001f002f003f000f;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x7fffffffffffffff;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0x7fff7fff7fff7fff;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0x7fff7fff7fff7fff;
|
||||||
|
__m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x000100040010001f;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0002000300110012;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x00007fff00007fff;
|
||||||
|
@@ -169,74 +169,41 @@ main ()
|
||||||
|
__m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
- __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x000300037ff000ff;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x0003000300a10003;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x000300030000001f;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0003000300000003;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x000300037ff000ff;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0003000300a10003;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x000000007ff000ff;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000003;
|
||||||
|
*((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
__m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x0909000009090000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x0909000009090000;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0019000000090000;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0019000000090000;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0909000009090000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0909000009090000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x002a05a2f059094a;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x05ad3ba576eae048;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0909e0480909e048;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0909e0480909e048;
|
||||||
|
- __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0x909e0480909e048;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0x909e0480909e048;
|
||||||
|
__m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x00000000000000c0;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x00000001ffffff29;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000030;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000029;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x00000000000000c0;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x00000001ffffff29;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0xffffff2900000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0xffffff29ffffff29;
|
||||||
|
*((unsigned long *)&__m128i_result[0]) = 0x0000000100000001;
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
*((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
- __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x00000000000000ff;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x000000000000001f;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x1f54e0ab00000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0101010101010101;
|
||||||
|
@@ -246,19 +213,8 @@ main ()
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
- __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
- ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
-
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000007fff;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000002f0000002f;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000001000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000020000020;
|
||||||
|
@@ -279,30 +235,30 @@ main ()
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x0000000004870ba0;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000900000010;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000002000000003;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000001000000010;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x8000000100000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x8000000000000103;
|
||||||
|
*((unsigned long *)&__m128i_result[1]) = 0x0000010300000103;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000010300000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0x0000010380000001;
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x000000ff0000857a;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x05fafe0101fe000e;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001000000007;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000002000000001;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0xffffffff00000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0xada4808924882588;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0xacad25090caca5a4;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001a0000001b;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000a0000000b;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x021b7d24c9678a35;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x030298a6a1030a49;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
@@ -312,8 +268,8 @@ main ()
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x00000000ffffffff;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000003;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000013;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
@@ -323,14 +279,14 @@ main ()
|
||||||
|
__m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0xdfa6e0c6d46cdc13;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x21fc7081ec69b5f2;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000011;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x000000002c002400;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0xffffb96bffff57c9;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0xffff6080ffff4417;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0xffffb96bffff57c9;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0xffffb96bffff57c9;
|
||||||
|
__m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
@@ -345,8 +301,8 @@ main ()
|
||||||
|
__m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000020;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000010;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000002000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0xf0003000f0003000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
@@ -356,30 +312,30 @@ main ()
|
||||||
|
__m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x021b7d2449678a35;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x030298a621030a49;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000007;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x000000000000001a;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x7fff7fff7fff7fff;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x021b7d24c9678a35;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x030298a6a1030a49;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x021b7d24c9678a35;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0x7fff7fff7fff7fff;
|
||||||
|
__m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0x7f7f00007f7f0000;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0x7f7f80807f7f8080;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000002;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000001;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000fffe0000fffe;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x7f8000007f800000;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x7f8000007f800000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x7f8000007f800000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0x0000fffe0000fffe;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0x7f8000007f800000;
|
||||||
|
__m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
- *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m128i_op0[0]) = 0xfffffffffff10000;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000010;
|
||||||
|
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000020;
|
||||||
|
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
137
LoongArch-Use-finer-grained-DBAR-hints.patch
Normal file
137
LoongArch-Use-finer-grained-DBAR-hints.patch
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
From 4a70bfbf686c2b6a1ecd83fe851de826c612c3e0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Tue, 14 Nov 2023 05:32:38 +0800
|
||||||
|
Subject: [PATCH] LoongArch: Use finer-grained DBAR hints
|
||||||
|
|
||||||
|
LA664 defines DBAR hints 0x1 - 0x1f (except 0xf and 0x1f) as follows [1-2]:
|
||||||
|
|
||||||
|
- Bit 4: kind of constraint (0: completion, 1: ordering)
|
||||||
|
- Bit 3: barrier for previous read (0: true, 1: false)
|
||||||
|
- Bit 2: barrier for previous write (0: true, 1: false)
|
||||||
|
- Bit 1: barrier for succeeding read (0: true, 1: false)
|
||||||
|
- Bit 0: barrier for succeeding write (0: true, 1: false)
|
||||||
|
|
||||||
|
LLVM has already utilized them for different memory orders [3]:
|
||||||
|
|
||||||
|
- Bit 4 is always set to one because it's only intended to be zero for
|
||||||
|
things like MMIO devices, which are out of the scope of memory orders.
|
||||||
|
- An acquire barrier is used to implement acquire loads like
|
||||||
|
|
||||||
|
ld.d $a1, $t0, 0
|
||||||
|
dbar acquire_hint
|
||||||
|
|
||||||
|
where the load operation (ld.d) should not be reordered with any load
|
||||||
|
or store operation after the acquire load. To accomplish this
|
||||||
|
constraint, we need to prevent the load operation from being reordered
|
||||||
|
after the barrier, and also prevent any following load/store operation
|
||||||
|
from being reordered before the barrier. Thus bits 0, 1, and 3 must
|
||||||
|
be zero, and bit 2 can be one, so acquire_hint should be 0b10100.
|
||||||
|
- An release barrier is used to implement release stores like
|
||||||
|
|
||||||
|
dbar release_hint
|
||||||
|
st.d $a1, $t0, 0
|
||||||
|
|
||||||
|
where the store operation (st.d) should not be reordered with any load
|
||||||
|
or store operation before the release store. So we need to prevent
|
||||||
|
the store operation from being reordered before the barrier, and also
|
||||||
|
prevent any preceding load/store operation from being reordered after
|
||||||
|
the barrier. So bits 0, 2, 3 must be zero, and bit 1 can be one. So
|
||||||
|
release_hint should be 0b10010.
|
||||||
|
|
||||||
|
A similar mapping has been utilized for RISC-V GCC [4], LoongArch Linux
|
||||||
|
kernel [1], and LoongArch LLVM [3]. So the mapping should be correct.
|
||||||
|
And I've also bootstrapped & regtested GCC on a LA664 with this patch.
|
||||||
|
|
||||||
|
The LoongArch CPUs should treat "unknown" hints as dbar 0, so we can
|
||||||
|
unconditionally emit the new hints without a compiler switch.
|
||||||
|
|
||||||
|
[1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed
|
||||||
|
[2]: https://github.com/loongson-community/docs/pull/12
|
||||||
|
[3]: https://github.com/llvm/llvm-project/pull/68787
|
||||||
|
[4]: https://gcc.gnu.org/r14-406
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/sync.md (mem_thread_fence): Remove redundant
|
||||||
|
check.
|
||||||
|
(mem_thread_fence_1): Emit finer-grained DBAR hints for
|
||||||
|
different memory models, instead of 0.
|
||||||
|
|
||||||
|
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/sync.md | 51 +++++++++++++++++++++++++++++-------
|
||||||
|
1 file changed, 42 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
|
||||||
|
index 9924d522bcd..1ad0c63e0d9 100644
|
||||||
|
--- a/gcc/config/loongarch/sync.md
|
||||||
|
+++ b/gcc/config/loongarch/sync.md
|
||||||
|
@@ -50,23 +50,56 @@
|
||||||
|
[(match_operand:SI 0 "const_int_operand" "")] ;; model
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- if (INTVAL (operands[0]) != MEMMODEL_RELAXED)
|
||||||
|
- {
|
||||||
|
- rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
|
||||||
|
- MEM_VOLATILE_P (mem) = 1;
|
||||||
|
- emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
|
||||||
|
- }
|
||||||
|
+ rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
|
||||||
|
+ MEM_VOLATILE_P (mem) = 1;
|
||||||
|
+ emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
|
||||||
|
+
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-;; Until the LoongArch memory model (hence its mapping from C++) is finalized,
|
||||||
|
-;; conservatively emit a full FENCE.
|
||||||
|
+;; DBAR hint encoding for LA664 and later micro-architectures, paraphrased from
|
||||||
|
+;; the Linux patch revealing it [1]:
|
||||||
|
+;;
|
||||||
|
+;; - Bit 4: kind of constraint (0: completion, 1: ordering)
|
||||||
|
+;; - Bit 3: barrier for previous read (0: true, 1: false)
|
||||||
|
+;; - Bit 2: barrier for previous write (0: true, 1: false)
|
||||||
|
+;; - Bit 1: barrier for succeeding read (0: true, 1: false)
|
||||||
|
+;; - Bit 0: barrier for succeeding write (0: true, 1: false)
|
||||||
|
+;;
|
||||||
|
+;; [1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed
|
||||||
|
+;;
|
||||||
|
+;; Implementations without support for the finer-granularity hints simply treat
|
||||||
|
+;; all as the full barrier (DBAR 0), so we can unconditionally start emiting the
|
||||||
|
+;; more precise hints right away.
|
||||||
|
(define_insn "mem_thread_fence_1"
|
||||||
|
[(set (match_operand:BLK 0 "" "")
|
||||||
|
(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))
|
||||||
|
(match_operand:SI 1 "const_int_operand" "")] ;; model
|
||||||
|
""
|
||||||
|
- "dbar\t0")
|
||||||
|
+ {
|
||||||
|
+ enum memmodel model = memmodel_base (INTVAL (operands[1]));
|
||||||
|
+
|
||||||
|
+ switch (model)
|
||||||
|
+ {
|
||||||
|
+ case MEMMODEL_ACQUIRE:
|
||||||
|
+ return "dbar\t0b10100";
|
||||||
|
+ case MEMMODEL_RELEASE:
|
||||||
|
+ return "dbar\t0b10010";
|
||||||
|
+ case MEMMODEL_ACQ_REL:
|
||||||
|
+ case MEMMODEL_SEQ_CST:
|
||||||
|
+ return "dbar\t0b10000";
|
||||||
|
+ default:
|
||||||
|
+ /* GCC internal: "For the '__ATOMIC_RELAXED' model no instructions
|
||||||
|
+ need to be issued and this expansion is not invoked."
|
||||||
|
+
|
||||||
|
+ __atomic builtins doc: "Consume is implemented using the
|
||||||
|
+ stronger acquire memory order because of a deficiency in C++11's
|
||||||
|
+ semantics." See PR 59448 and get_memmodel in builtins.cc.
|
||||||
|
+
|
||||||
|
+ Other values should not be returned by memmodel_base. */
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+ })
|
||||||
|
|
||||||
|
;; Atomic memory operations.
|
||||||
|
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
15
gcc.spec
15
gcc.spec
@ -2,7 +2,7 @@
|
|||||||
%global gcc_major 12
|
%global gcc_major 12
|
||||||
# Note, gcc_release must be integer, if you want to add suffixes to
|
# Note, gcc_release must be integer, if you want to add suffixes to
|
||||||
# %%{release}, append them after %%{gcc_release} on Release: line.
|
# %%{release}, append them after %%{gcc_release} on Release: line.
|
||||||
%global gcc_release 22
|
%global gcc_release 23
|
||||||
|
|
||||||
%global _unpackaged_files_terminate_build 0
|
%global _unpackaged_files_terminate_build 0
|
||||||
%global _performance_build 1
|
%global _performance_build 1
|
||||||
@ -321,6 +321,10 @@ Patch3123: LoongArch-Change-the-value-of-branch_cost-from-2-to-.patch
|
|||||||
Patch3124: libsanitizer-add-LoongArch-support.patch
|
Patch3124: libsanitizer-add-LoongArch-support.patch
|
||||||
Patch3125: LoongArch-fix-error-building.patch
|
Patch3125: LoongArch-fix-error-building.patch
|
||||||
Patch3126: libjccjit-do-not-link-objects-contained-same-element.patch
|
Patch3126: libjccjit-do-not-link-objects-contained-same-element.patch
|
||||||
|
Patch3127: LoongArch-Use-finer-grained-DBAR-hints.patch
|
||||||
|
Patch3128: LoongArch-Add-LA664-support.patch
|
||||||
|
Patch3129: LoongArch-Fix-internal-error-running-gcc-march-nativ.patch
|
||||||
|
Patch3130: LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
# On ARM EABI systems, we do want -gnueabi to be part of the
|
# On ARM EABI systems, we do want -gnueabi to be part of the
|
||||||
@ -970,6 +974,10 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
|||||||
%patch3124 -p1
|
%patch3124 -p1
|
||||||
%patch3125 -p1
|
%patch3125 -p1
|
||||||
%patch3126 -p1
|
%patch3126 -p1
|
||||||
|
%patch3127 -p1
|
||||||
|
%patch3128 -p1
|
||||||
|
%patch3129 -p1
|
||||||
|
%patch3130 -p1
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE
|
echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE
|
||||||
@ -2416,6 +2424,8 @@ end
|
|||||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-def.h
|
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-def.h
|
||||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-tune.h
|
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-tune.h
|
||||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-driver.h
|
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-driver.h
|
||||||
|
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lsxintrin.h
|
||||||
|
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lasxintrin.h
|
||||||
%endif
|
%endif
|
||||||
%ifarch sparc sparcv9 sparc64
|
%ifarch sparc sparcv9 sparc64
|
||||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/visintrin.h
|
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/visintrin.h
|
||||||
@ -3228,6 +3238,9 @@ end
|
|||||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Apr 22 2024 Peng Fan <fanpeng@loongson.cn> - 12.3.1-23
|
||||||
|
- DESC: Add LoongArch 3A6000 support
|
||||||
|
|
||||||
* Fri Apr 12 2024 Zhengchen Hui <zhengchenhui1@huawei.com> - 12.3.1-22
|
* Fri Apr 12 2024 Zhengchen Hui <zhengchenhui1@huawei.com> - 12.3.1-22
|
||||||
- Type: Sync
|
- Type: Sync
|
||||||
- DESC: Sync patch from openeuler/gcc
|
- DESC: Sync patch from openeuler/gcc
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user