112 lines
3.9 KiB
Diff
112 lines
3.9 KiB
Diff
|
|
From fbcb1a5899b1bd3964aed78ed74041121e618d36 Mon Sep 17 00:00:00 2001
|
||
|
|
From: liuhongt <hongtao.liu@intel.com>
|
||
|
|
Date: Tue, 20 Jun 2023 15:41:00 +0800
|
||
|
|
Subject: [PATCH 07/32] Refine maskloadmn pattern with UNSPEC_MASKLOAD.
|
||
|
|
|
||
|
|
If mem_addr points to a memory region with fewer than whole-vector-size
|
||
|
|
bytes of accessible memory and k is a mask that would prevent reading
|
||
|
|
the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent
|
||
|
|
it from being transformed into vpblendd.
|
||
|
|
|
||
|
|
gcc/ChangeLog:
|
||
|
|
|
||
|
|
PR target/110309
|
||
|
|
* config/i386/sse.md (maskload<mode><avx512fmaskmodelower>):
|
||
|
|
Refine pattern with UNSPEC_MASKLOAD.
|
||
|
|
(maskload<mode><avx512fmaskmodelower>): Ditto.
|
||
|
|
(*<avx512>_load<mode>_mask): Extend mode iterator to
|
||
|
|
VI12HF_AVX512VL.
|
||
|
|
(*<avx512>_load<mode>): Ditto.
|
||
|
|
|
||
|
|
gcc/testsuite/ChangeLog:
|
||
|
|
|
||
|
|
* gcc.target/i386/pr110309.c: New test.
|
||
|
|
---
|
||
|
|
gcc/config/i386/sse.md | 32 +++++++++++++-----------
|
||
|
|
gcc/testsuite/gcc.target/i386/pr110309.c | 10 ++++++++
|
||
|
|
2 files changed, 28 insertions(+), 14 deletions(-)
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c
|
||
|
|
|
||
|
|
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||
|
|
index eb767e56c..b30e96cb1 100644
|
||
|
|
--- a/gcc/config/i386/sse.md
|
||
|
|
+++ b/gcc/config/i386/sse.md
|
||
|
|
@@ -1411,12 +1411,12 @@
|
||
|
|
})
|
||
|
|
|
||
|
|
(define_insn "*<avx512>_load<mode>_mask"
|
||
|
|
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
|
||
|
|
- (vec_merge:VI12_AVX512VL
|
||
|
|
- (unspec:VI12_AVX512VL
|
||
|
|
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
|
||
|
|
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
|
||
|
|
+ (vec_merge:VI12HF_AVX512VL
|
||
|
|
+ (unspec:VI12HF_AVX512VL
|
||
|
|
+ [(match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")]
|
||
|
|
UNSPEC_MASKLOAD)
|
||
|
|
- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
|
||
|
|
+ (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand" "0C")
|
||
|
|
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
|
||
|
|
"TARGET_AVX512BW"
|
||
|
|
"vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
|
||
|
|
@@ -1425,9 +1425,9 @@
|
||
|
|
(set_attr "mode" "<sseinsnmode>")])
|
||
|
|
|
||
|
|
(define_insn_and_split "*<avx512>_load<mode>"
|
||
|
|
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
|
||
|
|
- (unspec:VI12_AVX512VL
|
||
|
|
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
|
||
|
|
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
|
||
|
|
+ (unspec:VI12HF_AVX512VL
|
||
|
|
+ [(match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")]
|
||
|
|
UNSPEC_MASKLOAD))]
|
||
|
|
"TARGET_AVX512BW"
|
||
|
|
"#"
|
||
|
|
@@ -25973,17 +25973,21 @@
|
||
|
|
"TARGET_AVX")
|
||
|
|
|
||
|
|
(define_expand "maskload<mode><avx512fmaskmodelower>"
|
||
|
|
- [(set (match_operand:V48H_AVX512VL 0 "register_operand")
|
||
|
|
- (vec_merge:V48H_AVX512VL
|
||
|
|
- (match_operand:V48H_AVX512VL 1 "memory_operand")
|
||
|
|
+ [(set (match_operand:V48_AVX512VL 0 "register_operand")
|
||
|
|
+ (vec_merge:V48_AVX512VL
|
||
|
|
+ (unspec:V48_AVX512VL
|
||
|
|
+ [(match_operand:V48_AVX512VL 1 "memory_operand")]
|
||
|
|
+ UNSPEC_MASKLOAD)
|
||
|
|
(match_dup 0)
|
||
|
|
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||
|
|
"TARGET_AVX512F")
|
||
|
|
|
||
|
|
(define_expand "maskload<mode><avx512fmaskmodelower>"
|
||
|
|
- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
|
||
|
|
- (vec_merge:VI12_AVX512VL
|
||
|
|
- (match_operand:VI12_AVX512VL 1 "memory_operand")
|
||
|
|
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand")
|
||
|
|
+ (vec_merge:VI12HF_AVX512VL
|
||
|
|
+ (unspec:VI12HF_AVX512VL
|
||
|
|
+ [(match_operand:VI12HF_AVX512VL 1 "memory_operand")]
|
||
|
|
+ UNSPEC_MASKLOAD)
|
||
|
|
(match_dup 0)
|
||
|
|
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||
|
|
"TARGET_AVX512BW")
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..f6e9e9c3c
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/pr110309.c
|
||
|
|
@@ -0,0 +1,10 @@
|
||
|
|
+/* { dg-do compile } */
|
||
|
|
+/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
|
||
|
|
+/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
|
||
|
|
+
|
||
|
|
+
|
||
|
|
+void foo (int * __restrict a, int *b)
|
||
|
|
+{
|
||
|
|
+ for (int i = 0; i < 6; ++i)
|
||
|
|
+ a[i] = b[i] + 42;
|
||
|
|
+}
|
||
|
|
--
|
||
|
|
2.28.0.windows.1
|
||
|
|
|