164 lines
4.2 KiB
Diff
164 lines
4.2 KiB
Diff
|
|
From 88516507757932c1e67ce99d240596935971d2d0 Mon Sep 17 00:00:00 2001
|
||
|
|
From: liuhongt <hongtao.liu@intel.com>
|
||
|
|
Date: Thu, 9 Nov 2023 13:20:05 +0800
|
||
|
|
Subject: [PATCH 15/32] Fix wrong code due to vec_merge + pcmp to blendvb
|
||
|
|
splitter.
|
||
|
|
|
||
|
|
gcc/ChangeLog:
|
||
|
|
|
||
|
|
PR target/112443
|
||
|
|
* config/i386/sse.md (*avx2_pcmp<mode>3_4): Fix swap condition
|
||
|
|
from LT to GT since there's a "not" in the pattern.
|
||
|
|
(*avx2_pcmp<mode>3_5): Ditto.
|
||
|
|
|
||
|
|
gcc/testsuite/ChangeLog:
|
||
|
|
|
||
|
|
* g++.target/i386/pr112443.C: New test.
|
||
|
|
|
||
|
|
(cherry picked from commit 9a0cc04b9c9b02426762892b88efc5c44ba546bd)
|
||
|
|
---
|
||
|
|
gcc/config/i386/sse.md | 4 +-
|
||
|
|
gcc/testsuite/g++.target/i386/pr112443.C | 108 +++++++++++++++++++++++
|
||
|
|
2 files changed, 110 insertions(+), 2 deletions(-)
|
||
|
|
create mode 100644 gcc/testsuite/g++.target/i386/pr112443.C
|
||
|
|
|
||
|
|
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||
|
|
index f25dd5f2b..23b858ab2 100644
|
||
|
|
--- a/gcc/config/i386/sse.md
|
||
|
|
+++ b/gcc/config/i386/sse.md
|
||
|
|
@@ -16358,7 +16358,7 @@
|
||
|
|
(match_dup 4))]
|
||
|
|
UNSPEC_BLENDV))]
|
||
|
|
{
|
||
|
|
- if (INTVAL (operands[5]) == 1)
|
||
|
|
+ if (INTVAL (operands[5]) == 5)
|
||
|
|
std::swap (operands[1], operands[2]);
|
||
|
|
operands[3] = gen_lowpart (<MODE>mode, operands[3]);
|
||
|
|
})
|
||
|
|
@@ -16388,7 +16388,7 @@
|
||
|
|
(match_dup 4))]
|
||
|
|
UNSPEC_BLENDV))]
|
||
|
|
{
|
||
|
|
- if (INTVAL (operands[5]) == 1)
|
||
|
|
+ if (INTVAL (operands[5]) == 5)
|
||
|
|
std::swap (operands[1], operands[2]);
|
||
|
|
})
|
||
|
|
|
||
|
|
diff --git a/gcc/testsuite/g++.target/i386/pr112443.C b/gcc/testsuite/g++.target/i386/pr112443.C
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..ebfa9b4a7
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/g++.target/i386/pr112443.C
|
||
|
|
@@ -0,0 +1,108 @@
|
||
|
|
+/* { dg-do run } */
|
||
|
|
+/* { dg-require-effective-target avx512bw } */
|
||
|
|
+/* { dg-require-effective-target avx512vl } */
|
||
|
|
+/* { dg-options "-O2 -std=c++17 -mavx512bw -mavx512vl" } */
|
||
|
|
+
|
||
|
|
+#include <cstdint>
|
||
|
|
+#include <x86intrin.h>
|
||
|
|
+#include <functional>
|
||
|
|
+#include <ostream>
|
||
|
|
+
|
||
|
|
+#define AVX512BW
|
||
|
|
+#define AVX512VL
|
||
|
|
+
|
||
|
|
+#include "avx512f-helper.h"
|
||
|
|
+
|
||
|
|
+struct TensorIteratorBase{
|
||
|
|
+ char* in;
|
||
|
|
+ char* out;
|
||
|
|
+
|
||
|
|
+ void for_each(std::function<void(char*, char*, int64_t size)> loop){
|
||
|
|
+ loop(out, in, 32);
|
||
|
|
+ }
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+class Vectorized {
|
||
|
|
+protected:
|
||
|
|
+ __m256i values;
|
||
|
|
+
|
||
|
|
+ static inline __m256i invert(const __m256i& v) {
|
||
|
|
+ const auto ones = _mm256_set1_epi64x(-1);
|
||
|
|
+ return _mm256_xor_si256(ones, v);
|
||
|
|
+ }
|
||
|
|
+public:
|
||
|
|
+ operator __m256i() const {
|
||
|
|
+ return values;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ static constexpr int size() {
|
||
|
|
+ return 32;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ Vectorized() {}
|
||
|
|
+ Vectorized(__m256i v) : values(v) {}
|
||
|
|
+ Vectorized(uint8_t v) { values = _mm256_set1_epi8(v); }
|
||
|
|
+ static Vectorized blendv(const Vectorized& a, const Vectorized& b,
|
||
|
|
+ const Vectorized& mask) {
|
||
|
|
+ return _mm256_blendv_epi8(a, b, mask);
|
||
|
|
+ }
|
||
|
|
+ static Vectorized loadu(const void* ptr) {
|
||
|
|
+ return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
|
||
|
|
+ }
|
||
|
|
+ void store(void* ptr) const {
|
||
|
|
+ _mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ Vectorized operator<(const Vectorized& other) const {
|
||
|
|
+ __m256i max = _mm256_max_epu8(values, other);
|
||
|
|
+ return invert(_mm256_cmpeq_epi8(max, values));
|
||
|
|
+ }
|
||
|
|
+ Vectorized operator-(const Vectorized& b) {
|
||
|
|
+ return _mm256_sub_epi8(values, b);
|
||
|
|
+ }
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+std::ostream& operator<<(std::ostream& stream, const Vectorized& vec) {
|
||
|
|
+ uint8_t buf[Vectorized::size()];
|
||
|
|
+ vec.store(buf);
|
||
|
|
+ stream << "vec[";
|
||
|
|
+ for (int i = 0; i != Vectorized::size(); i++) {
|
||
|
|
+ if (i != 0)
|
||
|
|
+ stream << ", ";
|
||
|
|
+ stream << buf[i]*1;
|
||
|
|
+ }
|
||
|
|
+ stream << "]";
|
||
|
|
+ return stream;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+void run(TensorIteratorBase iter){
|
||
|
|
+ Vectorized zero_vec(0);
|
||
|
|
+ Vectorized one_vec(1);
|
||
|
|
+
|
||
|
|
+ iter.for_each([=](char* out, char* in, int64_t size) {
|
||
|
|
+ for (int64_t i = 0; i <= size - Vectorized::size(); i += Vectorized::size()) {
|
||
|
|
+ auto self_vec = Vectorized::loadu(in + i);
|
||
|
|
+ auto left = Vectorized::blendv(zero_vec, one_vec, zero_vec < self_vec);
|
||
|
|
+ auto right = Vectorized::blendv(zero_vec, one_vec, self_vec < zero_vec);
|
||
|
|
+ auto outv = left - right;
|
||
|
|
+ outv.store(out + i);
|
||
|
|
+ }
|
||
|
|
+ });
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+test_256 (){
|
||
|
|
+ char in[32];
|
||
|
|
+ char out[32];
|
||
|
|
+ for(auto& x: in) x = 1;
|
||
|
|
+ run(TensorIteratorBase{in, out});
|
||
|
|
+ Vectorized::loadu (out);
|
||
|
|
+ for (int i = 0; i != 32; i++)
|
||
|
|
+ if (out[i] != 1)
|
||
|
|
+ __builtin_abort ();
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+test_128 ()
|
||
|
|
+{
|
||
|
|
+}
|
||
|
|
--
|
||
|
|
2.28.0.windows.1
|
||
|
|
|