gcc/LoongArch-Add-Loongson-ASX-base-instruction-support.patch
ticat_fp 7e7be47bfd LoongArch: Sync patch from gcc upstream
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
2024-03-27 09:22:13 +08:00

8377 lines
262 KiB
Diff
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 2f0874e6e6f5a866e71826983dc18295c408748b Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 16 Mar 2023 16:34:08 +0800
Subject: [PATCH 065/124] LoongArch: Add Loongson ASX base instruction support.
gcc/ChangeLog:
* config/loongarch/loongarch-modes.def
(VECTOR_MODES): Add Loongson ASX instruction support.
* config/loongarch/loongarch-protos.h (loongarch_split_256bit_move): Ditto.
(loongarch_split_256bit_move_p): Ditto.
(loongarch_expand_vector_group_init): Ditto.
(loongarch_expand_vec_perm_1): Ditto.
* config/loongarch/loongarch.cc (loongarch_symbol_insns): Ditto.
(loongarch_valid_offset_p): Ditto.
(loongarch_address_insns): Ditto.
(loongarch_const_insns): Ditto.
(loongarch_legitimize_move): Ditto.
(loongarch_builtin_vectorization_cost): Ditto.
(loongarch_split_move_p): Ditto.
(loongarch_split_move): Ditto.
(loongarch_output_move_index_float): Ditto.
(loongarch_split_256bit_move_p): Ditto.
(loongarch_split_256bit_move): Ditto.
(loongarch_output_move): Ditto.
(loongarch_print_operand_reloc): Ditto.
(loongarch_print_operand): Ditto.
(loongarch_hard_regno_mode_ok_uncached): Ditto.
(loongarch_hard_regno_nregs): Ditto.
(loongarch_class_max_nregs): Ditto.
(loongarch_can_change_mode_class): Ditto.
(loongarch_mode_ok_for_mov_fmt_p): Ditto.
(loongarch_vector_mode_supported_p): Ditto.
(loongarch_preferred_simd_mode): Ditto.
(loongarch_autovectorize_vector_modes): Ditto.
(loongarch_lsx_output_division): Ditto.
(loongarch_expand_lsx_shuffle): Ditto.
(loongarch_expand_vec_perm): Ditto.
(loongarch_expand_vec_perm_interleave): Ditto.
(loongarch_try_expand_lsx_vshuf_const): Ditto.
(loongarch_expand_vec_perm_even_odd_1): Ditto.
(loongarch_expand_vec_perm_even_odd): Ditto.
(loongarch_expand_vec_perm_1): Ditto.
(loongarch_expand_vec_perm_const_2): Ditto.
(loongarch_is_quad_duplicate): Ditto.
(loongarch_is_double_duplicate): Ditto.
(loongarch_is_odd_extraction): Ditto.
(loongarch_is_even_extraction): Ditto.
(loongarch_is_extraction_permutation): Ditto.
(loongarch_is_center_extraction): Ditto.
(loongarch_is_reversing_permutation): Ditto.
(loongarch_is_di_misalign_extract): Ditto.
(loongarch_is_si_misalign_extract): Ditto.
(loongarch_is_lasx_lowpart_interleave): Ditto.
(loongarch_is_lasx_lowpart_interleave_2): Ditto.
(COMPARE_SELECTOR): Ditto.
(loongarch_is_lasx_lowpart_extract): Ditto.
(loongarch_is_lasx_highpart_interleave): Ditto.
(loongarch_is_lasx_highpart_interleave_2): Ditto.
(loongarch_is_elem_duplicate): Ditto.
(loongarch_is_op_reverse_perm): Ditto.
(loongarch_is_single_op_perm): Ditto.
(loongarch_is_divisible_perm): Ditto.
(loongarch_is_triple_stride_extract): Ditto.
(loongarch_vectorize_vec_perm_const): Ditto.
(loongarch_cpu_sched_reassociation_width): Ditto.
(loongarch_expand_vector_extract): Ditto.
(emit_reduc_half): Ditto.
(loongarch_expand_vec_unpack): Ditto.
(loongarch_expand_vector_group_init): Ditto.
(loongarch_expand_vector_init): Ditto.
(loongarch_expand_lsx_cmp): Ditto.
(loongarch_builtin_support_vector_misalignment): Ditto.
* config/loongarch/loongarch.h (UNITS_PER_LASX_REG): Ditto.
(BITS_PER_LASX_REG): Ditto.
(STRUCTURE_SIZE_BOUNDARY): Ditto.
(LASX_REG_FIRST): Ditto.
(LASX_REG_LAST): Ditto.
(LASX_REG_NUM): Ditto.
(LASX_REG_P): Ditto.
(LASX_REG_RTX_P): Ditto.
(LASX_SUPPORTED_MODE_P): Ditto.
* config/loongarch/loongarch.md: Ditto.
* config/loongarch/lasx.md: New file.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/lasx.md | 5104 ++++++++++++++++++++++
gcc/config/loongarch/loongarch-modes.def | 1 +
gcc/config/loongarch/loongarch-protos.h | 4 +
gcc/config/loongarch/loongarch.cc | 2567 ++++++++++-
gcc/config/loongarch/loongarch.h | 60 +-
gcc/config/loongarch/loongarch.md | 20 +-
6 files changed, 7637 insertions(+), 119 deletions(-)
create mode 100644 gcc/config/loongarch/lasx.md
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
new file mode 100644
index 000000000..8111c8bb7
--- /dev/null
+++ b/gcc/config/loongarch/lasx.md
@@ -0,0 +1,5104 @@
+;; Machine Description for LARCH Loongson ASX ASE
+;;
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+
+(define_c_enum "unspec" [
+ UNSPEC_LASX_XVABSD_S
+ UNSPEC_LASX_XVABSD_U
+ UNSPEC_LASX_XVAVG_S
+ UNSPEC_LASX_XVAVG_U
+ UNSPEC_LASX_XVAVGR_S
+ UNSPEC_LASX_XVAVGR_U
+ UNSPEC_LASX_XVBITCLR
+ UNSPEC_LASX_XVBITCLRI
+ UNSPEC_LASX_XVBITREV
+ UNSPEC_LASX_XVBITREVI
+ UNSPEC_LASX_XVBITSET
+ UNSPEC_LASX_XVBITSETI
+ UNSPEC_LASX_XVFCMP_CAF
+ UNSPEC_LASX_XVFCLASS
+ UNSPEC_LASX_XVFCMP_CUNE
+ UNSPEC_LASX_XVFCVT
+ UNSPEC_LASX_XVFCVTH
+ UNSPEC_LASX_XVFCVTL
+ UNSPEC_LASX_XVFLOGB
+ UNSPEC_LASX_XVFRECIP
+ UNSPEC_LASX_XVFRINT
+ UNSPEC_LASX_XVFRSQRT
+ UNSPEC_LASX_XVFCMP_SAF
+ UNSPEC_LASX_XVFCMP_SEQ
+ UNSPEC_LASX_XVFCMP_SLE
+ UNSPEC_LASX_XVFCMP_SLT
+ UNSPEC_LASX_XVFCMP_SNE
+ UNSPEC_LASX_XVFCMP_SOR
+ UNSPEC_LASX_XVFCMP_SUEQ
+ UNSPEC_LASX_XVFCMP_SULE
+ UNSPEC_LASX_XVFCMP_SULT
+ UNSPEC_LASX_XVFCMP_SUN
+ UNSPEC_LASX_XVFCMP_SUNE
+ UNSPEC_LASX_XVFTINT_S
+ UNSPEC_LASX_XVFTINT_U
+ UNSPEC_LASX_XVCLO
+ UNSPEC_LASX_XVSAT_S
+ UNSPEC_LASX_XVSAT_U
+ UNSPEC_LASX_XVREPLVE0
+ UNSPEC_LASX_XVREPL128VEI
+ UNSPEC_LASX_XVSRAR
+ UNSPEC_LASX_XVSRARI
+ UNSPEC_LASX_XVSRLR
+ UNSPEC_LASX_XVSRLRI
+ UNSPEC_LASX_XVSHUF
+ UNSPEC_LASX_XVSHUF_B
+ UNSPEC_LASX_BRANCH
+ UNSPEC_LASX_BRANCH_V
+
+ UNSPEC_LASX_XVMUH_S
+ UNSPEC_LASX_XVMUH_U
+ UNSPEC_LASX_MXVEXTW_U
+ UNSPEC_LASX_XVSLLWIL_S
+ UNSPEC_LASX_XVSLLWIL_U
+ UNSPEC_LASX_XVSRAN
+ UNSPEC_LASX_XVSSRAN_S
+ UNSPEC_LASX_XVSSRAN_U
+ UNSPEC_LASX_XVSRARN
+ UNSPEC_LASX_XVSSRARN_S
+ UNSPEC_LASX_XVSSRARN_U
+ UNSPEC_LASX_XVSRLN
+ UNSPEC_LASX_XVSSRLN_U
+ UNSPEC_LASX_XVSRLRN
+ UNSPEC_LASX_XVSSRLRN_U
+ UNSPEC_LASX_XVFRSTPI
+ UNSPEC_LASX_XVFRSTP
+ UNSPEC_LASX_XVSHUF4I
+ UNSPEC_LASX_XVBSRL_V
+ UNSPEC_LASX_XVBSLL_V
+ UNSPEC_LASX_XVEXTRINS
+ UNSPEC_LASX_XVMSKLTZ
+ UNSPEC_LASX_XVSIGNCOV
+ UNSPEC_LASX_XVFTINTRNE_W_S
+ UNSPEC_LASX_XVFTINTRNE_L_D
+ UNSPEC_LASX_XVFTINTRP_W_S
+ UNSPEC_LASX_XVFTINTRP_L_D
+ UNSPEC_LASX_XVFTINTRM_W_S
+ UNSPEC_LASX_XVFTINTRM_L_D
+ UNSPEC_LASX_XVFTINT_W_D
+ UNSPEC_LASX_XVFFINT_S_L
+ UNSPEC_LASX_XVFTINTRZ_W_D
+ UNSPEC_LASX_XVFTINTRP_W_D
+ UNSPEC_LASX_XVFTINTRM_W_D
+ UNSPEC_LASX_XVFTINTRNE_W_D
+ UNSPEC_LASX_XVFTINTH_L_S
+ UNSPEC_LASX_XVFTINTL_L_S
+ UNSPEC_LASX_XVFFINTH_D_W
+ UNSPEC_LASX_XVFFINTL_D_W
+ UNSPEC_LASX_XVFTINTRZH_L_S
+ UNSPEC_LASX_XVFTINTRZL_L_S
+ UNSPEC_LASX_XVFTINTRPH_L_S
+ UNSPEC_LASX_XVFTINTRPL_L_S
+ UNSPEC_LASX_XVFTINTRMH_L_S
+ UNSPEC_LASX_XVFTINTRML_L_S
+ UNSPEC_LASX_XVFTINTRNEL_L_S
+ UNSPEC_LASX_XVFTINTRNEH_L_S
+ UNSPEC_LASX_XVFRINTRNE_S
+ UNSPEC_LASX_XVFRINTRNE_D
+ UNSPEC_LASX_XVFRINTRZ_S
+ UNSPEC_LASX_XVFRINTRZ_D
+ UNSPEC_LASX_XVFRINTRP_S
+ UNSPEC_LASX_XVFRINTRP_D
+ UNSPEC_LASX_XVFRINTRM_S
+ UNSPEC_LASX_XVFRINTRM_D
+ UNSPEC_LASX_XVREPLVE0_Q
+ UNSPEC_LASX_XVPERM_W
+ UNSPEC_LASX_XVPERMI_Q
+ UNSPEC_LASX_XVPERMI_D
+
+ UNSPEC_LASX_XVADDWEV
+ UNSPEC_LASX_XVADDWEV2
+ UNSPEC_LASX_XVADDWEV3
+ UNSPEC_LASX_XVSUBWEV
+ UNSPEC_LASX_XVSUBWEV2
+ UNSPEC_LASX_XVMULWEV
+ UNSPEC_LASX_XVMULWEV2
+ UNSPEC_LASX_XVMULWEV3
+ UNSPEC_LASX_XVADDWOD
+ UNSPEC_LASX_XVADDWOD2
+ UNSPEC_LASX_XVADDWOD3
+ UNSPEC_LASX_XVSUBWOD
+ UNSPEC_LASX_XVSUBWOD2
+ UNSPEC_LASX_XVMULWOD
+ UNSPEC_LASX_XVMULWOD2
+ UNSPEC_LASX_XVMULWOD3
+ UNSPEC_LASX_XVMADDWEV
+ UNSPEC_LASX_XVMADDWEV2
+ UNSPEC_LASX_XVMADDWEV3
+ UNSPEC_LASX_XVMADDWOD
+ UNSPEC_LASX_XVMADDWOD2
+ UNSPEC_LASX_XVMADDWOD3
+ UNSPEC_LASX_XVHADDW_Q_D
+ UNSPEC_LASX_XVHSUBW_Q_D
+ UNSPEC_LASX_XVHADDW_QU_DU
+ UNSPEC_LASX_XVHSUBW_QU_DU
+ UNSPEC_LASX_XVROTR
+ UNSPEC_LASX_XVADD_Q
+ UNSPEC_LASX_XVSUB_Q
+ UNSPEC_LASX_XVREPLVE
+ UNSPEC_LASX_XVSHUF4
+ UNSPEC_LASX_XVMSKGEZ
+ UNSPEC_LASX_XVMSKNZ
+ UNSPEC_LASX_XVEXTH_Q_D
+ UNSPEC_LASX_XVEXTH_QU_DU
+ UNSPEC_LASX_XVEXTL_Q_D
+ UNSPEC_LASX_XVSRLNI
+ UNSPEC_LASX_XVSRLRNI
+ UNSPEC_LASX_XVSSRLNI
+ UNSPEC_LASX_XVSSRLNI2
+ UNSPEC_LASX_XVSSRLRNI
+ UNSPEC_LASX_XVSSRLRNI2
+ UNSPEC_LASX_XVSRANI
+ UNSPEC_LASX_XVSRARNI
+ UNSPEC_LASX_XVSSRANI
+ UNSPEC_LASX_XVSSRANI2
+ UNSPEC_LASX_XVSSRARNI
+ UNSPEC_LASX_XVSSRARNI2
+ UNSPEC_LASX_XVPERMI
+ UNSPEC_LASX_XVINSVE0
+ UNSPEC_LASX_XVPICKVE
+ UNSPEC_LASX_XVSSRLN
+ UNSPEC_LASX_XVSSRLRN
+ UNSPEC_LASX_XVEXTL_QU_DU
+ UNSPEC_LASX_XVLDI
+ UNSPEC_LASX_XVLDX
+ UNSPEC_LASX_XVSTX
+])
+
+;; All vector modes with 256 bits.
+(define_mode_iterator LASX [V4DF V8SF V4DI V8SI V16HI V32QI])
+
+;; Same as LASX. Used by vcond to iterate two modes.
+(define_mode_iterator LASX_2 [V4DF V8SF V4DI V8SI V16HI V32QI])
+
+;; Only used for splitting insert_d and copy_{u,s}.d.
+(define_mode_iterator LASX_D [V4DI V4DF])
+
+;; Like LASX_D but also includes the word modes (V8SI/V8SF).
+(define_mode_iterator LASX_WD [V4DI V4DF V8SI V8SF])
+
+;; Only used for copy256_{u,s}.w.
+(define_mode_iterator LASX_W [V8SI V8SF])
+
+;; Only integer modes in LASX.
+(define_mode_iterator ILASX [V4DI V8SI V16HI V32QI])
+
+;; As ILASX but excludes V32QI.
+(define_mode_iterator ILASX_DWH [V4DI V8SI V16HI])
+
+;; As LASX but excludes V32QI.
+(define_mode_iterator LASX_DWH [V4DF V8SF V4DI V8SI V16HI])
+
+;; As ILASX but excludes V4DI.
+(define_mode_iterator ILASX_WHB [V8SI V16HI V32QI])
+
+;; Only integer modes equal or larger than a word.
+(define_mode_iterator ILASX_DW [V4DI V8SI])
+
+;; Only integer modes smaller than a word.
+(define_mode_iterator ILASX_HB [V16HI V32QI])
+
+;; Only floating-point modes in LASX.
+(define_mode_iterator FLASX [V4DF V8SF])
+
+;; Only used for immediate set shuffle elements instruction.
+(define_mode_iterator LASX_WHB_W [V8SI V16HI V32QI V8SF])
+
+;; The attribute gives the integer vector mode with same size in Loongson ASX.
+(define_mode_attr VIMODE256
+ [(V4DF "V4DI")
+ (V8SF "V8SI")
+ (V4DI "V4DI")
+ (V8SI "V8SI")
+ (V16HI "V16HI")
+ (V32QI "V32QI")])
+
+;; This attribute gives the mode with half-width elements (but the same
+;; total size, 256 bits) for vector modes.
+(define_mode_attr VHSMODE256
+ [(V16HI "V32QI")
+ (V8SI "V16HI")
+ (V4DI "V8SI")])
+
+;;attribute gives half modes for vector modes.
+(define_mode_attr VHMODE256
+ [(V32QI "V16QI")
+ (V16HI "V8HI")
+ (V8SI "V4SI")
+ (V4DI "V2DI")])
+
+;;attribute gives half float modes for vector modes.
+(define_mode_attr VFHMODE256
+ [(V8SF "V4SF")
+ (V4DF "V2DF")])
+
+;; The attribute gives double modes for vector modes in LASX.
+(define_mode_attr VDMODE256
+ [(V8SI "V4DI")
+ (V16HI "V8SI")
+ (V32QI "V16HI")])
+
+;; extended from VDMODE256
+(define_mode_attr VDMODEEXD256
+ [(V4DI "V4DI")
+ (V8SI "V4DI")
+ (V16HI "V8SI")
+ (V32QI "V16HI")])
+
+;; The attribute gives half modes with same number of elements for vector modes.
+(define_mode_attr VTRUNCMODE256
+ [(V16HI "V16QI")
+ (V8SI "V8HI")
+ (V4DI "V4SI")])
+
+;; Double-sized Vector MODE with same element type. "Vector, Enlarged-MODE"
+(define_mode_attr VEMODE256
+ [(V8SF "V16SF")
+ (V8SI "V16SI")
+ (V4DF "V8DF")
+ (V4DI "V8DI")])
+
+;; This attribute gives the mode of the result for "copy_s_b, copy_u_b" etc.
+(define_mode_attr VRES256
+ [(V4DF "DF")
+ (V8SF "SF")
+ (V4DI "DI")
+ (V8SI "SI")
+ (V16HI "SI")
+ (V32QI "SI")])
+
+;; Only used with LASX_D iterator.
+(define_mode_attr lasx_d
+ [(V4DI "reg_or_0")
+ (V4DF "register")])
+
+;; This attribute gives the 256 bit integer vector mode with same size.
+(define_mode_attr mode256_i
+ [(V4DF "v4di")
+ (V8SF "v8si")
+ (V4DI "v4di")
+ (V8SI "v8si")
+ (V16HI "v16hi")
+ (V32QI "v32qi")])
+
+
+;; This attribute gives the 256 bit float vector mode with same size.
+(define_mode_attr mode256_f
+ [(V4DF "v4df")
+ (V8SF "v8sf")
+ (V4DI "v4df")
+ (V8SI "v8sf")])
+
+;; This attribute gives the element-size suffix for LASX instructions.
+(define_mode_attr lasxfmt
+ [(V4DF "d")
+ (V8SF "w")
+ (V4DI "d")
+ (V8SI "w")
+ (V16HI "h")
+ (V32QI "b")])
+
+(define_mode_attr flasxfmt
+ [(V4DF "d")
+ (V8SF "s")])
+
+(define_mode_attr lasxfmt_u
+ [(V4DF "du")
+ (V8SF "wu")
+ (V4DI "du")
+ (V8SI "wu")
+ (V16HI "hu")
+ (V32QI "bu")])
+
+(define_mode_attr ilasxfmt
+ [(V4DF "l")
+ (V8SF "w")])
+
+(define_mode_attr ilasxfmt_u
+ [(V4DF "lu")
+ (V8SF "wu")])
+
+;; This attribute gives suffix for integers in VHMODE256.
+(define_mode_attr hlasxfmt
+ [(V4DI "w")
+ (V8SI "h")
+ (V16HI "b")])
+
+(define_mode_attr hlasxfmt_u
+ [(V4DI "wu")
+ (V8SI "hu")
+ (V16HI "bu")])
+
+;; This attribute gives suffix for integers in VHSMODE256.
+(define_mode_attr hslasxfmt
+ [(V4DI "w")
+ (V8SI "h")
+ (V16HI "b")])
+
+;; This attribute gives define_insn suffix for LASX instructions that need
+;; distinction between integer and floating point.
+(define_mode_attr lasxfmt_f
+ [(V4DF "d_f")
+ (V8SF "w_f")
+ (V4DI "d")
+ (V8SI "w")
+ (V16HI "h")
+ (V32QI "b")])
+
+(define_mode_attr flasxfmt_f
+ [(V4DF "d_f")
+ (V8SF "s_f")
+ (V4DI "d")
+ (V8SI "w")
+ (V16HI "h")
+ (V32QI "b")])
+
+;; This attribute gives define_insn suffix for LASX instructions that need
+;; distinction between integer and floating point.
+(define_mode_attr lasxfmt_f_wd
+ [(V4DF "d_f")
+ (V8SF "w_f")
+ (V4DI "d")
+ (V8SI "w")])
+
+;; This attribute gives suffix for integers in VDMODE256.
+(define_mode_attr dlasxfmt
+ [(V8SI "d")
+ (V16HI "w")
+ (V32QI "h")])
+
+(define_mode_attr dlasxfmt_u
+ [(V8SI "du")
+ (V16HI "wu")
+ (V32QI "hu")])
+
+;; for VDMODEEXD256
+(define_mode_attr dlasxqfmt
+ [(V4DI "q")
+ (V8SI "d")
+ (V16HI "w")
+ (V32QI "h")])
+
+;; This is used to form an immediate operand constraint using
+;; "const_<indeximm256>_operand".
+(define_mode_attr indeximm256
+ [(V4DF "0_to_3")
+ (V8SF "0_to_7")
+ (V4DI "0_to_3")
+ (V8SI "0_to_7")
+ (V16HI "uimm4")
+ (V32QI "uimm5")])
+
+;; This is used to form an immediate operand constraint referencing the high half
+;; "const_<indeximm_hi>_operand".
+(define_mode_attr indeximm_hi
+ [(V4DF "2_or_3")
+ (V8SF "4_to_7")
+ (V4DI "2_or_3")
+ (V8SI "4_to_7")
+ (V16HI "8_to_15")
+ (V32QI "16_to_31")])
+
+;; This is used to form an immediate operand constraint referencing the low half
+;; "const_<indeximm_lo>_operand".
+(define_mode_attr indeximm_lo
+ [(V4DF "0_or_1")
+ (V8SF "0_to_3")
+ (V4DI "0_or_1")
+ (V8SI "0_to_3")
+ (V16HI "uimm3")
+ (V32QI "uimm4")])
+
+;; This attribute represents the bitmask needed for vec_merge in LASX
+;; "const_<bitmask256>_operand".
+(define_mode_attr bitmask256
+ [(V4DF "exp_4")
+ (V8SF "exp_8")
+ (V4DI "exp_4")
+ (V8SI "exp_8")
+ (V16HI "exp_16")
+ (V32QI "exp_32")])
+
+;; This attribute represents the bitmask needed for vec_merge referencing the low half
+;; "const_<bitmask_lo>_operand".
+(define_mode_attr bitmask_lo
+ [(V4DF "exp_2")
+ (V8SF "exp_4")
+ (V4DI "exp_2")
+ (V8SI "exp_4")
+ (V16HI "exp_8")
+ (V32QI "exp_16")])
+
+
+;; This attribute is used to form an immediate operand constraint using
+;; "const_<bitimm256>_operand".
+(define_mode_attr bitimm256
+ [(V32QI "uimm3")
+ (V16HI "uimm4")
+ (V8SI "uimm5")
+ (V4DI "uimm6")])
+
+
+(define_mode_attr d2lasxfmt
+ [(V8SI "q")
+ (V16HI "d")
+ (V32QI "w")])
+
+(define_mode_attr d2lasxfmt_u
+ [(V8SI "qu")
+ (V16HI "du")
+ (V32QI "wu")])
+
+(define_mode_attr VD2MODE256
+ [(V8SI "V4DI")
+ (V16HI "V4DI")
+ (V32QI "V8SI")])
+
+(define_mode_attr lasxfmt_wd
+ [(V4DI "d")
+ (V8SI "w")
+ (V16HI "w")
+ (V32QI "w")])
+
+(define_int_iterator FRINT256_S [UNSPEC_LASX_XVFRINTRP_S
+ UNSPEC_LASX_XVFRINTRZ_S
+ UNSPEC_LASX_XVFRINT
+ UNSPEC_LASX_XVFRINTRM_S])
+
+(define_int_iterator FRINT256_D [UNSPEC_LASX_XVFRINTRP_D
+ UNSPEC_LASX_XVFRINTRZ_D
+ UNSPEC_LASX_XVFRINT
+ UNSPEC_LASX_XVFRINTRM_D])
+
+(define_int_attr frint256_pattern_s
+ [(UNSPEC_LASX_XVFRINTRP_S "ceil")
+ (UNSPEC_LASX_XVFRINTRZ_S "btrunc")
+ (UNSPEC_LASX_XVFRINT "rint")
+ (UNSPEC_LASX_XVFRINTRM_S "floor")])
+
+(define_int_attr frint256_pattern_d
+ [(UNSPEC_LASX_XVFRINTRP_D "ceil")
+ (UNSPEC_LASX_XVFRINTRZ_D "btrunc")
+ (UNSPEC_LASX_XVFRINT "rint")
+ (UNSPEC_LASX_XVFRINTRM_D "floor")])
+
+(define_int_attr frint256_suffix
+ [(UNSPEC_LASX_XVFRINTRP_S "rp")
+ (UNSPEC_LASX_XVFRINTRP_D "rp")
+ (UNSPEC_LASX_XVFRINTRZ_S "rz")
+ (UNSPEC_LASX_XVFRINTRZ_D "rz")
+ (UNSPEC_LASX_XVFRINT "")
+ (UNSPEC_LASX_XVFRINTRM_S "rm")
+ (UNSPEC_LASX_XVFRINTRM_D "rm")])
+
+(define_expand "vec_init<mode><unitmode>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand:LASX 1 "")]
+ "ISA_HAS_LASX"
+{
+ loongarch_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_initv32qiv16qi"
+ [(match_operand:V32QI 0 "register_operand")
+ (match_operand:V16QI 1 "")]
+ "ISA_HAS_LASX"
+{
+ loongarch_expand_vector_group_init (operands[0], operands[1]);
+ DONE;
+})
+
+;; FIXME: Delete.
+(define_insn "vec_pack_trunc_<mode>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (vec_concat:<VHSMODE256>
+ (truncate:<VTRUNCMODE256>
+ (match_operand:ILASX_DWH 1 "register_operand" "f"))
+ (truncate:<VTRUNCMODE256>
+ (match_operand:ILASX_DWH 2 "register_operand" "f"))))]
+ "ISA_HAS_LASX"
+ "xvpickev.<hslasxfmt>\t%u0,%u2,%u1\n\txvpermi.d\t%u0,%u0,0xd8"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "8")])
+
+(define_expand "vec_unpacks_hi_v8sf"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (match_dup 2))))]
+ "ISA_HAS_LASX"
+{
+ operands[2] = loongarch_lsx_vec_parallel_const_half (V8SFmode,
+ true/*high_p*/);
+})
+
+(define_expand "vec_unpacks_lo_v8sf"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (match_dup 2))))]
+ "ISA_HAS_LASX"
+{
+ operands[2] = loongarch_lsx_vec_parallel_const_half (V8SFmode,
+ false/*high_p*/);
+})
+
+(define_expand "vec_unpacks_hi_<mode>"
+ [(match_operand:<VDMODE256> 0 "register_operand")
+ (match_operand:ILASX_WHB 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ loongarch_expand_vec_unpack (operands, false/*unsigned_p*/,
+ true/*high_p*/);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_<mode>"
+ [(match_operand:<VDMODE256> 0 "register_operand")
+ (match_operand:ILASX_WHB 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ loongarch_expand_vec_unpack (operands, false/*unsigned_p*/, false/*high_p*/);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_<mode>"
+ [(match_operand:<VDMODE256> 0 "register_operand")
+ (match_operand:ILASX_WHB 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, true/*high_p*/);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_<mode>"
+ [(match_operand:<VDMODE256> 0 "register_operand")
+ (match_operand:ILASX_WHB 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, false/*high_p*/);
+ DONE;
+})
+
+(define_insn "lasx_xvinsgr2vr_<lasxfmt_f_wd>"
+ [(set (match_operand:ILASX_DW 0 "register_operand" "=f")
+ (vec_merge:ILASX_DW
+ (vec_duplicate:ILASX_DW
+ (match_operand:<UNITMODE> 1 "reg_or_0_operand" "rJ"))
+ (match_operand:ILASX_DW 2 "register_operand" "0")
+ (match_operand 3 "const_<bitmask256>_operand" "")))]
+ "ISA_HAS_LASX"
+{
+#if 0
+ if (!TARGET_64BIT && (<MODE>mode == V4DImode || <MODE>mode == V4DFmode))
+ return "#";
+ else
+#endif
+ return "xvinsgr2vr.<lasxfmt>\t%u0,%z1,%y3";
+}
+ [(set_attr "type" "simd_insert")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "vec_concatv4di"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return "xvpermi.q\t%u0,%u2,0x20";
+}
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "vec_concatv8si"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return "xvpermi.q\t%u0,%u2,0x20";
+}
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "vec_concatv16hi"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return "xvpermi.q\t%u0,%u2,0x20";
+}
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "vec_concatv32qi"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return "xvpermi.q\t%u0,%u2,0x20";
+}
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "vec_concatv4df"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return "xvpermi.q\t%u0,%u2,0x20";
+}
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "vec_concatv8sf"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return "xvpermi.q\t%u0,%u2,0x20";
+}
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V4DI")])
+
+;; xvperm.w
+(define_insn "lasx_xvperm_<lasxfmt_f_wd>"
+ [(set (match_operand:LASX_W 0 "register_operand" "=f")
+ (unspec:LASX_W
+ [(match_operand:LASX_W 1 "nonimmediate_operand" "f")
+ (match_operand:V8SI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVPERM_W))]
+ "ISA_HAS_LASX"
+ "xvperm.w\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+;; xvpermi.d
+(define_insn "lasx_xvpermi_d_<LASX:mode>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (unspec:LASX
+ [(match_operand:LASX 1 "register_operand" "f")
+ (match_operand:SI 2 "const_uimm8_operand")]
+ UNSPEC_LASX_XVPERMI_D))]
+ "ISA_HAS_LASX"
+ "xvpermi.d\t%u0,%u1,%2"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvpermi_d_<mode>_1"
+ [(set (match_operand:LASX_D 0 "register_operand" "=f")
+ (vec_select:LASX_D
+ (match_operand:LASX_D 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_0_to_3_operand")
+ (match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_0_to_3_operand")])))]
+ "ISA_HAS_LASX"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+ return "xvpermi.d\t%u0,%u1,%2";
+}
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+;; xvpermi.q
+(define_insn "lasx_xvpermi_q_<LASX:mode>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (unspec:LASX
+ [(match_operand:LASX 1 "register_operand" "0")
+ (match_operand:LASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand")]
+ UNSPEC_LASX_XVPERMI_Q))]
+ "ISA_HAS_LASX"
+ "xvpermi.q\t%u0,%u2,%3"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvpickve2gr_d<u>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (any_extend:DI
+ (vec_select:DI
+ (match_operand:V4DI 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")]))))]
+ "ISA_HAS_LASX"
+ "xvpickve2gr.d<u>\t%0,%u1,%2"
+ [(set_attr "type" "simd_copy")
+ (set_attr "mode" "V4DI")])
+
+(define_expand "vec_set<mode>"
+ [(match_operand:ILASX_DW 0 "register_operand")
+ (match_operand:<UNITMODE> 1 "reg_or_0_operand")
+ (match_operand 2 "const_<indeximm256>_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx index = GEN_INT (1 << INTVAL (operands[2]));
+ emit_insn (gen_lasx_xvinsgr2vr_<lasxfmt_f_wd> (operands[0], operands[1],
+ operands[0], index));
+ DONE;
+})
+
+(define_expand "vec_set<mode>"
+ [(match_operand:FLASX 0 "register_operand")
+ (match_operand:<UNITMODE> 1 "reg_or_0_operand")
+ (match_operand 2 "const_<indeximm256>_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx index = GEN_INT (1 << INTVAL (operands[2]));
+ emit_insn (gen_lasx_xvinsve0_<lasxfmt_f>_scalar (operands[0], operands[1],
+ operands[0], index));
+ DONE;
+})
+
+(define_expand "vec_extract<mode><unitmode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:LASX 1 "register_operand")
+ (match_operand 2 "const_<indeximm256>_operand")]
+ "ISA_HAS_LASX"
+{
+ loongarch_expand_vector_extract (operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_perm<mode>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand:LASX 1 "register_operand")
+ (match_operand:LASX 2 "register_operand")
+ (match_operand:<VIMODE256> 3 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ loongarch_expand_vec_perm_1 (operands);
+ DONE;
+})
+
+;; FIXME: 256??
+(define_expand "vcondu<LASX:mode><ILASX:mode>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand:LASX 1 "reg_or_m1_operand")
+ (match_operand:LASX 2 "reg_or_0_operand")
+ (match_operator 3 ""
+ [(match_operand:ILASX 4 "register_operand")
+ (match_operand:ILASX 5 "register_operand")])]
+ "ISA_HAS_LASX
+ && (GET_MODE_NUNITS (<LASX:MODE>mode)
+ == GET_MODE_NUNITS (<ILASX:MODE>mode))"
+{
+ loongarch_expand_vec_cond_expr (<LASX:MODE>mode, <LASX:VIMODE256>mode,
+ operands);
+ DONE;
+})
+
+;; FIXME: 256??
+(define_expand "vcond<LASX:mode><LASX_2:mode>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand:LASX 1 "reg_or_m1_operand")
+ (match_operand:LASX 2 "reg_or_0_operand")
+ (match_operator 3 ""
+ [(match_operand:LASX_2 4 "register_operand")
+ (match_operand:LASX_2 5 "register_operand")])]
+ "ISA_HAS_LASX
+ && (GET_MODE_NUNITS (<LASX:MODE>mode)
+ == GET_MODE_NUNITS (<LASX_2:MODE>mode))"
+{
+ loongarch_expand_vec_cond_expr (<LASX:MODE>mode, <LASX:VIMODE256>mode,
+ operands);
+ DONE;
+})
+
+;; Same as the vcond expanders above, but with an explicit mask operand.
+(define_expand "vcond_mask_<ILASX:mode><ILASX:mode>"
+ [(match_operand:ILASX 0 "register_operand")
+ (match_operand:ILASX 1 "reg_or_m1_operand")
+ (match_operand:ILASX 2 "reg_or_0_operand")
+ (match_operand:ILASX 3 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ loongarch_expand_vec_cond_mask_expr (<ILASX:MODE>mode,
+ <ILASX:VIMODE256>mode, operands);
+ DONE;
+})
+
+(define_expand "lasx_xvrepli<mode>"
+ [(match_operand:ILASX 0 "register_operand")
+ (match_operand 1 "const_imm10_operand")]
+ "ISA_HAS_LASX"
+{
+ if (<MODE>mode == V32QImode)
+ operands[1] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]),
+ <UNITMODE>mode));
+ emit_move_insn (operands[0],
+ loongarch_gen_const_int_vector (<MODE>mode, INTVAL (operands[1])));
+ DONE;
+})
+
+(define_expand "mov<mode>"
+ [(set (match_operand:LASX 0)
+ (match_operand:LASX 1))]
+ "ISA_HAS_LASX"
+{
+ if (loongarch_legitimize_move (<MODE>mode, operands[0], operands[1]))
+ DONE;
+})
+
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:LASX 0)
+ (match_operand:LASX 1))]
+ "ISA_HAS_LASX"
+{
+ if (loongarch_legitimize_move (<MODE>mode, operands[0], operands[1]))
+ DONE;
+})
+
+;; 256-bit LASX modes can only exist in LASX registers or memory.
+(define_insn "mov<mode>_lasx"
+ [(set (match_operand:LASX 0 "nonimmediate_operand" "=f,f,R,*r,*f")
+ (match_operand:LASX 1 "move_operand" "fYGYI,R,f,*f,*r"))]
+ "ISA_HAS_LASX"
+ { return loongarch_output_move (operands[0], operands[1]); }
+ [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "8,4,4,4,4")])
+
+
+(define_split
+ [(set (match_operand:LASX 0 "nonimmediate_operand")
+ (match_operand:LASX 1 "move_operand"))]
+ "reload_completed && ISA_HAS_LASX
+ && loongarch_split_move_insn_p (operands[0], operands[1])"
+ [(const_int 0)]
+{
+ loongarch_split_move_insn (operands[0], operands[1], curr_insn);
+ DONE;
+})
+
+;; Offset load
+(define_expand "lasx_mxld_<lasxfmt_f>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 2 "aq10<lasxfmt>_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
+ INTVAL (operands[2]));
+ loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr));
+ DONE;
+})
+
+;; Offset store
+(define_expand "lasx_mxst_<lasxfmt_f>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 2 "aq10<lasxfmt>_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
+ INTVAL (operands[2]));
+ loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]);
+ DONE;
+})
+
+;; LASX
+(define_insn "add<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f,f")
+ (plus:ILASX
+ (match_operand:ILASX 1 "register_operand" "f,f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_ximm5_operand" "f,Unv5,Uuv5")))]
+ "ISA_HAS_LASX"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "xvadd.<lasxfmt>\t%u0,%u1,%u2";
+ case 1:
+ {
+ HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (operands[2], 0));
+
+ operands[2] = GEN_INT (-val);
+ return "xvsubi.<lasxfmt_u>\t%u0,%u1,%d2";
+ }
+ case 2:
+ return "xvaddi.<lasxfmt_u>\t%u0,%u1,%E2";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "alu_type" "simd_add")
+ (set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (minus:ILASX
+ (match_operand:ILASX 1 "register_operand" "f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm5_operand" "f,Uuv5")))]
+ "ISA_HAS_LASX"
+ "@
+ xvsub.<lasxfmt>\t%u0,%u1,%u2
+ xvsubi.<lasxfmt_u>\t%u0,%u1,%E2"
+ [(set_attr "alu_type" "simd_add")
+ (set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (mult:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvmul.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvmadd_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (plus:ILASX (mult:ILASX (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand:ILASX 3 "register_operand" "f"))
+ (match_operand:ILASX 1 "register_operand" "0")))]
+ "ISA_HAS_LASX"
+ "xvmadd.<lasxfmt>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+
+
+(define_insn "lasx_xvmsub_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (minus:ILASX (match_operand:ILASX 1 "register_operand" "0")
+ (mult:ILASX (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand:ILASX 3 "register_operand" "f"))))]
+ "ISA_HAS_LASX"
+ "xvmsub.<lasxfmt>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "div<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (div:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return loongarch_lsx_output_division ("xvdiv.<lasxfmt>\t%u0,%u1,%u2",
+ operands);
+}
+ [(set_attr "type" "simd_div")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "udiv<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (udiv:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return loongarch_lsx_output_division ("xvdiv.<lasxfmt_u>\t%u0,%u1,%u2",
+ operands);
+}
+ [(set_attr "type" "simd_div")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mod<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (mod:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return loongarch_lsx_output_division ("xvmod.<lasxfmt>\t%u0,%u1,%u2",
+ operands);
+}
+ [(set_attr "type" "simd_div")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "umod<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (umod:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+{
+ return loongarch_lsx_output_division ("xvmod.<lasxfmt_u>\t%u0,%u1,%u2",
+ operands);
+}
+ [(set_attr "type" "simd_div")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f,f")
+ (xor:ILASX
+ (match_operand:ILASX 1 "register_operand" "f,f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
+ "ISA_HAS_LASX"
+ "@
+ xvxor.v\t%u0,%u1,%u2
+ xvbitrevi.%v0\t%u0,%u1,%V2
+ xvxori.b\t%u0,%u1,%B2"
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:LASX 0 "register_operand" "=f,f,f")
+ (ior:LASX
+ (match_operand:LASX 1 "register_operand" "f,f,f")
+ (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
+ "ISA_HAS_LASX"
+ "@
+ xvor.v\t%u0,%u1,%u2
+ xvbitseti.%v0\t%u0,%u1,%V2
+ xvori.b\t%u0,%u1,%B2"
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "and<mode>3"
+ [(set (match_operand:LASX 0 "register_operand" "=f,f,f")
+ (and:LASX
+ (match_operand:LASX 1 "register_operand" "f,f,f")
+ (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YZ,Urv8")))]
+ "ISA_HAS_LASX"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "xvand.v\t%u0,%u1,%u2";
+ case 1:
+ {
+ rtx elt0 = CONST_VECTOR_ELT (operands[2], 0);
+ unsigned HOST_WIDE_INT val = ~UINTVAL (elt0);
+ operands[2] = loongarch_gen_const_int_vector (<MODE>mode, val & (-val));
+ return "xvbitclri.%v0\t%u0,%u1,%V2";
+ }
+ case 2:
+ return "xvandi.b\t%u0,%u1,%B2";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (not:ILASX (match_operand:ILASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvnor.v\t%u0,%u1,%u1"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "V32QI")])
+
+;; LASX
+(define_insn "vlshr<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (lshiftrt:ILASX
+ (match_operand:ILASX 1 "register_operand" "f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ "ISA_HAS_LASX"
+ "@
+ xvsrl.<lasxfmt>\t%u0,%u1,%u2
+ xvsrli.<lasxfmt>\t%u0,%u1,%E2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+;; LASX ">>"
+(define_insn "vashr<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (ashiftrt:ILASX
+ (match_operand:ILASX 1 "register_operand" "f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ "ISA_HAS_LASX"
+ "@
+ xvsra.<lasxfmt>\t%u0,%u1,%u2
+ xvsrai.<lasxfmt>\t%u0,%u1,%E2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+;; LASX "<<"
+(define_insn "vashl<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (ashift:ILASX
+ (match_operand:ILASX 1 "register_operand" "f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ "ISA_HAS_LASX"
+ "@
+ xvsll.<lasxfmt>\t%u0,%u1,%u2
+ xvslli.<lasxfmt>\t%u0,%u1,%E2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+
+(define_insn "add<mode>3"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (plus:FLASX (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfadd.<flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (minus:FLASX (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfsub.<flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (mult:FLASX (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfmul.<flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fmul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "div<mode>3"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (div:FLASX (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfdiv.<flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fma<mode>4"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (fma:FLASX (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")
+ (match_operand:FLASX 3 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfmadd.<flasxfmt>\t%u0,%u1,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fnma<mode>4"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (fma:FLASX (neg:FLASX (match_operand:FLASX 1 "register_operand" "f"))
+ (match_operand:FLASX 2 "register_operand" "f")
+ (match_operand:FLASX 3 "register_operand" "0")))]
+ "ISA_HAS_LASX"
+ "xvfnmsub.<flasxfmt>\t%u0,%u1,%u2,%u0"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sqrt<mode>2"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (sqrt:FLASX (match_operand:FLASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfsqrt.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvadda_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (plus:ILASX (abs:ILASX (match_operand:ILASX 1 "register_operand" "f"))
+ (abs:ILASX (match_operand:ILASX 2 "register_operand" "f"))))]
+ "ISA_HAS_LASX"
+ "xvadda.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "ssadd<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (ss_plus:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvsadd.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "usadd<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (us_plus:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvsadd.<lasxfmt_u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvabsd_s_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVABSD_S))]
+ "ISA_HAS_LASX"
+ "xvabsd.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvabsd_u_<lasxfmt_u>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVABSD_U))]
+ "ISA_HAS_LASX"
+ "xvabsd.<lasxfmt_u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvavg_s_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVAVG_S))]
+ "ISA_HAS_LASX"
+ "xvavg.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvavg_u_<lasxfmt_u>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVAVG_U))]
+ "ISA_HAS_LASX"
+ "xvavg.<lasxfmt_u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvavgr_s_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVAVGR_S))]
+ "ISA_HAS_LASX"
+ "xvavgr.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvavgr_u_<lasxfmt_u>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVAVGR_U))]
+ "ISA_HAS_LASX"
+ "xvavgr.<lasxfmt_u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvbitclr_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVBITCLR))]
+ "ISA_HAS_LASX"
+ "xvbitclr.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvbitclri_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")]
+ UNSPEC_LASX_XVBITCLRI))]
+ "ISA_HAS_LASX"
+ "xvbitclri.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvbitrev_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVBITREV))]
+ "ISA_HAS_LASX"
+ "xvbitrev.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvbitrevi_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")]
+ UNSPEC_LASX_XVBITREVI))]
+ "ISA_HAS_LASX"
+ "xvbitrevi.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvbitsel_<lasxfmt_f>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (ior:LASX (and:LASX (not:LASX
+ (match_operand:LASX 3 "register_operand" "f"))
+ (match_operand:LASX 1 "register_operand" "f"))
+ (and:LASX (match_dup 3)
+ (match_operand:LASX 2 "register_operand" "f"))))]
+ "ISA_HAS_LASX"
+ "xvbitsel.v\t%u0,%u1,%u2,%u3"
+ [(set_attr "type" "simd_bitmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvbitseli_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (ior:V32QI (and:V32QI (not:V32QI
+ (match_operand:V32QI 1 "register_operand" "0"))
+ (match_operand:V32QI 2 "register_operand" "f"))
+ (and:V32QI (match_dup 1)
+ (match_operand:V32QI 3 "const_vector_same_val_operand" "Urv8"))))]
+ "ISA_HAS_LASX"
+ "xvbitseli.b\t%u0,%u2,%B3"
+ [(set_attr "type" "simd_bitmov")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvbitset_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVBITSET))]
+ "ISA_HAS_LASX"
+ "xvbitset.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvbitseti_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")]
+ UNSPEC_LASX_XVBITSETI))]
+ "ISA_HAS_LASX"
+ "xvbitseti.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvs<ICC:icc>_<ILASX:lasxfmt><cmpi_1>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (ICC:ILASX
+ (match_operand:ILASX 1 "register_operand" "f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_<ICC:cmpi>imm5_operand" "f,U<ICC:cmpi>v5")))]
+ "ISA_HAS_LASX"
+ "@
+ xvs<ICC:icc>.<ILASX:lasxfmt><cmpi_1>\t%u0,%u1,%u2
+ xvs<ICC:icci>.<ILASX:lasxfmt><cmpi_1>\t%u0,%u1,%E2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "vec_cmp<mode><mode256_i>"
+ [(set (match_operand:<VIMODE256> 0 "register_operand")
+ (match_operator 1 ""
+ [(match_operand:LASX 2 "register_operand")
+ (match_operand:LASX 3 "register_operand")]))]
+ "ISA_HAS_LASX"
+{
+ bool ok = loongarch_expand_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmpu<ILASX:mode><mode256_i>"
+ [(set (match_operand:<VIMODE256> 0 "register_operand")
+ (match_operator 1 ""
+ [(match_operand:ILASX 2 "register_operand")
+ (match_operand:ILASX 3 "register_operand")]))]
+ "ISA_HAS_LASX"
+{
+ bool ok = loongarch_expand_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_insn "lasx_xvfclass_<flasxfmt>"
+ [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+ (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFCLASS))]
+ "ISA_HAS_LASX"
+ "xvfclass.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fclass")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvfcmp_caf_<flasxfmt>"
+ [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+ (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFCMP_CAF))]
+ "ISA_HAS_LASX"
+ "xvfcmp.caf.<flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fcmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvfcmp_cune_<FLASX:flasxfmt>"
+ [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+ (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFCMP_CUNE))]
+ "ISA_HAS_LASX"
+ "xvfcmp.cune.<FLASX:flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fcmp")
+ (set_attr "mode" "<MODE>")])
+
+
+
+(define_int_iterator FSC256_UNS [UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SUN
+ UNSPEC_LASX_XVFCMP_SOR UNSPEC_LASX_XVFCMP_SEQ
+ UNSPEC_LASX_XVFCMP_SNE UNSPEC_LASX_XVFCMP_SUEQ
+ UNSPEC_LASX_XVFCMP_SUNE UNSPEC_LASX_XVFCMP_SULE
+ UNSPEC_LASX_XVFCMP_SULT UNSPEC_LASX_XVFCMP_SLE
+ UNSPEC_LASX_XVFCMP_SLT])
+
+(define_int_attr fsc256
+ [(UNSPEC_LASX_XVFCMP_SAF "saf")
+ (UNSPEC_LASX_XVFCMP_SUN "sun")
+ (UNSPEC_LASX_XVFCMP_SOR "sor")
+ (UNSPEC_LASX_XVFCMP_SEQ "seq")
+ (UNSPEC_LASX_XVFCMP_SNE "sne")
+ (UNSPEC_LASX_XVFCMP_SUEQ "sueq")
+ (UNSPEC_LASX_XVFCMP_SUNE "sune")
+ (UNSPEC_LASX_XVFCMP_SULE "sule")
+ (UNSPEC_LASX_XVFCMP_SULT "sult")
+ (UNSPEC_LASX_XVFCMP_SLE "sle")
+ (UNSPEC_LASX_XVFCMP_SLT "slt")])
+
+(define_insn "lasx_xvfcmp_<vfcond:fcc>_<FLASX:flasxfmt>"
+ [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+ (vfcond:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfcmp.<vfcond:fcc>.<FLASX:flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fcmp")
+ (set_attr "mode" "<MODE>")])
+
+
+(define_insn "lasx_xvfcmp_<fsc256>_<FLASX:flasxfmt>"
+ [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+ (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")]
+ FSC256_UNS))]
+ "ISA_HAS_LASX"
+ "xvfcmp.<fsc256>.<FLASX:flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fcmp")
+ (set_attr "mode" "<MODE>")])
+
+
+(define_mode_attr fint256
+ [(V8SF "v8si")
+ (V4DF "v4di")])
+
+(define_mode_attr FINTCNV256
+ [(V8SF "I2S")
+ (V4DF "I2D")])
+
+(define_mode_attr FINTCNV256_2
+ [(V8SF "S2I")
+ (V4DF "D2I")])
+
+(define_insn "float<fint256><FLASX:mode>2"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (float:FLASX (match_operand:<VIMODE256> 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvffint.<flasxfmt>.<ilasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV256>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "floatuns<fint256><FLASX:mode>2"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unsigned_float:FLASX
+ (match_operand:<VIMODE256> 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvffint.<flasxfmt>.<ilasxfmt_u>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV256>")
+ (set_attr "mode" "<MODE>")])
+
+(define_mode_attr FFQ256
+ [(V4SF "V16HI")
+ (V2DF "V8SI")])
+
+(define_insn "lasx_xvreplgr2vr_<lasxfmt_f>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (vec_duplicate:ILASX
+ (match_operand:<UNITMODE> 1 "reg_or_0_operand" "r,J")))]
+ "ISA_HAS_LASX"
+{
+ if (which_alternative == 1)
+ return "xvldi.b\t%u0,0" ;
+
+ if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode))
+ return "#";
+ else
+ return "xvreplgr2vr.<lasxfmt>\t%u0,%z1";
+}
+ [(set_attr "type" "simd_fill")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "8")])
+
+(define_insn "logb<mode>2"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFLOGB))]
+ "ISA_HAS_LASX"
+ "xvflogb.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_flog2")
+ (set_attr "mode" "<MODE>")])
+
+
+(define_insn "smax<mode>3"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (smax:FLASX (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfmax.<flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fminmax")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvfmaxa_<flasxfmt>"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (if_then_else:FLASX
+ (gt (abs:FLASX (match_operand:FLASX 1 "register_operand" "f"))
+ (abs:FLASX (match_operand:FLASX 2 "register_operand" "f")))
+ (match_dup 1)
+ (match_dup 2)))]
+ "ISA_HAS_LASX"
+ "xvfmaxa.<flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fminmax")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "smin<mode>3"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (smin:FLASX (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfmin.<flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fminmax")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvfmina_<flasxfmt>"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (if_then_else:FLASX
+ (lt (abs:FLASX (match_operand:FLASX 1 "register_operand" "f"))
+ (abs:FLASX (match_operand:FLASX 2 "register_operand" "f")))
+ (match_dup 1)
+ (match_dup 2)))]
+ "ISA_HAS_LASX"
+ "xvfmina.<flasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fminmax")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvfrecip_<flasxfmt>"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRECIP))]
+ "ISA_HAS_LASX"
+ "xvfrecip.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvfrint_<flasxfmt>"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINT))]
+ "ISA_HAS_LASX"
+ "xvfrint.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvfrsqrt_<flasxfmt>"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRSQRT))]
+ "ISA_HAS_LASX"
+ "xvfrsqrt.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvftint_s_<ilasxfmt>_<flasxfmt>"
+ [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+ (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINT_S))]
+ "ISA_HAS_LASX"
+ "xvftint.<ilasxfmt>.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV256_2>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvftint_u_<ilasxfmt_u>_<flasxfmt>"
+ [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+ (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINT_U))]
+ "ISA_HAS_LASX"
+ "xvftint.<ilasxfmt_u>.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV256_2>")
+ (set_attr "mode" "<MODE>")])
+
+
+
+(define_insn "fix_trunc<FLASX:mode><mode256_i>2"
+ [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+ (fix:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvftintrz.<ilasxfmt>.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV256_2>")
+ (set_attr "mode" "<MODE>")])
+
+
+(define_insn "fixuns_trunc<FLASX:mode><mode256_i>2"
+ [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+ (unsigned_fix:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvftintrz.<ilasxfmt_u>.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV256_2>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvh<optab>w_h<u>_b<u>"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (addsub:V16HI
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)])))
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)])))))]
+ "ISA_HAS_LASX"
+ "xvh<optab>w.h<u>.b<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V16HI")])
+
+(define_insn "lasx_xvh<optab>w_w<u>_h<u>"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (addsub:V8SI
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))))]
+ "ISA_HAS_LASX"
+ "xvh<optab>w.w<u>.h<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_xvh<optab>w_d<u>_w<u>"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (addsub:V4DI
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))))]
+ "ISA_HAS_LASX"
+ "xvh<optab>w.d<u>.w<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvpackev_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (vec_select:V32QI
+ (vec_concat:V64QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (match_operand:V32QI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 32)
+ (const_int 2) (const_int 34)
+ (const_int 4) (const_int 36)
+ (const_int 6) (const_int 38)
+ (const_int 8) (const_int 40)
+ (const_int 10) (const_int 42)
+ (const_int 12) (const_int 44)
+ (const_int 14) (const_int 46)
+ (const_int 16) (const_int 48)
+ (const_int 18) (const_int 50)
+ (const_int 20) (const_int 52)
+ (const_int 22) (const_int 54)
+ (const_int 24) (const_int 56)
+ (const_int 26) (const_int 58)
+ (const_int 28) (const_int 60)
+ (const_int 30) (const_int 62)])))]
+ "ISA_HAS_LASX"
+ "xvpackev.b\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V32QI")])
+
+
+(define_insn "lasx_xvpackev_h"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (vec_select:V16HI
+ (vec_concat:V32HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (match_operand:V16HI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 2) (const_int 18)
+ (const_int 4) (const_int 20)
+ (const_int 6) (const_int 22)
+ (const_int 8) (const_int 24)
+ (const_int 10) (const_int 26)
+ (const_int 12) (const_int 28)
+ (const_int 14) (const_int 30)])))]
+ "ISA_HAS_LASX"
+ "xvpackev.h\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16HI")])
+
+(define_insn "lasx_xvpackev_w"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (vec_select:V8SI
+ (vec_concat:V16SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (match_operand:V8SI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 2) (const_int 10)
+ (const_int 4) (const_int 12)
+ (const_int 6) (const_int 14)])))]
+ "ISA_HAS_LASX"
+ "xvpackev.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_xvpackev_w_f"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (match_operand:V8SF 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 2) (const_int 10)
+ (const_int 4) (const_int 12)
+ (const_int 6) (const_int 14)])))]
+ "ISA_HAS_LASX"
+ "xvpackev.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvilvh_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (vec_select:V32QI
+ (vec_concat:V64QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (match_operand:V32QI 2 "register_operand" "f"))
+ (parallel [(const_int 8) (const_int 40)
+ (const_int 9) (const_int 41)
+ (const_int 10) (const_int 42)
+ (const_int 11) (const_int 43)
+ (const_int 12) (const_int 44)
+ (const_int 13) (const_int 45)
+ (const_int 14) (const_int 46)
+ (const_int 15) (const_int 47)
+ (const_int 24) (const_int 56)
+ (const_int 25) (const_int 57)
+ (const_int 26) (const_int 58)
+ (const_int 27) (const_int 59)
+ (const_int 28) (const_int 60)
+ (const_int 29) (const_int 61)
+ (const_int 30) (const_int 62)
+ (const_int 31) (const_int 63)])))]
+ "ISA_HAS_LASX"
+ "xvilvh.b\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvilvh_h"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (vec_select:V16HI
+ (vec_concat:V32HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (match_operand:V16HI 2 "register_operand" "f"))
+ (parallel [(const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "ISA_HAS_LASX"
+ "xvilvh.h\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16HI")])
+
+(define_mode_attr xvilvh_suffix
+ [(V8SI "") (V8SF "_f")
+ (V4DI "") (V4DF "_f")])
+
+(define_insn "lasx_xvilvh_w<xvilvh_suffix>"
+ [(set (match_operand:LASX_W 0 "register_operand" "=f")
+ (vec_select:LASX_W
+ (vec_concat:<VEMODE256>
+ (match_operand:LASX_W 1 "register_operand" "f")
+ (match_operand:LASX_W 2 "register_operand" "f"))
+ (parallel [(const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "ISA_HAS_LASX"
+ "xvilvh.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvilvh_d<xvilvh_suffix>"
+ [(set (match_operand:LASX_D 0 "register_operand" "=f")
+ (vec_select:LASX_D
+ (vec_concat:<VEMODE256>
+ (match_operand:LASX_D 1 "register_operand" "f")
+ (match_operand:LASX_D 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "ISA_HAS_LASX"
+ "xvilvh.d\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvpackod_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (vec_select:V32QI
+ (vec_concat:V64QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (match_operand:V32QI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 33)
+ (const_int 3) (const_int 35)
+ (const_int 5) (const_int 37)
+ (const_int 7) (const_int 39)
+ (const_int 9) (const_int 41)
+ (const_int 11) (const_int 43)
+ (const_int 13) (const_int 45)
+ (const_int 15) (const_int 47)
+ (const_int 17) (const_int 49)
+ (const_int 19) (const_int 51)
+ (const_int 21) (const_int 53)
+ (const_int 23) (const_int 55)
+ (const_int 25) (const_int 57)
+ (const_int 27) (const_int 59)
+ (const_int 29) (const_int 61)
+ (const_int 31) (const_int 63)])))]
+ "ISA_HAS_LASX"
+ "xvpackod.b\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V32QI")])
+
+
+(define_insn "lasx_xvpackod_h"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (vec_select:V16HI
+ (vec_concat:V32HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (match_operand:V16HI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 17)
+ (const_int 3) (const_int 19)
+ (const_int 5) (const_int 21)
+ (const_int 7) (const_int 23)
+ (const_int 9) (const_int 25)
+ (const_int 11) (const_int 27)
+ (const_int 13) (const_int 29)
+ (const_int 15) (const_int 31)])))]
+ "ISA_HAS_LASX"
+ "xvpackod.h\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16HI")])
+
+
+(define_insn "lasx_xvpackod_w"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (vec_select:V8SI
+ (vec_concat:V16SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (match_operand:V8SI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 9)
+ (const_int 3) (const_int 11)
+ (const_int 5) (const_int 13)
+ (const_int 7) (const_int 15)])))]
+ "ISA_HAS_LASX"
+ "xvpackod.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SI")])
+
+
+(define_insn "lasx_xvpackod_w_f"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (match_operand:V8SF 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 9)
+ (const_int 3) (const_int 11)
+ (const_int 5) (const_int 13)
+ (const_int 7) (const_int 15)])))]
+ "ISA_HAS_LASX"
+ "xvpackod.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvilvl_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (vec_select:V32QI
+ (vec_concat:V64QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (match_operand:V32QI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 32)
+ (const_int 1) (const_int 33)
+ (const_int 2) (const_int 34)
+ (const_int 3) (const_int 35)
+ (const_int 4) (const_int 36)
+ (const_int 5) (const_int 37)
+ (const_int 6) (const_int 38)
+ (const_int 7) (const_int 39)
+ (const_int 16) (const_int 48)
+ (const_int 17) (const_int 49)
+ (const_int 18) (const_int 50)
+ (const_int 19) (const_int 51)
+ (const_int 20) (const_int 52)
+ (const_int 21) (const_int 53)
+ (const_int 22) (const_int 54)
+ (const_int 23) (const_int 55)])))]
+ "ISA_HAS_LASX"
+ "xvilvl.b\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvilvl_h"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (vec_select:V16HI
+ (vec_concat:V32HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (match_operand:V16HI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)])))]
+ "ISA_HAS_LASX"
+ "xvilvl.h\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16HI")])
+
+(define_insn "lasx_xvilvl_w"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (vec_select:V8SI
+ (vec_concat:V16SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (match_operand:V8SI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))]
+ "ISA_HAS_LASX"
+ "xvilvl.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_xvilvl_w_f"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (match_operand:V8SF 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))]
+ "ISA_HAS_LASX"
+ "xvilvl.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvilvl_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (vec_select:V4DI
+ (vec_concat:V8DI
+ (match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "ISA_HAS_LASX"
+ "xvilvl.d\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvilvl_d_f"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "f")
+ (match_operand:V4DF 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "ISA_HAS_LASX"
+ "xvilvl.d\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "smax<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (smax:ILASX (match_operand:ILASX 1 "register_operand" "f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_simm5_operand" "f,Usv5")))]
+ "ISA_HAS_LASX"
+ "@
+ xvmax.<lasxfmt>\t%u0,%u1,%u2
+ xvmaxi.<lasxfmt>\t%u0,%u1,%E2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "umax<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (umax:ILASX (match_operand:ILASX 1 "register_operand" "f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm5_operand" "f,Uuv5")))]
+ "ISA_HAS_LASX"
+ "@
+ xvmax.<lasxfmt_u>\t%u0,%u1,%u2
+ xvmaxi.<lasxfmt_u>\t%u0,%u1,%B2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "smin<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (smin:ILASX (match_operand:ILASX 1 "register_operand" "f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_simm5_operand" "f,Usv5")))]
+ "ISA_HAS_LASX"
+ "@
+ xvmin.<lasxfmt>\t%u0,%u1,%u2
+ xvmini.<lasxfmt>\t%u0,%u1,%E2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "umin<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (umin:ILASX (match_operand:ILASX 1 "register_operand" "f,f")
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm5_operand" "f,Uuv5")))]
+ "ISA_HAS_LASX"
+ "@
+ xvmin.<lasxfmt_u>\t%u0,%u1,%u2
+ xvmini.<lasxfmt_u>\t%u0,%u1,%B2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvclo_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (clz:ILASX (not:ILASX (match_operand:ILASX 1 "register_operand" "f"))))]
+ "ISA_HAS_LASX"
+ "xvclo.<lasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "clz<mode>2"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (clz:ILASX (match_operand:ILASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvclz.<lasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvnor_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f,f")
+ (and:ILASX (not:ILASX (match_operand:ILASX 1 "register_operand" "f,f"))
+ (not:ILASX (match_operand:ILASX 2 "reg_or_vector_same_val_operand" "f,Urv8"))))]
+ "ISA_HAS_LASX"
+ "@
+ xvnor.v\t%u0,%u1,%u2
+ xvnori.b\t%u0,%u1,%B2"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvpickev_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (vec_select:V32QI
+ (vec_concat:V64QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (match_operand:V32QI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 32) (const_int 34)
+ (const_int 36) (const_int 38)
+ (const_int 40) (const_int 42)
+ (const_int 44) (const_int 46)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)
+ (const_int 48) (const_int 50)
+ (const_int 52) (const_int 54)
+ (const_int 56) (const_int 58)
+ (const_int 60) (const_int 62)])))]
+ "ISA_HAS_LASX"
+ "xvpickev.b\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvpickev_h"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (vec_select:V16HI
+ (vec_concat:V32HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (match_operand:V16HI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)])))]
+ "ISA_HAS_LASX"
+ "xvpickev.h\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16HI")])
+
+(define_insn "lasx_xvpickev_w"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (vec_select:V8SI
+ (vec_concat:V16SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (match_operand:V8SI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 8) (const_int 10)
+ (const_int 4) (const_int 6)
+ (const_int 12) (const_int 14)])))]
+ "ISA_HAS_LASX"
+ "xvpickev.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_xvpickev_w_f"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (match_operand:V8SF 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 8) (const_int 10)
+ (const_int 4) (const_int 6)
+ (const_int 12) (const_int 14)])))]
+ "ISA_HAS_LASX"
+ "xvpickev.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvpickod_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (vec_select:V32QI
+ (vec_concat:V64QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (match_operand:V32QI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 33) (const_int 35)
+ (const_int 37) (const_int 39)
+ (const_int 41) (const_int 43)
+ (const_int 45) (const_int 47)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)
+ (const_int 49) (const_int 51)
+ (const_int 53) (const_int 55)
+ (const_int 57) (const_int 59)
+ (const_int 61) (const_int 63)])))]
+ "ISA_HAS_LASX"
+ "xvpickod.b\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvpickod_h"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (vec_select:V16HI
+ (vec_concat:V32HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (match_operand:V16HI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)])))]
+ "ISA_HAS_LASX"
+ "xvpickod.h\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16HI")])
+
+(define_insn "lasx_xvpickod_w"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (vec_select:V8SI
+ (vec_concat:V16SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (match_operand:V8SI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 9) (const_int 11)
+ (const_int 5) (const_int 7)
+ (const_int 13) (const_int 15)])))]
+ "ISA_HAS_LASX"
+ "xvpickod.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_xvpickod_w_f"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (match_operand:V8SF 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 9) (const_int 11)
+ (const_int 5) (const_int 7)
+ (const_int 13) (const_int 15)])))]
+ "ISA_HAS_LASX"
+ "xvpickod.w\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "popcount<mode>2"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (popcount:ILASX (match_operand:ILASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvpcnt.<lasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_pcnt")
+ (set_attr "mode" "<MODE>")])
+
+
+(define_insn "lasx_xvsat_s_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")]
+ UNSPEC_LASX_XVSAT_S))]
+ "ISA_HAS_LASX"
+ "xvsat.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_sat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsat_u_<lasxfmt_u>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")]
+ UNSPEC_LASX_XVSAT_U))]
+ "ISA_HAS_LASX"
+ "xvsat.<lasxfmt_u>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_sat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvshuf4i_<lasxfmt_f>"
+ [(set (match_operand:LASX_WHB_W 0 "register_operand" "=f")
+ (unspec:LASX_WHB_W [(match_operand:LASX_WHB_W 1 "register_operand" "f")
+ (match_operand 2 "const_uimm8_operand")]
+ UNSPEC_LASX_XVSHUF4I))]
+ "ISA_HAS_LASX"
+ "xvshuf4i.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shf")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvshuf4i_<lasxfmt_f>_1"
+ [(set (match_operand:LASX_W 0 "register_operand" "=f")
+ (vec_select:LASX_W
+ (match_operand:LASX_W 1 "nonimmediate_operand" "f")
+ (parallel [(match_operand 2 "const_0_to_3_operand")
+ (match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_0_to_3_operand")
+ (match_operand 6 "const_4_to_7_operand")
+ (match_operand 7 "const_4_to_7_operand")
+ (match_operand 8 "const_4_to_7_operand")
+ (match_operand 9 "const_4_to_7_operand")])))]
+ "ISA_HAS_LASX
+ && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
+ && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
+ && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
+ && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "xvshuf4i.w\t%u0,%u1,%2";
+}
+ [(set_attr "type" "simd_shf")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrar_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSRAR))]
+ "ISA_HAS_LASX"
+ "xvsrar.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrari_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")]
+ UNSPEC_LASX_XVSRARI))]
+ "ISA_HAS_LASX"
+ "xvsrari.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrlr_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSRLR))]
+ "ISA_HAS_LASX"
+ "xvsrlr.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrlri_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")]
+ UNSPEC_LASX_XVSRLRI))]
+ "ISA_HAS_LASX"
+ "xvsrlri.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssub_s_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (ss_minus:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvssub.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssub_u_<lasxfmt_u>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (us_minus:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvssub.<lasxfmt_u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvshuf_<lasxfmt_f>"
+ [(set (match_operand:LASX_DWH 0 "register_operand" "=f")
+ (unspec:LASX_DWH [(match_operand:LASX_DWH 1 "register_operand" "0")
+ (match_operand:LASX_DWH 2 "register_operand" "f")
+ (match_operand:LASX_DWH 3 "register_operand" "f")]
+ UNSPEC_LASX_XVSHUF))]
+ "ISA_HAS_LASX"
+ "xvshuf.<lasxfmt>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_sld")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvshuf_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (unspec:V32QI [(match_operand:V32QI 1 "register_operand" "f")
+ (match_operand:V32QI 2 "register_operand" "f")
+ (match_operand:V32QI 3 "register_operand" "f")]
+ UNSPEC_LASX_XVSHUF_B))]
+ "ISA_HAS_LASX"
+ "xvshuf.b\t%u0,%u1,%u2,%u3"
+ [(set_attr "type" "simd_sld")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvreplve0_<lasxfmt_f>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (vec_duplicate:LASX
+ (vec_select:<UNITMODE>
+ (match_operand:LASX 1 "register_operand" "f")
+ (parallel [(const_int 0)]))))]
+ "ISA_HAS_LASX"
+ "xvreplve0.<lasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvrepl128vei_b_internal"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (vec_duplicate:V32QI
+ (vec_select:V32QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_uimm4_operand" "")
+ (match_dup 2) (match_dup 2) (match_dup 2)
+ (match_dup 2) (match_dup 2) (match_dup 2)
+ (match_dup 2) (match_dup 2) (match_dup 2)
+ (match_dup 2) (match_dup 2) (match_dup 2)
+ (match_dup 2) (match_dup 2) (match_dup 2)
+ (match_operand 3 "const_16_to_31_operand" "")
+ (match_dup 3) (match_dup 3) (match_dup 3)
+ (match_dup 3) (match_dup 3) (match_dup 3)
+ (match_dup 3) (match_dup 3) (match_dup 3)
+ (match_dup 3) (match_dup 3) (match_dup 3)
+ (match_dup 3) (match_dup 3) (match_dup 3)]))))]
+ "ISA_HAS_LASX && ((INTVAL (operands[3]) - INTVAL (operands[2])) == 16)"
+ "xvrepl128vei.b\t%u0,%u1,%2"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvrepl128vei_h_internal"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (vec_duplicate:V16HI
+ (vec_select:V16HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_uimm3_operand" "")
+ (match_dup 2) (match_dup 2) (match_dup 2)
+ (match_dup 2) (match_dup 2) (match_dup 2)
+ (match_dup 2)
+ (match_operand 3 "const_8_to_15_operand" "")
+ (match_dup 3) (match_dup 3) (match_dup 3)
+ (match_dup 3) (match_dup 3) (match_dup 3)
+ (match_dup 3)]))))]
+ "ISA_HAS_LASX && ((INTVAL (operands[3]) - INTVAL (operands[2])) == 8)"
+ "xvrepl128vei.h\t%u0,%u1,%2"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V16HI")])
+
+(define_insn "lasx_xvrepl128vei_w_internal"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (vec_duplicate:V8SI
+ (vec_select:V8SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_dup 2) (match_dup 2) (match_dup 2)
+ (match_operand 3 "const_4_to_7_operand" "")
+ (match_dup 3) (match_dup 3) (match_dup 3)]))))]
+ "ISA_HAS_LASX && ((INTVAL (operands[3]) - INTVAL (operands[2])) == 4)"
+ "xvrepl128vei.w\t%u0,%u1,%2"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_xvrepl128vei_d_internal"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (vec_duplicate:V4DI
+ (vec_select:V4DI
+ (match_operand:V4DI 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_0_or_1_operand" "")
+ (match_dup 2)
+ (match_operand 3 "const_2_or_3_operand" "")
+ (match_dup 3)]))))]
+ "ISA_HAS_LASX && ((INTVAL (operands[3]) - INTVAL (operands[2])) == 2)"
+ "xvrepl128vei.d\t%u0,%u1,%2"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvrepl128vei_<lasxfmt_f>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (unspec:LASX [(match_operand:LASX 1 "register_operand" "f")
+ (match_operand 2 "const_<indeximm_lo>_operand" "")]
+ UNSPEC_LASX_XVREPL128VEI))]
+ "ISA_HAS_LASX"
+ "xvrepl128vei.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvreplve0_<lasxfmt_f>_scalar"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (vec_duplicate:FLASX
+ (match_operand:<UNITMODE> 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvreplve0.<lasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvreplve0_q"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (unspec:V32QI [(match_operand:V32QI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVREPLVE0_Q))]
+ "ISA_HAS_LASX"
+ "xvreplve0.q\t%u0,%u1"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvfcvt_h_s"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (unspec:V16HI [(match_operand:V8SF 1 "register_operand" "f")
+ (match_operand:V8SF 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFCVT))]
+ "ISA_HAS_LASX"
+ "xvfcvt.h.s\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V16HI")])
+
+(define_insn "lasx_xvfcvt_s_d"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (unspec:V8SF [(match_operand:V4DF 1 "register_operand" "f")
+ (match_operand:V4DF 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFCVT))]
+ "ISA_HAS_LASX"
+ "xvfcvt.s.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_pack_trunc_v4df"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (vec_concat:V8SF
+ (float_truncate:V4SF (match_operand:V4DF 1 "register_operand" "f"))
+ (float_truncate:V4SF (match_operand:V4DF 2 "register_operand" "f"))))]
+ "ISA_HAS_LASX"
+ "xvfcvt.s.d\t%u0,%u2,%u1\n\txvpermi.d\t%u0,%u0,0xd8"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V8SF")
+ (set_attr "length" "8")])
+
+;; Define for builtin function.
+(define_insn "lasx_xvfcvth_s_h"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (unspec:V8SF [(match_operand:V16HI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFCVTH))]
+ "ISA_HAS_LASX"
+ "xvfcvth.s.h\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V8SF")])
+
+;; Define for builtin function.
+(define_insn "lasx_xvfcvth_d_s"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 6) (const_int 7)]))))]
+ "ISA_HAS_LASX"
+ "xvfcvth.d.s\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4DF")
+ (set_attr "length" "12")])
+
+;; Define for gen insn.
+(define_insn "lasx_xvfcvth_d_insn"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "ISA_HAS_LASX"
+ "xvpermi.d\t%u0,%u1,0xfa\n\txvfcvtl.d.s\t%u0,%u0"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4DF")
+ (set_attr "length" "12")])
+
+;; Define for builtin function.
+(define_insn "lasx_xvfcvtl_s_h"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (unspec:V8SF [(match_operand:V16HI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFCVTL))]
+ "ISA_HAS_LASX"
+ "xvfcvtl.s.h\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V8SF")])
+
+;; Define for builtin function.
+(define_insn "lasx_xvfcvtl_d_s"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 4) (const_int 5)]))))]
+ "ISA_HAS_LASX"
+ "xvfcvtl.d.s\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4DF")
+ (set_attr "length" "8")])
+
+;; Define for gen insn.
+(define_insn "lasx_xvfcvtl_d_insn"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "ISA_HAS_LASX"
+ "xvpermi.d\t%u0,%u1,0x50\n\txvfcvtl.d.s\t%u0,%u0"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4DF")
+ (set_attr "length" "8")])
+
+(define_code_attr lasxbr
+ [(eq "xbz")
+ (ne "xbnz")])
+
+(define_code_attr lasxeq_v
+ [(eq "eqz")
+ (ne "nez")])
+
+(define_code_attr lasxne_v
+ [(eq "nez")
+ (ne "eqz")])
+
+(define_code_attr lasxeq
+ [(eq "anyeqz")
+ (ne "allnez")])
+
+(define_code_attr lasxne
+ [(eq "allnez")
+ (ne "anyeqz")])
+
+(define_insn "lasx_<lasxbr>_<lasxfmt_f>"
+ [(set (pc)
+ (if_then_else
+ (equality_op
+ (unspec:SI [(match_operand:LASX 1 "register_operand" "f")]
+ UNSPEC_LASX_BRANCH)
+ (match_operand:SI 2 "const_0_operand"))
+ (label_ref (match_operand 0))
+ (pc)))
+ (clobber (match_scratch:FCC 3 "=z"))]
+ "ISA_HAS_LASX"
+{
+ return loongarch_output_conditional_branch (insn, operands,
+ "xvset<lasxeq>.<lasxfmt>\t%Z3%u1\n\tbcnez\t%Z3%0",
+					      "xvset<lasxne>.<lasxfmt>\t%Z3%u1\n\tbcnez\t%Z3%0");
+}
+ [(set_attr "type" "simd_branch")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_<lasxbr>_v_<lasxfmt_f>"
+ [(set (pc)
+ (if_then_else
+ (equality_op
+ (unspec:SI [(match_operand:LASX 1 "register_operand" "f")]
+ UNSPEC_LASX_BRANCH_V)
+ (match_operand:SI 2 "const_0_operand"))
+ (label_ref (match_operand 0))
+ (pc)))
+ (clobber (match_scratch:FCC 3 "=z"))]
+ "ISA_HAS_LASX"
+{
+ return loongarch_output_conditional_branch (insn, operands,
+ "xvset<lasxeq_v>.v\t%Z3%u1\n\tbcnez\t%Z3%0",
+ "xvset<lasxne_v>.v\t%Z3%u1\n\tbcnez\t%Z3%0");
+}
+ [(set_attr "type" "simd_branch")
+ (set_attr "mode" "<MODE>")])
+
+;; loongson-asx.
+(define_insn "lasx_vext2xv_h<u>_b<u>"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "ISA_HAS_LASX"
+ "vext2xv.h<u>.b<u>\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V16HI")])
+
+(define_insn "lasx_vext2xv_w<u>_h<u>"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "ISA_HAS_LASX"
+ "vext2xv.w<u>.h<u>\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_vext2xv_d<u>_w<u>"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "ISA_HAS_LASX"
+ "vext2xv.d<u>.w<u>\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_vext2xv_w<u>_b<u>"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (any_extend:V8SI
+ (vec_select:V8QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "ISA_HAS_LASX"
+ "vext2xv.w<u>.b<u>\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_vext2xv_d<u>_h<u>"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (any_extend:V4DI
+ (vec_select:V4HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "ISA_HAS_LASX"
+ "vext2xv.d<u>.h<u>\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_vext2xv_d<u>_b<u>"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (any_extend:V4DI
+ (vec_select:V4QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "ISA_HAS_LASX"
+ "vext2xv.d<u>.b<u>\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DI")])
+
+;; Extend loongson-sx to loongson-asx.
+(define_insn "xvandn<mode>3"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f"))
+ (match_operand:LASX 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvandn.v\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (abs:ILASX (match_operand:ILASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvsigncov.<lasxfmt>\t%u0,%u1,%u1"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (neg:ILASX (match_operand:ILASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvneg.<lasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvmuh_s_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVMUH_S))]
+ "ISA_HAS_LASX"
+ "xvmuh.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvmuh_u_<lasxfmt_u>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVMUH_U))]
+ "ISA_HAS_LASX"
+ "xvmuh.<lasxfmt_u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsllwil_s_<dlasxfmt>_<lasxfmt>"
+ [(set (match_operand:<VDMODE256> 0 "register_operand" "=f")
+ (unspec:<VDMODE256> [(match_operand:ILASX_WHB 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")]
+ UNSPEC_LASX_XVSLLWIL_S))]
+ "ISA_HAS_LASX"
+ "xvsllwil.<dlasxfmt>.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsllwil_u_<dlasxfmt_u>_<lasxfmt_u>"
+ [(set (match_operand:<VDMODE256> 0 "register_operand" "=f")
+ (unspec:<VDMODE256> [(match_operand:ILASX_WHB 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")]
+ UNSPEC_LASX_XVSLLWIL_U))]
+ "ISA_HAS_LASX"
+ "xvsllwil.<dlasxfmt_u>.<lasxfmt_u>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsran_<hlasxfmt>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSRAN))]
+ "ISA_HAS_LASX"
+ "xvsran.<hlasxfmt>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssran_s_<hlasxfmt>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSSRAN_S))]
+ "ISA_HAS_LASX"
+ "xvssran.<hlasxfmt>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssran_u_<hlasxfmt_u>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSSRAN_U))]
+ "ISA_HAS_LASX"
+ "xvssran.<hlasxfmt_u>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrarn_<hlasxfmt>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSRARN))]
+ "ISA_HAS_LASX"
+ "xvsrarn.<hlasxfmt>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrarn_s_<hlasxfmt>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSSRARN_S))]
+ "ISA_HAS_LASX"
+ "xvssrarn.<hlasxfmt>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrarn_u_<hlasxfmt_u>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSSRARN_U))]
+ "ISA_HAS_LASX"
+ "xvssrarn.<hlasxfmt_u>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrln_<hlasxfmt>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSRLN))]
+ "ISA_HAS_LASX"
+ "xvsrln.<hlasxfmt>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrln_u_<hlasxfmt_u>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSSRLN_U))]
+ "ISA_HAS_LASX"
+ "xvssrln.<hlasxfmt_u>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrlrn_<hlasxfmt>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSRLRN))]
+ "ISA_HAS_LASX"
+ "xvsrlrn.<hlasxfmt>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrlrn_u_<hlasxfmt_u>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSSRLRN_U))]
+ "ISA_HAS_LASX"
+ "xvssrlrn.<hlasxfmt_u>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvfrstpi_<lasxfmt>"
+ [(set (match_operand:ILASX_HB 0 "register_operand" "=f")
+ (unspec:ILASX_HB [(match_operand:ILASX_HB 1 "register_operand" "0")
+ (match_operand:ILASX_HB 2 "register_operand" "f")
+ (match_operand 3 "const_uimm5_operand" "")]
+ UNSPEC_LASX_XVFRSTPI))]
+ "ISA_HAS_LASX"
+ "xvfrstpi.<lasxfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvfrstp_<lasxfmt>"
+ [(set (match_operand:ILASX_HB 0 "register_operand" "=f")
+ (unspec:ILASX_HB [(match_operand:ILASX_HB 1 "register_operand" "0")
+ (match_operand:ILASX_HB 2 "register_operand" "f")
+ (match_operand:ILASX_HB 3 "register_operand" "f")]
+ UNSPEC_LASX_XVFRSTP))]
+ "ISA_HAS_LASX"
+ "xvfrstp.<lasxfmt>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvshuf4i_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V4DI 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand")]
+ UNSPEC_LASX_XVSHUF4I))]
+ "ISA_HAS_LASX"
+ "xvshuf4i.d\t%u0,%u2,%3"
+ [(set_attr "type" "simd_sld")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvbsrl_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_uimm5_operand" "")]
+ UNSPEC_LASX_XVBSRL_V))]
+ "ISA_HAS_LASX"
+ "xvbsrl.v\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvbsll_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_uimm5_operand" "")]
+ UNSPEC_LASX_XVBSLL_V))]
+ "ISA_HAS_LASX"
+ "xvbsll.v\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvextrins_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVEXTRINS))]
+ "ISA_HAS_LASX"
+ "xvextrins.<lasxfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvmskltz_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVMSKLTZ))]
+ "ISA_HAS_LASX"
+ "xvmskltz.<lasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsigncov_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSIGNCOV))]
+ "ISA_HAS_LASX"
+ "xvsigncov.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "copysign<mode>3"
+ [(set (match_dup 4)
+ (and:FLASX
+ (not:FLASX (match_dup 3))
+ (match_operand:FLASX 1 "register_operand")))
+ (set (match_dup 5)
+ (and:FLASX (match_dup 3)
+ (match_operand:FLASX 2 "register_operand")))
+ (set (match_operand:FLASX 0 "register_operand")
+ (ior:FLASX (match_dup 4) (match_dup 5)))]
+ "ISA_HAS_LASX"
+{
+ operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
+
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = gen_reg_rtx (<MODE>mode);
+})
+
+
+(define_insn "absv4df2"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (abs:V4DF (match_operand:V4DF 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvbitclri.d\t%u0,%u1,63"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "absv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (abs:V8SF (match_operand:V8SF 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvbitclri.w\t%u0,%u1,31"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "negv4df2"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (neg:V4DF (match_operand:V4DF 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvbitrevi.d\t%u0,%u1,63"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "negv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (neg:V8SF (match_operand:V8SF 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvbitrevi.w\t%u0,%u1,31"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "xvfmadd<mode>4"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (fma:FLASX (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")
+ (match_operand:FLASX 3 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvfmadd.<flasxfmt>\t%u0,%u1,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fms<mode>4"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (fma:FLASX (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")
+ (neg:FLASX (match_operand:FLASX 3 "register_operand" "f"))))]
+ "ISA_HAS_LASX"
+ "xvfmsub.<flasxfmt>\t%u0,%u1,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "xvfnmsub<mode>4_nmsub4"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (neg:FLASX
+ (fma:FLASX
+ (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")
+ (neg:FLASX (match_operand:FLASX 3 "register_operand" "f")))))]
+ "ISA_HAS_LASX"
+ "xvfnmsub.<flasxfmt>\t%u0,%u1,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "<MODE>")])
+
+
+(define_insn "xvfnmadd<mode>4_nmadd4"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (neg:FLASX
+ (fma:FLASX
+ (match_operand:FLASX 1 "register_operand" "f")
+ (match_operand:FLASX 2 "register_operand" "f")
+ (match_operand:FLASX 3 "register_operand" "f"))))]
+ "ISA_HAS_LASX"
+ "xvfnmadd.<flasxfmt>\t%u0,%u1,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvftintrne_w_s"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRNE_W_S))]
+ "ISA_HAS_LASX"
+ "xvftintrne.w.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrne_l_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRNE_L_D))]
+ "ISA_HAS_LASX"
+ "xvftintrne.l.d\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvftintrp_w_s"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRP_W_S))]
+ "ISA_HAS_LASX"
+ "xvftintrp.w.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrp_l_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRP_L_D))]
+ "ISA_HAS_LASX"
+ "xvftintrp.l.d\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvftintrm_w_s"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRM_W_S))]
+ "ISA_HAS_LASX"
+ "xvftintrm.w.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrm_l_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRM_L_D))]
+ "ISA_HAS_LASX"
+ "xvftintrm.l.d\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvftint_w_d"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (unspec:V8SI [(match_operand:V4DF 1 "register_operand" "f")
+ (match_operand:V4DF 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINT_W_D))]
+ "ISA_HAS_LASX"
+ "xvftint.w.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvffint_s_l"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (unspec:V8SF [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFFINT_S_L))]
+ "ISA_HAS_LASX"
+ "xvffint.s.l\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvftintrz_w_d"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (unspec:V8SI [(match_operand:V4DF 1 "register_operand" "f")
+ (match_operand:V4DF 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRZ_W_D))]
+ "ISA_HAS_LASX"
+ "xvftintrz.w.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvftintrp_w_d"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (unspec:V8SI [(match_operand:V4DF 1 "register_operand" "f")
+ (match_operand:V4DF 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRP_W_D))]
+ "ISA_HAS_LASX"
+ "xvftintrp.w.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvftintrm_w_d"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (unspec:V8SI [(match_operand:V4DF 1 "register_operand" "f")
+ (match_operand:V4DF 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRM_W_D))]
+ "ISA_HAS_LASX"
+ "xvftintrm.w.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvftintrne_w_d"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (unspec:V8SI [(match_operand:V4DF 1 "register_operand" "f")
+ (match_operand:V4DF 2 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRNE_W_D))]
+ "ISA_HAS_LASX"
+ "xvftintrne.w.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvftinth_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTH_L_S))]
+ "ISA_HAS_LASX"
+ "xvftinth.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintl_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTL_L_S))]
+ "ISA_HAS_LASX"
+ "xvftintl.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvffinth_d_w"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (unspec:V4DF [(match_operand:V8SI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFFINTH_D_W))]
+ "ISA_HAS_LASX"
+ "xvffinth.d.w\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_xvffintl_d_w"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (unspec:V4DF [(match_operand:V8SI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFFINTL_D_W))]
+ "ISA_HAS_LASX"
+ "xvffintl.d.w\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_xvftintrzh_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRZH_L_S))]
+ "ISA_HAS_LASX"
+ "xvftintrzh.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrzl_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRZL_L_S))]
+ "ISA_HAS_LASX"
+ "xvftintrzl.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrph_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRPH_L_S))]
+ "ISA_HAS_LASX"
+ "xvftintrph.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrpl_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRPL_L_S))]
+ "ISA_HAS_LASX"
+ "xvftintrpl.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrmh_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRMH_L_S))]
+ "ISA_HAS_LASX"
+ "xvftintrmh.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrml_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRML_L_S))]
+ "ISA_HAS_LASX"
+ "xvftintrml.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrneh_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRNEH_L_S))]
+ "ISA_HAS_LASX"
+ "xvftintrneh.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvftintrnel_l_s"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFTINTRNEL_L_S))]
+ "ISA_HAS_LASX"
+ "xvftintrnel.l.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvfrintrne_s"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINTRNE_S))]
+ "ISA_HAS_LASX"
+ "xvfrintrne.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvfrintrne_d"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINTRNE_D))]
+ "ISA_HAS_LASX"
+ "xvfrintrne.d\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvfrintrz_s"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINTRZ_S))]
+ "ISA_HAS_LASX"
+ "xvfrintrz.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvfrintrz_d"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINTRZ_D))]
+ "ISA_HAS_LASX"
+ "xvfrintrz.d\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvfrintrp_s"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINTRP_S))]
+ "ISA_HAS_LASX"
+ "xvfrintrp.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvfrintrp_d"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINTRP_D))]
+ "ISA_HAS_LASX"
+ "xvfrintrp.d\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "lasx_xvfrintrm_s"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINTRM_S))]
+ "ISA_HAS_LASX"
+ "xvfrintrm.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "lasx_xvfrintrm_d"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINTRM_D))]
+ "ISA_HAS_LASX"
+ "xvfrintrm.d\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DF")])
+
+;; Vector versions of the floating-point frint patterns.
+;; Expands to btrunc, ceil, floor, rint.
+(define_insn "<FRINT256_S:frint256_pattern_s>v8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=f")
+ (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")]
+ FRINT256_S))]
+ "ISA_HAS_LASX"
+ "xvfrint<FRINT256_S:frint256_suffix>.s\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "<FRINT256_D:frint256_pattern_d>v4df2"
+ [(set (match_operand:V4DF 0 "register_operand" "=f")
+ (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")]
+ FRINT256_D))]
+ "ISA_HAS_LASX"
+ "xvfrint<FRINT256_D:frint256_suffix>.d\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "V4DF")])
+
+;; Expands to round.
+(define_insn "round<mode>2"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRINT))]
+ "ISA_HAS_LASX"
+ "xvfrint.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+;; Offset load and broadcast
+(define_expand "lasx_xvldrepl_<lasxfmt_f>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand 2 "aq12<lasxfmt>_operand")
+ (match_operand 1 "pmode_register_operand")]
+ "ISA_HAS_LASX"
+{
+ emit_insn (gen_lasx_xvldrepl_<lasxfmt_f>_insn
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "lasx_xvldrepl_<lasxfmt_f>_insn"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (vec_duplicate:LASX
+ (mem:<UNITMODE> (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "aq12<lasxfmt>_operand")))))]
+ "ISA_HAS_LASX"
+{
+ return "xvldrepl.<lasxfmt>\t%u0,%1,%2";
+}
+ [(set_attr "type" "simd_load")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "4")])
+
+;; Offset is "0"
+(define_insn "lasx_xvldrepl_<lasxfmt_f>_insn_0"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (vec_duplicate:LASX
+ (mem:<UNITMODE> (match_operand:DI 1 "register_operand" "r"))))]
+ "ISA_HAS_LASX"
+{
+ return "xvldrepl.<lasxfmt>\t%u0,%1,0";
+}
+ [(set_attr "type" "simd_load")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "4")])
+
+;;XVADDWEV.H.B XVSUBWEV.H.B XVMULWEV.H.B
+;;XVADDWEV.H.BU XVSUBWEV.H.BU XVMULWEV.H.BU
+(define_insn "lasx_xv<optab>wev_h_b<u>"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (addsubmul:V16HI
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)])))
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wev.h.b<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V16HI")])
+
+;;XVADDWEV.W.H XVSUBWEV.W.H XVMULWEV.W.H
+;;XVADDWEV.W.HU XVSUBWEV.W.HU XVMULWEV.W.HU
+(define_insn "lasx_xv<optab>wev_w_h<u>"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (addsubmul:V8SI
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wev.w.h<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V8SI")])
+
+;;XVADDWEV.D.W XVSUBWEV.D.W XVMULWEV.D.W
+;;XVADDWEV.D.WU XVSUBWEV.D.WU XVMULWEV.D.WU
+(define_insn "lasx_xv<optab>wev_d_w<u>"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (addsubmul:V4DI
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wev.d.w<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVADDWEV.Q.D
+;;TODO2
+(define_insn "lasx_xvaddwev_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVADDWEV))]
+ "ISA_HAS_LASX"
+ "xvaddwev.q.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVSUBWEV.Q.D
+;;TODO2
+(define_insn "lasx_xvsubwev_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSUBWEV))]
+ "ISA_HAS_LASX"
+ "xvsubwev.q.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMULWEV.Q.D
+;;TODO2
+(define_insn "lasx_xvmulwev_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVMULWEV))]
+ "ISA_HAS_LASX"
+ "xvmulwev.q.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+
+;;XVADDWOD.H.B XVSUBWOD.H.B XVMULWOD.H.B
+;;XVADDWOD.H.BU XVSUBWOD.H.BU XVMULWOD.H.BU
+(define_insn "lasx_xv<optab>wod_h_b<u>"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (addsubmul:V16HI
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)])))
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wod.h.b<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V16HI")])
+
+;;XVADDWOD.W.H XVSUBWOD.W.H XVMULWOD.W.H
+;;XVADDWOD.W.HU XVSUBWOD.W.HU XVMULWOD.W.HU
+(define_insn "lasx_xv<optab>wod_w_h<u>"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (addsubmul:V8SI
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wod.w.h<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V8SI")])
+
+
+;;XVADDWOD.D.W XVSUBWOD.D.W XVMULWOD.D.W
+;;XVADDWOD.D.WU XVSUBWOD.D.WU XVMULWOD.D.WU
+(define_insn "lasx_xv<optab>wod_d_w<u>"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (addsubmul:V4DI
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wod.d.w<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVADDWOD.Q.D
+;;TODO2
+(define_insn "lasx_xvaddwod_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVADDWOD))]
+ "ISA_HAS_LASX"
+ "xvaddwod.q.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVSUBWOD.Q.D
+;;TODO2
+(define_insn "lasx_xvsubwod_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSUBWOD))]
+ "ISA_HAS_LASX"
+ "xvsubwod.q.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMULWOD.Q.D
+;;TODO2
+(define_insn "lasx_xvmulwod_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVMULWOD))]
+ "ISA_HAS_LASX"
+ "xvmulwod.q.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVADDWEV.Q.DU
+;;TODO2
+(define_insn "lasx_xvaddwev_q_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVADDWEV2))]
+ "ISA_HAS_LASX"
+ "xvaddwev.q.du\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVSUBWEV.Q.DU
+;;TODO2
+(define_insn "lasx_xvsubwev_q_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSUBWEV2))]
+ "ISA_HAS_LASX"
+ "xvsubwev.q.du\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMULWEV.Q.DU
+;;TODO2
+(define_insn "lasx_xvmulwev_q_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVMULWEV2))]
+ "ISA_HAS_LASX"
+ "xvmulwev.q.du\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVADDWOD.Q.DU
+;;TODO2
+(define_insn "lasx_xvaddwod_q_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVADDWOD2))]
+ "ISA_HAS_LASX"
+ "xvaddwod.q.du\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVSUBWOD.Q.DU
+;;TODO2
+(define_insn "lasx_xvsubwod_q_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSUBWOD2))]
+ "ISA_HAS_LASX"
+ "xvsubwod.q.du\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMULWOD.Q.DU
+;;TODO2
+(define_insn "lasx_xvmulwod_q_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVMULWOD2))]
+ "ISA_HAS_LASX"
+ "xvmulwod.q.du\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVADDWEV.H.BU.B XVMULWEV.H.BU.B
+(define_insn "lasx_xv<optab>wev_h_bu_b"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (addmul:V16HI
+ (zero_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)])))
+ (sign_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wev.h.bu.b\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V16HI")])
+
+;;XVADDWEV.W.HU.H XVMULWEV.W.HU.H
+(define_insn "lasx_xv<optab>wev_w_hu_h"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (addmul:V8SI
+ (zero_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (sign_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wev.w.hu.h\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V8SI")])
+
+;;XVADDWEV.D.WU.W XVMULWEV.D.WU.W
+(define_insn "lasx_xv<optab>wev_d_wu_w"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (addmul:V4DI
+ (zero_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (sign_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wev.d.wu.w\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVADDWOD.H.BU.B XVMULWOD.H.BU.B
+(define_insn "lasx_xv<optab>wod_h_bu_b"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (addmul:V16HI
+ (zero_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)])))
+ (sign_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wod.h.bu.b\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V16HI")])
+
+;;XVADDWOD.W.HU.H XVMULWOD.W.HU.H
+(define_insn "lasx_xv<optab>wod_w_hu_h"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (addmul:V8SI
+ (zero_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))
+ (sign_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wod.w.hu.h\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V8SI")])
+
+;;XVADDWOD.D.WU.W XVMULWOD.D.WU.W
+(define_insn "lasx_xv<optab>wod_d_wu_w"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (addmul:V4DI
+ (zero_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))
+ (sign_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))))]
+ "ISA_HAS_LASX"
+ "xv<optab>wod.d.wu.w\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWEV.H.B XVMADDWEV.H.BU
+(define_insn "lasx_xvmaddwev_h_b<u>"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (plus:V16HI
+ (match_operand:V16HI 1 "register_operand" "0")
+ (mult:V16HI
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)])))
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwev.h.b<u>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V16HI")])
+
+;;XVMADDWEV.W.H XVMADDWEV.W.HU
+(define_insn "lasx_xvmaddwev_w_h<u>"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (plus:V8SI
+ (match_operand:V8SI 1 "register_operand" "0")
+ (mult:V8SI
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwev.w.h<u>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V8SI")])
+
+;;XVMADDWEV.D.W XVMADDWEV.D.WU
+(define_insn "lasx_xvmaddwev_d_w<u>"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (plus:V4DI
+ (match_operand:V4DI 1 "register_operand" "0")
+ (mult:V4DI
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwev.d.w<u>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWEV.Q.D
+;;TODO2
+(define_insn "lasx_xvmaddwev_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V4DI 2 "register_operand" "f")
+ (match_operand:V4DI 3 "register_operand" "f")]
+ UNSPEC_LASX_XVMADDWEV))]
+ "ISA_HAS_LASX"
+ "xvmaddwev.q.d\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWOD.H.B XVMADDWOD.H.BU
+(define_insn "lasx_xvmaddwod_h_b<u>"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (plus:V16HI
+ (match_operand:V16HI 1 "register_operand" "0")
+ (mult:V16HI
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)])))
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 3 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwod.h.b<u>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V16HI")])
+
+;;XVMADDWOD.W.H XVMADDWOD.W.HU
+(define_insn "lasx_xvmaddwod_w_h<u>"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (plus:V8SI
+ (match_operand:V8SI 1 "register_operand" "0")
+ (mult:V8SI
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 3 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwod.w.h<u>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V8SI")])
+
+;;XVMADDWOD.D.W XVMADDWOD.D.WU
+(define_insn "lasx_xvmaddwod_d_w<u>"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (plus:V4DI
+ (match_operand:V4DI 1 "register_operand" "0")
+ (mult:V4DI
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 3 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwod.d.w<u>\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWOD.Q.D
+;;TODO2
+(define_insn "lasx_xvmaddwod_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V4DI 2 "register_operand" "f")
+ (match_operand:V4DI 3 "register_operand" "f")]
+ UNSPEC_LASX_XVMADDWOD))]
+ "ISA_HAS_LASX"
+ "xvmaddwod.q.d\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWEV.Q.DU
+;;TODO2
+(define_insn "lasx_xvmaddwev_q_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V4DI 2 "register_operand" "f")
+ (match_operand:V4DI 3 "register_operand" "f")]
+ UNSPEC_LASX_XVMADDWEV2))]
+ "ISA_HAS_LASX"
+ "xvmaddwev.q.du\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWOD.Q.DU
+;;TODO2
+(define_insn "lasx_xvmaddwod_q_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V4DI 2 "register_operand" "f")
+ (match_operand:V4DI 3 "register_operand" "f")]
+ UNSPEC_LASX_XVMADDWOD2))]
+ "ISA_HAS_LASX"
+ "xvmaddwod.q.du\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWEV.H.BU.B
+(define_insn "lasx_xvmaddwev_h_bu_b"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (plus:V16HI
+ (match_operand:V16HI 1 "register_operand" "0")
+ (mult:V16HI
+ (zero_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)])))
+ (sign_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwev.h.bu.b\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V16HI")])
+
+;;XVMADDWEV.W.HU.H
+(define_insn "lasx_xvmaddwev_w_hu_h"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (plus:V8SI
+ (match_operand:V8SI 1 "register_operand" "0")
+ (mult:V8SI
+ (zero_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (sign_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwev.w.hu.h\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V8SI")])
+
+;;XVMADDWEV.D.WU.W
+(define_insn "lasx_xvmaddwev_d_wu_w"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (plus:V4DI
+ (match_operand:V4DI 1 "register_operand" "0")
+ (mult:V4DI
+ (zero_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (sign_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwev.d.wu.w\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWEV.Q.DU.D
+;;TODO2
+(define_insn "lasx_xvmaddwev_q_du_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V4DI 2 "register_operand" "f")
+ (match_operand:V4DI 3 "register_operand" "f")]
+ UNSPEC_LASX_XVMADDWEV3))]
+ "ISA_HAS_LASX"
+ "xvmaddwev.q.du.d\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWOD.H.BU.B
+(define_insn "lasx_xvmaddwod_h_bu_b"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (plus:V16HI
+ (match_operand:V16HI 1 "register_operand" "0")
+ (mult:V16HI
+ (zero_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)])))
+ (sign_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 3 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwod.h.bu.b\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V16HI")])
+
+;;XVMADDWOD.W.HU.H
+(define_insn "lasx_xvmaddwod_w_hu_h"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (plus:V8SI
+ (match_operand:V8SI 1 "register_operand" "0")
+ (mult:V8SI
+ (zero_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))
+ (sign_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 3 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwod.w.hu.h\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V8SI")])
+
+;;XVMADDWOD.D.WU.W
+(define_insn "lasx_xvmaddwod_d_wu_w"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (plus:V4DI
+ (match_operand:V4DI 1 "register_operand" "0")
+ (mult:V4DI
+ (zero_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "%f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))
+ (sign_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 3 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)]))))))]
+ "ISA_HAS_LASX"
+ "xvmaddwod.d.wu.w\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "V4DI")])
+
+;;XVMADDWOD.Q.DU.D
+;;TODO2
+(define_insn "lasx_xvmaddwod_q_du_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "0")
+ (match_operand:V4DI 2 "register_operand" "f")
+ (match_operand:V4DI 3 "register_operand" "f")]
+ UNSPEC_LASX_XVMADDWOD3))]
+ "ISA_HAS_LASX"
+ "xvmaddwod.q.du.d\t%u0,%u2,%u3"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVHADDW.Q.D
+;;TODO2
+(define_insn "lasx_xvhaddw_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVHADDW_Q_D))]
+ "ISA_HAS_LASX"
+ "xvhaddw.q.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVHSUBW.Q.D
+;;TODO2
+(define_insn "lasx_xvhsubw_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVHSUBW_Q_D))]
+ "ISA_HAS_LASX"
+ "xvhsubw.q.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVHADDW.QU.DU
+;;TODO2
+(define_insn "lasx_xvhaddw_qu_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVHADDW_QU_DU))]
+ "ISA_HAS_LASX"
+ "xvhaddw.qu.du\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVHSUBW.QU.DU
+;;TODO2
+(define_insn "lasx_xvhsubw_qu_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVHSUBW_QU_DU))]
+ "ISA_HAS_LASX"
+ "xvhsubw.qu.du\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVROTR.B XVROTR.H XVROTR.W XVROTR.D
+;;TODO-478
+(define_insn "lasx_xvrotr_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+ (match_operand:ILASX 2 "register_operand" "f")]
+ UNSPEC_LASX_XVROTR))]
+ "ISA_HAS_LASX"
+ "xvrotr.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+;;XVADD.Q
+;;TODO2
+(define_insn "lasx_xvadd_q"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVADD_Q))]
+ "ISA_HAS_LASX"
+ "xvadd.q\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVSUB.Q
+;;TODO2
+(define_insn "lasx_xvsub_q"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSUB_Q))]
+ "ISA_HAS_LASX"
+ "xvsub.q\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVSSRLN.B.H XVSSRLN.H.W XVSSRLN.W.D
+(define_insn "lasx_xvssrln_<hlasxfmt>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSSRLN))]
+ "ISA_HAS_LASX"
+ "xvssrln.<hlasxfmt>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+;;XVREPLVE.B XVREPLVE.H XVREPLVE.W XVREPLVE.D
+(define_insn "lasx_xvreplve_<lasxfmt_f>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (unspec:LASX [(match_operand:LASX 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "r")]
+ UNSPEC_LASX_XVREPLVE))]
+ "ISA_HAS_LASX"
+ "xvreplve.<lasxfmt>\t%u0,%u1,%z2"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+;;XVADDWEV.Q.DU.D
+(define_insn "lasx_xvaddwev_q_du_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVADDWEV3))]
+ "ISA_HAS_LASX"
+ "xvaddwev.q.du.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVADDWOD.Q.DU.D
+(define_insn "lasx_xvaddwod_q_du_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVADDWOD3))]
+ "ISA_HAS_LASX"
+ "xvaddwod.q.du.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMULWEV.Q.DU.D
+(define_insn "lasx_xvmulwev_q_du_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVMULWEV3))]
+ "ISA_HAS_LASX"
+ "xvmulwev.q.du.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;;XVMULWOD.Q.DU.D
+(define_insn "lasx_xvmulwod_q_du_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")
+ (match_operand:V4DI 2 "register_operand" "f")]
+ UNSPEC_LASX_XVMULWOD3))]
+ "ISA_HAS_LASX"
+ "xvmulwod.q.du.d\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvpickve2gr_w<u>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (any_extend:SI
+ (vec_select:SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_0_to_7_operand" "")]))))]
+ "ISA_HAS_LASX"
+ "xvpickve2gr.w<u>\t%0,%u1,%2"
+ [(set_attr "type" "simd_copy")
+ (set_attr "mode" "V8SI")])
+
+
+(define_insn "lasx_xvmskgez_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (unspec:V32QI [(match_operand:V32QI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVMSKGEZ))]
+ "ISA_HAS_LASX"
+ "xvmskgez.b\t%u0,%u1"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvmsknz_b"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (unspec:V32QI [(match_operand:V32QI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVMSKNZ))]
+ "ISA_HAS_LASX"
+ "xvmsknz.b\t%u0,%u1"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvexth_h<u>_b<u>"
+ [(set (match_operand:V16HI 0 "register_operand" "=f")
+ (any_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)]))))]
+ "ISA_HAS_LASX"
+ "xvexth.h<u>.b<u>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V16HI")])
+
+(define_insn "lasx_xvexth_w<u>_h<u>"
+ [(set (match_operand:V8SI 0 "register_operand" "=f")
+ (any_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "ISA_HAS_LASX"
+ "xvexth.w<u>.h<u>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V8SI")])
+
+(define_insn "lasx_xvexth_d<u>_w<u>"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "register_operand" "f")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "ISA_HAS_LASX"
+ "xvexth.d<u>.w<u>\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvexth_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVEXTH_Q_D))]
+ "ISA_HAS_LASX"
+ "xvexth.q.d\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvexth_qu_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVEXTH_QU_DU))]
+ "ISA_HAS_LASX"
+ "xvexth.qu.du\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvrotri_<lasxfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (rotatert:ILASX (match_operand:ILASX 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm256>_operand" "")))]
+ "ISA_HAS_LASX"
+ "xvrotri.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shf")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvextl_q_d"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVEXTL_Q_D))]
+ "ISA_HAS_LASX"
+ "xvextl.q.d\t%u0,%u1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvsrlni_<lasxfmt>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSRLNI))]
+ "ISA_HAS_LASX"
+ "xvsrlni.<lasxfmt>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrlrni_<lasxfmt>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSRLRNI))]
+ "ISA_HAS_LASX"
+ "xvsrlrni.<lasxfmt>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrlni_<lasxfmt>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSSRLNI))]
+ "ISA_HAS_LASX"
+ "xvssrlni.<lasxfmt>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrlni_<lasxfmt_u>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSSRLNI2))]
+ "ISA_HAS_LASX"
+ "xvssrlni.<lasxfmt_u>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrlrni_<lasxfmt>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSSRLRNI))]
+ "ISA_HAS_LASX"
+ "xvssrlrni.<lasxfmt>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrlrni_<lasxfmt_u>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSSRLRNI2))]
+ "ISA_HAS_LASX"
+ "xvssrlrni.<lasxfmt_u>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrani_<lasxfmt>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSRANI))]
+ "ISA_HAS_LASX"
+ "xvsrani.<lasxfmt>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvsrarni_<lasxfmt>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSRARNI))]
+ "ISA_HAS_LASX"
+ "xvsrarni.<lasxfmt>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrani_<lasxfmt>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSSRANI))]
+ "ISA_HAS_LASX"
+ "xvssrani.<lasxfmt>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrani_<lasxfmt_u>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSSRANI2))]
+ "ISA_HAS_LASX"
+ "xvssrani.<lasxfmt_u>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrarni_<lasxfmt>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSSRARNI))]
+ "ISA_HAS_LASX"
+ "xvssrarni.<lasxfmt>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrarni_<lasxfmt_u>_<dlasxqfmt>"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "0")
+ (match_operand:ILASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVSSRARNI2))]
+ "ISA_HAS_LASX"
+ "xvssrarni.<lasxfmt_u>.<dlasxqfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_mode_attr VDOUBLEMODEW256
+ [(V8SI "V16SI")
+ (V8SF "V16SF")])
+
+(define_insn "lasx_xvpermi_<lasxfmt_f_wd>"
+ [(set (match_operand:LASX_W 0 "register_operand" "=f")
+ (unspec:LASX_W [(match_operand:LASX_W 1 "register_operand" "0")
+ (match_operand:LASX_W 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand" "")]
+ UNSPEC_LASX_XVPERMI))]
+ "ISA_HAS_LASX"
+ "xvpermi.w\t%u0,%u2,%3"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvpermi_<lasxfmt_f_wd>_1"
+ [(set (match_operand:LASX_W 0 "register_operand" "=f")
+ (vec_select:LASX_W
+ (vec_concat:<VDOUBLEMODEW256>
+ (match_operand:LASX_W 1 "register_operand" "f")
+ (match_operand:LASX_W 2 "register_operand" "0"))
+ (parallel [(match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand" )
+ (match_operand 5 "const_8_to_11_operand" )
+ (match_operand 6 "const_8_to_11_operand" )
+ (match_operand 7 "const_4_to_7_operand" )
+ (match_operand 8 "const_4_to_7_operand" )
+ (match_operand 9 "const_12_to_15_operand")
+ (match_operand 10 "const_12_to_15_operand")])))]
+ "ISA_HAS_LASX
+ && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
+ && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
+ && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
+ && INTVAL (operands[6]) + 4 == INTVAL (operands[10])"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 8) << 4;
+ mask |= (INTVAL (operands[6]) - 8) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "xvpermi.w\t%u0,%u1,%3";
+}
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "lasx_xvld"
+ [(match_operand:V32QI 0 "register_operand")
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 2 "aq12b_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
+ INTVAL (operands[2]));
+ loongarch_emit_move (operands[0], gen_rtx_MEM (V32QImode, addr));
+ DONE;
+})
+
+(define_expand "lasx_xvst"
+ [(match_operand:V32QI 0 "register_operand")
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 2 "aq12b_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
+ INTVAL (operands[2]));
+ loongarch_emit_move (gen_rtx_MEM (V32QImode, addr), operands[0]);
+ DONE;
+})
+
+(define_expand "lasx_xvstelm_<lasxfmt_f>"
+ [(match_operand:LASX 0 "register_operand")
+ (match_operand 3 "const_<indeximm256>_operand")
+ (match_operand 2 "aq8<lasxfmt>_operand")
+ (match_operand 1 "pmode_register_operand")]
+ "ISA_HAS_LASX"
+{
+ emit_insn (gen_lasx_xvstelm_<lasxfmt_f>_insn
+ (operands[1], operands[2], operands[0], operands[3]));
+ DONE;
+})
+
+(define_insn "lasx_xvstelm_<lasxfmt_f>_insn"
+ [(set (mem:<UNITMODE> (plus:DI (match_operand:DI 0 "register_operand" "r")
+ (match_operand 1 "aq8<lasxfmt>_operand")))
+ (vec_select:<UNITMODE>
+ (match_operand:LASX 2 "register_operand" "f")
+ (parallel [(match_operand 3 "const_<indeximm256>_operand" "")])))]
+ "ISA_HAS_LASX"
+{
+ return "xvstelm.<lasxfmt>\t%u2,%0,%1,%3";
+}
+ [(set_attr "type" "simd_store")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "4")])
+
+;; Offset is "0"
+(define_insn "lasx_xvstelm_<lasxfmt_f>_insn_0"
+ [(set (mem:<UNITMODE> (match_operand:DI 0 "register_operand" "r"))
+ (vec_select:<UNITMODE>
+ (match_operand:LASX_WD 1 "register_operand" "f")
+ (parallel [(match_operand:SI 2 "const_<indeximm256>_operand")])))]
+ "ISA_HAS_LASX"
+{
+ return "xvstelm.<lasxfmt>\t%u1,%0,0,%2";
+}
+ [(set_attr "type" "simd_store")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "4")])
+
+(define_insn "lasx_xvinsve0_<lasxfmt_f>"
+ [(set (match_operand:LASX_WD 0 "register_operand" "=f")
+ (unspec:LASX_WD [(match_operand:LASX_WD 1 "register_operand" "0")
+ (match_operand:LASX_WD 2 "register_operand" "f")
+ (match_operand 3 "const_<indeximm256>_operand" "")]
+ UNSPEC_LASX_XVINSVE0))]
+ "ISA_HAS_LASX"
+ "xvinsve0.<lasxfmt>\t%u0,%u2,%3"
+ [(set_attr "type" "simd_shf")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvinsve0_<lasxfmt_f>_scalar"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (vec_merge:FLASX
+ (vec_duplicate:FLASX
+ (match_operand:<UNITMODE> 1 "register_operand" "f"))
+ (match_operand:FLASX 2 "register_operand" "0")
+ (match_operand 3 "const_<bitmask256>_operand" "")))]
+ "ISA_HAS_LASX"
+ "xvinsve0.<lasxfmt>\t%u0,%u1,%y3"
+ [(set_attr "type" "simd_insert")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvpickve_<lasxfmt_f>"
+ [(set (match_operand:LASX_WD 0 "register_operand" "=f")
+ (unspec:LASX_WD [(match_operand:LASX_WD 1 "register_operand" "f")
+ (match_operand 2 "const_<indeximm256>_operand" "")]
+ UNSPEC_LASX_XVPICKVE))]
+ "ISA_HAS_LASX"
+ "xvpickve.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shf")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvpickve_<lasxfmt_f>_scalar"
+ [(set (match_operand:<UNITMODE> 0 "register_operand" "=f")
+ (vec_select:<UNITMODE>
+ (match_operand:FLASX 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_<indeximm256>_operand" "")])))]
+ "ISA_HAS_LASX"
+ "xvpickve.<lasxfmt>\t%u0,%u1,%2"
+ [(set_attr "type" "simd_shf")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvssrlrn_<hlasxfmt>_<lasxfmt>"
+ [(set (match_operand:<VHSMODE256> 0 "register_operand" "=f")
+ (unspec:<VHSMODE256> [(match_operand:ILASX_DWH 1 "register_operand" "f")
+ (match_operand:ILASX_DWH 2 "register_operand" "f")]
+ UNSPEC_LASX_XVSSRLRN))]
+ "ISA_HAS_LASX"
+ "xvssrlrn.<hlasxfmt>.<lasxfmt>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "xvorn<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f"))
+ (match_operand:ILASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvorn.v\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "lasx_xvextl_qu_du"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")]
+ UNSPEC_LASX_XVEXTL_QU_DU))]
+ "ISA_HAS_LASX"
+ "xvextl.qu.du\t%u0,%u1"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvldi"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (unspec:V4DI[(match_operand 1 "const_imm13_operand")]
+ UNSPEC_LASX_XVLDI))]
+ "ISA_HAS_LASX"
+{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+ if (val < 0)
+ {
+ HOST_WIDE_INT modeVal = (val & 0xf00) >> 8;
+ if (modeVal < 13)
+ return "xvldi\t%u0,%1";
+ else
+ {
+ sorry ("imm13 only support 0000 ~ 1100 in bits '12 ~ 9' when bit '13' is 1");
+ return "#";
+ }
+ }
+ else
+ return "xvldi\t%u0,%1";
+}
+ [(set_attr "type" "simd_load")
+ (set_attr "mode" "V4DI")])
+
+(define_insn "lasx_xvldx"
+ [(set (match_operand:V32QI 0 "register_operand" "=f")
+ (unspec:V32QI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "reg_or_0_operand" "rJ")]
+ UNSPEC_LASX_XVLDX))]
+ "ISA_HAS_LASX"
+{
+ return "xvldx\t%u0,%1,%z2";
+}
+ [(set_attr "type" "simd_load")
+ (set_attr "mode" "V32QI")])
+
+(define_insn "lasx_xvstx"
+ [(set (mem:V32QI (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "reg_or_0_operand" "rJ")))
+ (unspec: V32QI[(match_operand:V32QI 0 "register_operand" "f")]
+ UNSPEC_LASX_XVSTX))]
+
+ "ISA_HAS_LASX"
+{
+ return "xvstx\t%u0,%1,%z2";
+}
+ [(set_attr "type" "simd_store")
+ (set_attr "mode" "DI")])
+
+(define_insn "vec_widen_<su>mult_even_v8si"
+ [(set (match_operand:V4DI 0 "register_operand" "=f")
+ (mult:V4DI
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (any_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))))]
+ "ISA_HAS_LASX"
+ "xvmulwev.d.w<u>\t%u0,%u1,%u2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4DI")])
+
+;; Vector reduction operation
+(define_expand "reduc_plus_scal_v4di"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:V4DI 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx tmp = gen_reg_rtx (V4DImode);
+ rtx tmp1 = gen_reg_rtx (V4DImode);
+ rtx vec_res = gen_reg_rtx (V4DImode);
+ emit_insn (gen_lasx_xvhaddw_q_d (tmp, operands[1], operands[1]));
+ emit_insn (gen_lasx_xvpermi_d_v4di (tmp1, tmp, GEN_INT (2)));
+ emit_insn (gen_addv4di3 (vec_res, tmp, tmp1));
+ emit_insn (gen_vec_extractv4didi (operands[0], vec_res, const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_plus_scal_v8si"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:V8SI 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx tmp = gen_reg_rtx (V4DImode);
+ rtx tmp1 = gen_reg_rtx (V4DImode);
+ rtx vec_res = gen_reg_rtx (V4DImode);
+ emit_insn (gen_lasx_xvhaddw_d_w (tmp, operands[1], operands[1]));
+ emit_insn (gen_lasx_xvhaddw_q_d (tmp1, tmp, tmp));
+ emit_insn (gen_lasx_xvpermi_d_v4di (tmp, tmp1, GEN_INT (2)));
+ emit_insn (gen_addv4di3 (vec_res, tmp, tmp1));
+ emit_insn (gen_vec_extractv8sisi (operands[0], gen_lowpart (V8SImode,vec_res),
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_plus_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:FLASX 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ loongarch_expand_vector_reduc (gen_add<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_<optab>_scal_<mode>"
+ [(any_bitwise:<UNITMODE>
+ (match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:ILASX 1 "register_operand"))]
+ "ISA_HAS_LASX"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ loongarch_expand_vector_reduc (gen_<optab><mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_smax_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:LASX 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ loongarch_expand_vector_reduc (gen_smax<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_smin_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:LASX 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ loongarch_expand_vector_reduc (gen_smin<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_umax_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:ILASX 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ loongarch_expand_vector_reduc (gen_umax<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_umin_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:ILASX 1 "register_operand")]
+ "ISA_HAS_LASX"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ loongarch_expand_vector_reduc (gen_umin<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
diff --git a/gcc/config/loongarch/loongarch-modes.def b/gcc/config/loongarch/loongarch-modes.def
index b69ad3d83..ac9ea3142 100644
--- a/gcc/config/loongarch/loongarch-modes.def
+++ b/gcc/config/loongarch/loongarch-modes.def
@@ -33,6 +33,7 @@ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
VECTOR_MODES (FLOAT, 16); /* V4SF V2DF */
+/* For LoongArch LASX 256 bits. */
VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
VECTOR_MODES (FLOAT, 32); /* V8SF V4DF */
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 24e42fa99..133ec9fa8 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -89,6 +89,8 @@ extern bool loongarch_split_move_insn_p (rtx, rtx);
extern void loongarch_split_move_insn (rtx, rtx, rtx);
extern void loongarch_split_128bit_move (rtx, rtx);
extern bool loongarch_split_128bit_move_p (rtx, rtx);
+extern void loongarch_split_256bit_move (rtx, rtx);
+extern bool loongarch_split_256bit_move_p (rtx, rtx);
extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx));
extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx);
extern void loongarch_split_lsx_fill_d (rtx, rtx);
@@ -174,9 +176,11 @@ union loongarch_gen_fn_ptrs
extern void loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs,
rtx, rtx, rtx, rtx, rtx);
+extern void loongarch_expand_vector_group_init (rtx, rtx);
extern void loongarch_expand_vector_init (rtx, rtx);
extern void loongarch_expand_vec_unpack (rtx op[2], bool, bool);
extern void loongarch_expand_vec_perm (rtx, rtx, rtx, rtx);
+extern void loongarch_expand_vec_perm_1 (rtx[]);
extern void loongarch_expand_vector_extract (rtx, rtx, int);
extern void loongarch_expand_vector_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx);
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 40b83d72b..dae35a479 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1927,7 +1927,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
{
/* LSX LD.* and ST.* cannot support loading symbols via an immediate
operand. */
- if (LSX_SUPPORTED_MODE_P (mode))
+ if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
return 0;
switch (type)
@@ -2060,6 +2060,11 @@ loongarch_valid_offset_p (rtx x, machine_mode mode)
loongarch_ldst_scaled_shift (mode)))
return false;
+ /* LASX XVLD.B and XVST.B support 10-bit signed offsets without shift. */
+ if (LASX_SUPPORTED_MODE_P (mode)
+ && !loongarch_signed_immediate_p (INTVAL (x), 10, 0))
+ return false;
+
return true;
}
@@ -2272,7 +2277,9 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
{
struct loongarch_address_info addr;
int factor;
- bool lsx_p = !might_split_p && LSX_SUPPORTED_MODE_P (mode);
+ bool lsx_p = (!might_split_p
+ && (LSX_SUPPORTED_MODE_P (mode)
+ || LASX_SUPPORTED_MODE_P (mode)));
if (!loongarch_classify_address (&addr, x, mode, false))
return 0;
@@ -2418,7 +2425,8 @@ loongarch_const_insns (rtx x)
return loongarch_integer_cost (INTVAL (x));
case CONST_VECTOR:
- if (LSX_SUPPORTED_MODE_P (GET_MODE (x))
+ if ((LSX_SUPPORTED_MODE_P (GET_MODE (x))
+ || LASX_SUPPORTED_MODE_P (GET_MODE (x)))
&& loongarch_const_vector_same_int_p (x, GET_MODE (x), -512, 511))
return 1;
/* Fall through. */
@@ -3257,10 +3265,11 @@ loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src)
/* Both src and dest are non-registers; one special case is supported where
the source is (const_int 0) and the store can source the zero register.
- LSX is never able to source the zero register directly in
+ LSX and LASX are never able to source the zero register directly in
memory operations. */
if (!register_operand (dest, mode) && !register_operand (src, mode)
- && (!const_0_operand (src, mode) || LSX_SUPPORTED_MODE_P (mode)))
+ && (!const_0_operand (src, mode)
+ || LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)))
{
loongarch_emit_move (dest, force_reg (mode, src));
return true;
@@ -3842,6 +3851,7 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
int misalign ATTRIBUTE_UNUSED)
{
unsigned elements;
+ machine_mode mode = vectype != NULL ? TYPE_MODE (vectype) : DImode;
switch (type_of_cost)
{
@@ -3858,7 +3868,8 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return 1;
case vec_perm:
- return 1;
+ return LASX_SUPPORTED_MODE_P (mode)
+ && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
case unaligned_load:
case vector_gather_load:
@@ -3939,6 +3950,10 @@ loongarch_split_move_p (rtx dest, rtx src)
if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
return loongarch_split_128bit_move_p (dest, src);
+ /* Check if LASX moves need splitting. */
+ if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+ return loongarch_split_256bit_move_p (dest, src);
+
/* Otherwise split all multiword moves. */
return size > UNITS_PER_WORD;
}
@@ -3954,6 +3969,8 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_)
gcc_checking_assert (loongarch_split_move_p (dest, src));
if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
loongarch_split_128bit_move (dest, src);
+ else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+ loongarch_split_256bit_move (dest, src);
else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
{
if (!TARGET_64BIT && GET_MODE (dest) == DImode)
@@ -4119,7 +4136,7 @@ const char *
loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
{
int index = exact_log2 (GET_MODE_SIZE (mode));
- if (!IN_RANGE (index, 2, 4))
+ if (!IN_RANGE (index, 2, 5))
return NULL;
struct loongarch_address_info info;
@@ -4128,17 +4145,19 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
|| !loongarch_legitimate_address_p (mode, x, false))
return NULL;
- const char *const insn[][3] =
+ const char *const insn[][4] =
{
{
"fstx.s\t%1,%0",
"fstx.d\t%1,%0",
- "vstx\t%w1,%0"
+ "vstx\t%w1,%0",
+ "xvstx\t%u1,%0"
},
{
"fldx.s\t%0,%1",
"fldx.d\t%0,%1",
- "vldx\t%w0,%1"
+ "vldx\t%w0,%1",
+ "xvldx\t%u0,%1"
}
};
@@ -4172,6 +4191,34 @@ loongarch_split_128bit_move_p (rtx dest, rtx src)
return true;
}
+/* Return true if a 256-bit move from SRC to DEST should be split. */
+
+bool
+loongarch_split_256bit_move_p (rtx dest, rtx src)
+{
+ /* LASX-to-LASX moves can be done in a single instruction. */
+ if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
+ return false;
+
+ /* Check for LASX loads and stores. */
+ if (FP_REG_RTX_P (dest) && MEM_P (src))
+ return false;
+ if (FP_REG_RTX_P (src) && MEM_P (dest))
+ return false;
+
+ /* Check for LASX set to an immediate const vector with valid replicated
+ element. */
+ if (FP_REG_RTX_P (dest)
+ && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
+ return false;
+
+ /* Check for LASX load zero immediate. */
+ if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
+ return false;
+
+ return true;
+}
+
/* Split a 128-bit move from SRC to DEST. */
void
@@ -4263,6 +4310,97 @@ loongarch_split_128bit_move (rtx dest, rtx src)
}
}
+/* Split a 256-bit move from SRC to DEST. */
+
+void
+loongarch_split_256bit_move (rtx dest, rtx src)
+{
+ int byte, index;
+ rtx low_dest, low_src, d, s;
+
+ if (FP_REG_RTX_P (dest))
+ {
+ gcc_assert (!MEM_P (src));
+
+ rtx new_dest = dest;
+ if (!TARGET_64BIT)
+ {
+ if (GET_MODE (dest) != V8SImode)
+ new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0);
+ }
+ else
+ {
+ if (GET_MODE (dest) != V4DImode)
+ new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0);
+ }
+
+ for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
+ byte += UNITS_PER_WORD, index++)
+ {
+ s = loongarch_subword_at_byte (src, byte);
+ if (!TARGET_64BIT)
+ emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest,
+ GEN_INT (1 << index)));
+ else
+ emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest,
+ GEN_INT (1 << index)));
+ }
+ }
+ else if (FP_REG_RTX_P (src))
+ {
+ gcc_assert (!MEM_P (dest));
+
+ rtx new_src = src;
+ if (!TARGET_64BIT)
+ {
+ if (GET_MODE (src) != V8SImode)
+ new_src = simplify_gen_subreg (V8SImode, src, GET_MODE (src), 0);
+ }
+ else
+ {
+ if (GET_MODE (src) != V4DImode)
+ new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0);
+ }
+
+ for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src));
+ byte += UNITS_PER_WORD, index++)
+ {
+ d = loongarch_subword_at_byte (dest, byte);
+ if (!TARGET_64BIT)
+ emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
+ else
+ emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
+ }
+ }
+ else
+ {
+ low_dest = loongarch_subword_at_byte (dest, 0);
+ low_src = loongarch_subword_at_byte (src, 0);
+ gcc_assert (REG_P (low_dest) && REG_P (low_src));
+ /* Make sure the source register is not written before reading. */
+ if (REGNO (low_dest) <= REGNO (low_src))
+ {
+ for (byte = 0; byte < GET_MODE_SIZE (TImode);
+ byte += UNITS_PER_WORD)
+ {
+ d = loongarch_subword_at_byte (dest, byte);
+ s = loongarch_subword_at_byte (src, byte);
+ loongarch_emit_move (d, s);
+ }
+ }
+ else
+ {
+ for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
+ byte -= UNITS_PER_WORD)
+ {
+ d = loongarch_subword_at_byte (dest, byte);
+ s = loongarch_subword_at_byte (src, byte);
+ loongarch_emit_move (d, s);
+ }
+ }
+ }
+}
+
/* Split a COPY_S.D with operands DEST, SRC and INDEX. GEN is a function
used to generate subregs. */
@@ -4350,11 +4488,12 @@ loongarch_output_move (rtx dest, rtx src)
machine_mode mode = GET_MODE (dest);
bool dbl_p = (GET_MODE_SIZE (mode) == 8);
bool lsx_p = LSX_SUPPORTED_MODE_P (mode);
+ bool lasx_p = LASX_SUPPORTED_MODE_P (mode);
if (loongarch_split_move_p (dest, src))
return "#";
- if ((lsx_p)
+ if ((lsx_p || lasx_p)
&& dest_code == REG && FP_REG_P (REGNO (dest))
&& src_code == CONST_VECTOR
&& CONST_INT_P (CONST_VECTOR_ELT (src, 0)))
@@ -4364,6 +4503,8 @@ loongarch_output_move (rtx dest, rtx src)
{
case 16:
return "vrepli.%v0\t%w0,%E1";
+ case 32:
+ return "xvrepli.%v0\t%u0,%E1";
default: gcc_unreachable ();
}
}
@@ -4378,13 +4519,15 @@ loongarch_output_move (rtx dest, rtx src)
if (FP_REG_P (REGNO (dest)))
{
- if (lsx_p)
+ if (lsx_p || lasx_p)
{
gcc_assert (src == CONST0_RTX (GET_MODE (src)));
switch (GET_MODE_SIZE (mode))
{
case 16:
return "vrepli.b\t%w0,0";
+ case 32:
+ return "xvrepli.b\t%u0,0";
default:
gcc_unreachable ();
}
@@ -4517,12 +4660,14 @@ loongarch_output_move (rtx dest, rtx src)
{
if (dest_code == REG && FP_REG_P (REGNO (dest)))
{
- if (lsx_p)
+ if (lsx_p || lasx_p)
{
switch (GET_MODE_SIZE (mode))
{
case 16:
return "vori.b\t%w0,%w1,0";
+ case 32:
+ return "xvori.b\t%u0,%u1,0";
default:
gcc_unreachable ();
}
@@ -4540,12 +4685,14 @@ loongarch_output_move (rtx dest, rtx src)
if (insn)
return insn;
- if (lsx_p)
+ if (lsx_p || lasx_p)
{
switch (GET_MODE_SIZE (mode))
{
case 16:
return "vst\t%w1,%0";
+ case 32:
+ return "xvst\t%u1,%0";
default:
gcc_unreachable ();
}
@@ -4566,12 +4713,14 @@ loongarch_output_move (rtx dest, rtx src)
if (insn)
return insn;
- if (lsx_p)
+ if (lsx_p || lasx_p)
{
switch (GET_MODE_SIZE (mode))
{
case 16:
return "vld\t%w0,%1";
+ case 32:
+ return "xvld\t%u0,%1";
default:
gcc_unreachable ();
}
@@ -5599,18 +5748,27 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
'z' for (eq:?I ...), 'n' for (ne:?I ...).
't' Like 'T', but with the EQ/NE cases reversed
- 'V' Print exact log2 of CONST_INT OP element 0 of a replicated
- CONST_VECTOR in decimal.
+ 'F' Print the FPU branch condition for comparison OP.
+ 'W' Print the inverse of the FPU branch condition for comparison OP.
+ 'w' Print a LSX register.
+ 'u' Print a LASX register.
+ 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+ 'z' for (eq:?I ...), 'n' for (ne:?I ...).
+ 't' Like 'T', but with the EQ/NE cases reversed
+ 'Y' Print loongarch_fp_conditions[INTVAL (OP)]
+ 'Z' Print OP and a comma for 8CC, otherwise print nothing.
+ 'z' Print $0 if OP is zero, otherwise print OP normally.
'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively.
+ 'V' Print exact log2 of CONST_INT OP element 0 of a replicated
+ CONST_VECTOR in decimal.
'W' Print the inverse of the FPU branch condition for comparison OP.
- 'w' Print a LSX register.
'X' Print CONST_INT OP in hexadecimal format.
'x' Print the low 16 bits of CONST_INT OP in hexadecimal format.
'Y' Print loongarch_fp_conditions[INTVAL (OP)]
'y' Print exact log2 of CONST_INT OP in decimal.
'Z' Print OP and a comma for 8CC, otherwise print nothing.
- 'z' Print $r0 if OP is zero, otherwise print OP normally. */
+ 'z' Print $0 if OP is zero, otherwise print OP normally. */
static void
loongarch_print_operand (FILE *file, rtx op, int letter)
@@ -5752,46 +5910,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
output_operand_lossage ("invalid use of '%%%c'", letter);
break;
- case 'v':
- switch (GET_MODE (op))
- {
- case E_V16QImode:
- case E_V32QImode:
- fprintf (file, "b");
- break;
- case E_V8HImode:
- case E_V16HImode:
- fprintf (file, "h");
- break;
- case E_V4SImode:
- case E_V4SFmode:
- case E_V8SImode:
- case E_V8SFmode:
- fprintf (file, "w");
- break;
- case E_V2DImode:
- case E_V2DFmode:
- case E_V4DImode:
- case E_V4DFmode:
- fprintf (file, "d");
- break;
- default:
- output_operand_lossage ("invalid use of '%%%c'", letter);
- }
- break;
-
case 'W':
loongarch_print_float_branch_condition (file, reverse_condition (code),
letter);
break;
- case 'w':
- if (code == REG && LSX_REG_P (REGNO (op)))
- fprintf (file, "$vr%s", &reg_names[REGNO (op)][2]);
- else
- output_operand_lossage ("invalid use of '%%%c'", letter);
- break;
-
case 'x':
if (CONST_INT_P (op))
fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op) & 0xffff);
@@ -5833,6 +5956,48 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
fputc (',', file);
break;
+ case 'w':
+ if (code == REG && LSX_REG_P (REGNO (op)))
+ fprintf (file, "$vr%s", &reg_names[REGNO (op)][2]);
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
+ case 'u':
+ if (code == REG && LASX_REG_P (REGNO (op)))
+ fprintf (file, "$xr%s", &reg_names[REGNO (op)][2]);
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
+ case 'v':
+ switch (GET_MODE (op))
+ {
+ case E_V16QImode:
+ case E_V32QImode:
+ fprintf (file, "b");
+ break;
+ case E_V8HImode:
+ case E_V16HImode:
+ fprintf (file, "h");
+ break;
+ case E_V4SImode:
+ case E_V4SFmode:
+ case E_V8SImode:
+ case E_V8SFmode:
+ fprintf (file, "w");
+ break;
+ case E_V2DImode:
+ case E_V2DFmode:
+ case E_V4DImode:
+ case E_V4DFmode:
+ fprintf (file, "d");
+ break;
+ default:
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ }
+ break;
+
default:
switch (code)
{
@@ -6163,13 +6328,18 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
size = GET_MODE_SIZE (mode);
mclass = GET_MODE_CLASS (mode);
- if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode))
+ if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode)
+ && !LASX_SUPPORTED_MODE_P (mode))
return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD;
/* For LSX, allow TImode and 128-bit vector modes in all FPR. */
if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode))
return true;
+ /* FIXME: For LASX, allow TImode and 256-bit vector modes in all FPR. */
+ if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode))
+ return true;
+
if (FP_REG_P (regno))
{
if (mclass == MODE_FLOAT
@@ -6222,6 +6392,9 @@ loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode)
if (LSX_SUPPORTED_MODE_P (mode))
return 1;
+ if (LASX_SUPPORTED_MODE_P (mode))
+ return 1;
+
return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
}
@@ -6251,7 +6424,10 @@ loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode)
{
if (loongarch_hard_regno_mode_ok (FP_REG_FIRST, mode))
{
- if (LSX_SUPPORTED_MODE_P (mode))
+ /* FIXME: verify LASX/LSX register-size handling here. */
+ if (LASX_SUPPORTED_MODE_P (mode))
+ size = MIN (size, UNITS_PER_LASX_REG);
+ else if (LSX_SUPPORTED_MODE_P (mode))
size = MIN (size, UNITS_PER_LSX_REG);
else
size = MIN (size, UNITS_PER_FPREG);
@@ -6269,6 +6445,10 @@ static bool
loongarch_can_change_mode_class (machine_mode from, machine_mode to,
reg_class_t rclass)
{
+ /* Allow conversions between different LSX/LASX vector modes. */
+ if (LASX_SUPPORTED_MODE_P (from) && LASX_SUPPORTED_MODE_P (to))
+ return true;
+
/* Allow conversions between different LSX vector modes. */
if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))
return true;
@@ -6292,7 +6472,8 @@ loongarch_mode_ok_for_mov_fmt_p (machine_mode mode)
return TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT;
default:
- return LSX_SUPPORTED_MODE_P (mode);
+ return ISA_HAS_LASX ? LASX_SUPPORTED_MODE_P (mode)
+ : LSX_SUPPORTED_MODE_P (mode);
}
}
@@ -6494,7 +6675,8 @@ loongarch_valid_pointer_mode (scalar_int_mode mode)
static bool
loongarch_vector_mode_supported_p (machine_mode mode)
{
- return LSX_SUPPORTED_MODE_P (mode);
+ return ISA_HAS_LASX ? LASX_SUPPORTED_MODE_P (mode)
+ : LSX_SUPPORTED_MODE_P (mode);
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
@@ -6520,19 +6702,19 @@ loongarch_preferred_simd_mode (scalar_mode mode)
switch (mode)
{
case E_QImode:
- return E_V16QImode;
+ return ISA_HAS_LASX ? E_V32QImode : E_V16QImode;
case E_HImode:
- return E_V8HImode;
+ return ISA_HAS_LASX ? E_V16HImode : E_V8HImode;
case E_SImode:
- return E_V4SImode;
+ return ISA_HAS_LASX ? E_V8SImode : E_V4SImode;
case E_DImode:
- return E_V2DImode;
+ return ISA_HAS_LASX ? E_V4DImode : E_V2DImode;
case E_SFmode:
- return E_V4SFmode;
+ return ISA_HAS_LASX ? E_V8SFmode : E_V4SFmode;
case E_DFmode:
- return E_V2DFmode;
+ return ISA_HAS_LASX ? E_V4DFmode : E_V2DFmode;
default:
break;
@@ -6543,7 +6725,12 @@ loongarch_preferred_simd_mode (scalar_mode mode)
static unsigned int
loongarch_autovectorize_vector_modes (vector_modes *modes, bool)
{
- if (ISA_HAS_LSX)
+ if (ISA_HAS_LASX)
+ {
+ modes->safe_push (V32QImode);
+ modes->safe_push (V16QImode);
+ }
+ else if (ISA_HAS_LSX)
{
modes->safe_push (V16QImode);
}
@@ -6723,11 +6910,18 @@ const char *
loongarch_lsx_output_division (const char *division, rtx *operands)
{
const char *s;
+ machine_mode mode = GET_MODE (*operands);
s = division;
if (TARGET_CHECK_ZERO_DIV)
{
- if (ISA_HAS_LSX)
+ if (ISA_HAS_LASX && GET_MODE_SIZE (mode) == 32)
+ {
+ output_asm_insn ("xvsetallnez.%v0\t$fcc7,%u2",operands);
+ output_asm_insn (s, operands);
+ output_asm_insn ("bcnez\t$fcc7,1f", operands);
+ }
+ else if (ISA_HAS_LSX)
{
output_asm_insn ("vsetallnez.%v0\t$fcc7,%w2",operands);
output_asm_insn (s, operands);
@@ -7566,7 +7760,7 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
rtx_insn *insn;
unsigned i;
- if (!ISA_HAS_LSX)
+ if (!ISA_HAS_LSX && !ISA_HAS_LASX)
return false;
for (i = 0; i < d->nelt; i++)
@@ -7590,40 +7784,484 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
return true;
}
-void
-loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
+/* Try to simplify a two vector permutation using 2 intra-lane interleave
+ insns and cross-lane shuffle for 32-byte vectors. */
+
+static bool
+loongarch_expand_vec_perm_interleave (struct expand_vec_perm_d *d)
{
- machine_mode vmode = GET_MODE (target);
+ unsigned i, nelt;
+ rtx t1,t2,t3;
+ rtx (*gen_high) (rtx, rtx, rtx);
+ rtx (*gen_low) (rtx, rtx, rtx);
+ machine_mode mode = GET_MODE (d->target);
- switch (vmode)
+ if (d->one_vector_p)
+ return false;
+ if (ISA_HAS_LASX && GET_MODE_SIZE (d->vmode) == 32)
+ ;
+ else
+ return false;
+
+ nelt = d->nelt;
+ if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
+ return false;
+ for (i = 0; i < nelt; i += 2)
+ if (d->perm[i] != d->perm[0] + i / 2
+ || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ switch (d->vmode)
{
- case E_V16QImode:
- emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
+ case E_V32QImode:
+ gen_high = gen_lasx_xvilvh_b;
+ gen_low = gen_lasx_xvilvl_b;
break;
- case E_V2DFmode:
- emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0));
+ case E_V16HImode:
+ gen_high = gen_lasx_xvilvh_h;
+ gen_low = gen_lasx_xvilvl_h;
break;
- case E_V2DImode:
- emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0));
+ case E_V8SImode:
+ gen_high = gen_lasx_xvilvh_w;
+ gen_low = gen_lasx_xvilvl_w;
break;
- case E_V4SFmode:
- emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0));
+ case E_V4DImode:
+ gen_high = gen_lasx_xvilvh_d;
+ gen_low = gen_lasx_xvilvl_d;
break;
- case E_V4SImode:
- emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));
+ case E_V8SFmode:
+ gen_high = gen_lasx_xvilvh_w_f;
+ gen_low = gen_lasx_xvilvl_w_f;
break;
- case E_V8HImode:
- emit_insn (gen_lsx_vshuf_h (target, sel, op1, op0));
+ case E_V4DFmode:
+ gen_high = gen_lasx_xvilvh_d_f;
+ gen_low = gen_lasx_xvilvl_d_f;
break;
default:
- break;
+ gcc_unreachable ();
+ }
+
+ t1 = gen_reg_rtx (mode);
+ t2 = gen_reg_rtx (mode);
+ emit_insn (gen_high (t1, d->op0, d->op1));
+ emit_insn (gen_low (t2, d->op0, d->op1));
+ if (mode == V4DFmode || mode == V8SFmode)
+ {
+ t3 = gen_reg_rtx (V4DFmode);
+ if (d->perm[0])
+ emit_insn (gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1),
+ gen_lowpart (V4DFmode, t2),
+ GEN_INT (0x31)));
+ else
+ emit_insn (gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1),
+ gen_lowpart (V4DFmode, t2),
+ GEN_INT (0x20)));
}
+ else
+ {
+ t3 = gen_reg_rtx (V4DImode);
+ if (d->perm[0])
+ emit_insn (gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1),
+ gen_lowpart (V4DImode, t2),
+ GEN_INT (0x31)));
+ else
+ emit_insn (gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1),
+ gen_lowpart (V4DImode, t2),
+ GEN_INT (0x20)));
+ }
+ emit_move_insn (d->target, gen_lowpart (mode, t3));
+ return true;
}
+/* Implement extract-even and extract-odd permutations. */
+
static bool
-loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
+loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
- int i;
+ rtx t1;
+ machine_mode mode = GET_MODE (d->target);
+
+ if (d->testing_p)
+ return true;
+
+ t1 = gen_reg_rtx (mode);
+
+ switch (d->vmode)
+ {
+ case E_V4DFmode:
+ /* Shuffle the lanes around into { 0 4 2 6 } and { 1 5 3 7 }. */
+ if (odd)
+ emit_insn (gen_lasx_xvilvh_d_f (t1, d->op0, d->op1));
+ else
+ emit_insn (gen_lasx_xvilvl_d_f (t1, d->op0, d->op1));
+
+ /* Shuffle within the 256-bit lanes to produce the result required.
+ { 0 2 4 6 } | { 1 3 5 7 }. */
+ emit_insn (gen_lasx_xvpermi_d_v4df (d->target, t1, GEN_INT (0xd8)));
+ break;
+
+ case E_V4DImode:
+ if (odd)
+ emit_insn (gen_lasx_xvilvh_d (t1, d->op0, d->op1));
+ else
+ emit_insn (gen_lasx_xvilvl_d (t1, d->op0, d->op1));
+
+ emit_insn (gen_lasx_xvpermi_d_v4di (d->target, t1, GEN_INT (0xd8)));
+ break;
+
+ case E_V8SFmode:
+ /* Shuffle the lanes around into:
+ { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
+ if (odd)
+ emit_insn (gen_lasx_xvpickod_w_f (t1, d->op0, d->op1));
+ else
+ emit_insn (gen_lasx_xvpickev_w_f (t1, d->op0, d->op1));
+
+ /* Shuffle within the 256-bit lanes to produce the result required.
+ { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
+ emit_insn (gen_lasx_xvpermi_d_v8sf (d->target, t1, GEN_INT (0xd8)));
+ break;
+
+ case E_V8SImode:
+ if (odd)
+ emit_insn (gen_lasx_xvpickod_w (t1, d->op0, d->op1));
+ else
+ emit_insn (gen_lasx_xvpickev_w (t1, d->op0, d->op1));
+
+ emit_insn (gen_lasx_xvpermi_d_v8si (d->target, t1, GEN_INT (0xd8)));
+ break;
+
+ case E_V16HImode:
+ if (odd)
+ emit_insn (gen_lasx_xvpickod_h (t1, d->op0, d->op1));
+ else
+ emit_insn (gen_lasx_xvpickev_h (t1, d->op0, d->op1));
+
+ emit_insn (gen_lasx_xvpermi_d_v16hi (d->target, t1, GEN_INT (0xd8)));
+ break;
+
+ case E_V32QImode:
+ if (odd)
+ emit_insn (gen_lasx_xvpickod_b (t1, d->op0, d->op1));
+ else
+ emit_insn (gen_lasx_xvpickev_b (t1, d->op0, d->op1));
+
+ emit_insn (gen_lasx_xvpermi_d_v32qi (d->target, t1, GEN_INT (0xd8)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return true;
+}
+
+/* Pattern match extract-even and extract-odd permutations. */
+
+static bool
+loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
+{
+ unsigned i, odd, nelt = d->nelt;
+ if (!ISA_HAS_LASX)
+ return false;
+
+ odd = d->perm[0];
+ if (odd != 0 && odd != 1)
+ return false;
+
+ for (i = 1; i < nelt; ++i)
+ if (d->perm[i] != 2 * i + odd)
+ return false;
+
+ return loongarch_expand_vec_perm_even_odd_1 (d, odd);
+}
+
+/* Expand a variable vector permutation for LASX. */
+
+void
+loongarch_expand_vec_perm_1 (rtx operands[])
+{
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx mask = operands[3];
+
+ bool one_operand_shuffle = rtx_equal_p (op0, op1);
+ rtx t1 = NULL;
+ rtx t2 = NULL;
+ rtx t3, t4, t5, t6, vt = NULL;
+ rtx vec[32] = {NULL};
+ machine_mode mode = GET_MODE (op0);
+ machine_mode maskmode = GET_MODE (mask);
+ int w, i;
+
+ /* Number of elements in the vector. */
+ w = GET_MODE_NUNITS (mode);
+
+ rtx round_data[MAX_VECT_LEN];
+ rtx round_reg, round_data_rtx;
+
+ if (mode != E_V32QImode)
+ {
+ for (int i = 0; i < w; i += 1)
+ {
+ round_data[i] = GEN_INT (0x1f);
+ }
+
+ if (mode == E_V4DFmode)
+ {
+ round_data_rtx = gen_rtx_CONST_VECTOR (E_V4DImode,
+ gen_rtvec_v (w, round_data));
+ round_reg = gen_reg_rtx (E_V4DImode);
+ }
+ else if (mode == E_V8SFmode)
+ {
+
+ round_data_rtx = gen_rtx_CONST_VECTOR (E_V8SImode,
+ gen_rtvec_v (w, round_data));
+ round_reg = gen_reg_rtx (E_V8SImode);
+ }
+ else
+ {
+ round_data_rtx = gen_rtx_CONST_VECTOR (mode,
+ gen_rtvec_v (w, round_data));
+ round_reg = gen_reg_rtx (mode);
+ }
+
+ emit_move_insn (round_reg, round_data_rtx);
+ switch (mode)
+ {
+ case E_V32QImode:
+ emit_insn (gen_andv32qi3 (mask, mask, round_reg));
+ break;
+ case E_V16HImode:
+ emit_insn (gen_andv16hi3 (mask, mask, round_reg));
+ break;
+ case E_V8SImode:
+ case E_V8SFmode:
+ emit_insn (gen_andv8si3 (mask, mask, round_reg));
+ break;
+ case E_V4DImode:
+ case E_V4DFmode:
+ emit_insn (gen_andv4di3 (mask, mask, round_reg));
+ break;
+ default:
+ gcc_unreachable ();
+ break;
+ }
+ }
+
+ if (mode == V4DImode || mode == V4DFmode)
+ {
+ maskmode = mode = V8SImode;
+ w = 8;
+ t1 = gen_reg_rtx (maskmode);
+
+ /* Replicate the low bits of the V4DImode mask into V8SImode:
+ mask = { A B C D }
+ t1 = { A A B B C C D D }. */
+ for (i = 0; i < w / 2; ++i)
+ vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
+ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
+ vt = force_reg (maskmode, vt);
+ mask = gen_lowpart (maskmode, mask);
+ emit_insn (gen_lasx_xvperm_w (t1, mask, vt));
+
+      /* Multiply the shuffle indices by two. */
+ t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
+ OPTAB_DIRECT);
+
+      /* Add one to the odd shuffle indices:
+ t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
+ for (i = 0; i < w / 2; ++i)
+ {
+ vec[i * 2] = const0_rtx;
+ vec[i * 2 + 1] = const1_rtx;
+ }
+ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
+ vt = validize_mem (force_const_mem (maskmode, vt));
+ t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
+ OPTAB_DIRECT);
+
+ /* Continue as if V8SImode (resp. V32QImode) was used initially. */
+ operands[3] = mask = t1;
+ target = gen_reg_rtx (mode);
+ op0 = gen_lowpart (mode, op0);
+ op1 = gen_lowpart (mode, op1);
+ }
+
+ switch (mode)
+ {
+ case E_V8SImode:
+ if (one_operand_shuffle)
+ {
+ emit_insn (gen_lasx_xvperm_w (target, op0, mask));
+ if (target != operands[0])
+ emit_move_insn (operands[0],
+ gen_lowpart (GET_MODE (operands[0]), target));
+ }
+ else
+ {
+ t1 = gen_reg_rtx (V8SImode);
+ t2 = gen_reg_rtx (V8SImode);
+ emit_insn (gen_lasx_xvperm_w (t1, op0, mask));
+ emit_insn (gen_lasx_xvperm_w (t2, op1, mask));
+ goto merge_two;
+ }
+ return;
+
+ case E_V8SFmode:
+ mask = gen_lowpart (V8SImode, mask);
+ if (one_operand_shuffle)
+ emit_insn (gen_lasx_xvperm_w_f (target, op0, mask));
+ else
+ {
+ t1 = gen_reg_rtx (V8SFmode);
+ t2 = gen_reg_rtx (V8SFmode);
+ emit_insn (gen_lasx_xvperm_w_f (t1, op0, mask));
+ emit_insn (gen_lasx_xvperm_w_f (t2, op1, mask));
+ goto merge_two;
+ }
+ return;
+
+ case E_V16HImode:
+ if (one_operand_shuffle)
+ {
+ t1 = gen_reg_rtx (V16HImode);
+ t2 = gen_reg_rtx (V16HImode);
+ emit_insn (gen_lasx_xvpermi_d_v16hi (t1, op0, GEN_INT (0x44)));
+ emit_insn (gen_lasx_xvpermi_d_v16hi (t2, op0, GEN_INT (0xee)));
+ emit_insn (gen_lasx_xvshuf_h (target, mask, t2, t1));
+ }
+ else
+ {
+ t1 = gen_reg_rtx (V16HImode);
+ t2 = gen_reg_rtx (V16HImode);
+ t3 = gen_reg_rtx (V16HImode);
+ t4 = gen_reg_rtx (V16HImode);
+ t5 = gen_reg_rtx (V16HImode);
+ t6 = gen_reg_rtx (V16HImode);
+ emit_insn (gen_lasx_xvpermi_d_v16hi (t3, op0, GEN_INT (0x44)));
+ emit_insn (gen_lasx_xvpermi_d_v16hi (t4, op0, GEN_INT (0xee)));
+ emit_insn (gen_lasx_xvshuf_h (t1, mask, t4, t3));
+ emit_insn (gen_lasx_xvpermi_d_v16hi (t5, op1, GEN_INT (0x44)));
+ emit_insn (gen_lasx_xvpermi_d_v16hi (t6, op1, GEN_INT (0xee)));
+ emit_insn (gen_lasx_xvshuf_h (t2, mask, t6, t5));
+ goto merge_two;
+ }
+ return;
+
+ case E_V32QImode:
+ if (one_operand_shuffle)
+ {
+ t1 = gen_reg_rtx (V32QImode);
+ t2 = gen_reg_rtx (V32QImode);
+ emit_insn (gen_lasx_xvpermi_d_v32qi (t1, op0, GEN_INT (0x44)));
+ emit_insn (gen_lasx_xvpermi_d_v32qi (t2, op0, GEN_INT (0xee)));
+ emit_insn (gen_lasx_xvshuf_b (target, t2, t1, mask));
+ }
+ else
+ {
+ t1 = gen_reg_rtx (V32QImode);
+ t2 = gen_reg_rtx (V32QImode);
+ t3 = gen_reg_rtx (V32QImode);
+ t4 = gen_reg_rtx (V32QImode);
+ t5 = gen_reg_rtx (V32QImode);
+ t6 = gen_reg_rtx (V32QImode);
+ emit_insn (gen_lasx_xvpermi_d_v32qi (t3, op0, GEN_INT (0x44)));
+ emit_insn (gen_lasx_xvpermi_d_v32qi (t4, op0, GEN_INT (0xee)));
+ emit_insn (gen_lasx_xvshuf_b (t1, t4, t3, mask));
+ emit_insn (gen_lasx_xvpermi_d_v32qi (t5, op1, GEN_INT (0x44)));
+ emit_insn (gen_lasx_xvpermi_d_v32qi (t6, op1, GEN_INT (0xee)));
+ emit_insn (gen_lasx_xvshuf_b (t2, t6, t5, mask));
+ goto merge_two;
+ }
+ return;
+
+ default:
+ gcc_assert (GET_MODE_SIZE (mode) == 32);
+ break;
+ }
+
+merge_two:
+ /* Then merge them together. The key is whether any given control
+ element contained a bit set that indicates the second word. */
+ rtx xops[6];
+ mask = operands[3];
+ vt = GEN_INT (w);
+ vt = gen_const_vec_duplicate (maskmode, vt);
+ vt = force_reg (maskmode, vt);
+ mask = expand_simple_binop (maskmode, AND, mask, vt,
+ NULL_RTX, 0, OPTAB_DIRECT);
+ if (GET_MODE (target) != mode)
+ target = gen_reg_rtx (mode);
+ xops[0] = target;
+ xops[1] = gen_lowpart (mode, t2);
+ xops[2] = gen_lowpart (mode, t1);
+ xops[3] = gen_rtx_EQ (maskmode, mask, vt);
+ xops[4] = mask;
+ xops[5] = vt;
+
+ loongarch_expand_vec_cond_expr (mode, maskmode, xops);
+ if (target != operands[0])
+ emit_move_insn (operands[0],
+ gen_lowpart (GET_MODE (operands[0]), target));
+}
+
+void
+loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
+{
+ machine_mode vmode = GET_MODE (target);
+ auto nelt = GET_MODE_NUNITS (vmode);
+ auto round_reg = gen_reg_rtx (vmode);
+ rtx round_data[MAX_VECT_LEN];
+
+ for (int i = 0; i < nelt; i += 1)
+ {
+ round_data[i] = GEN_INT (0x1f);
+ }
+
+ rtx round_data_rtx = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, round_data));
+ emit_move_insn (round_reg, round_data_rtx);
+
+ switch (vmode)
+ {
+ case E_V16QImode:
+ emit_insn (gen_andv16qi3 (sel, sel, round_reg));
+ emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
+ break;
+ case E_V2DFmode:
+ emit_insn (gen_andv2di3 (sel, sel, round_reg));
+ emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0));
+ break;
+ case E_V2DImode:
+ emit_insn (gen_andv2di3 (sel, sel, round_reg));
+ emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0));
+ break;
+ case E_V4SFmode:
+ emit_insn (gen_andv4si3 (sel, sel, round_reg));
+ emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0));
+ break;
+ case E_V4SImode:
+ emit_insn (gen_andv4si3 (sel, sel, round_reg));
+ emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));
+ break;
+ case E_V8HImode:
+ emit_insn (gen_andv8hi3 (sel, sel, round_reg));
+ emit_insn (gen_lsx_vshuf_h (target, sel, op1, op0));
+ break;
+ default:
+ break;
+ }
+}
+
+static bool
+loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
+{
+ int i;
rtx target, op0, op1, sel, tmp;
rtx rperm[MAX_VECT_LEN];
@@ -7724,25 +8362,1302 @@ loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
}
- if (loongarch_expand_lsx_shuffle (d))
- return true;
- return false;
-}
-
-/* Implementation of constant vector permuatation. This function identifies
- * recognized pattern of permuation selector argument, and use one or more
- * instruction(s) to finish the permutation job correctly. For unsupported
- * patterns, it will return false. */
-
-static bool
-loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
-{
- /* Although we have the LSX vec_perm<mode> template, there's still some
- 128bit vector permuatation operations send to vectorize_vec_perm_const.
- In this case, we just simpliy wrap them by single vshuf.* instruction,
- because LSX vshuf.* instruction just have the same behavior that GCC
- expects. */
- return loongarch_try_expand_lsx_vshuf_const (d);
+ if (loongarch_expand_lsx_shuffle (d))
+ return true;
+ if (loongarch_expand_vec_perm_even_odd (d))
+ return true;
+ if (loongarch_expand_vec_perm_interleave (d))
+ return true;
+ return false;
+}
+
+/* Following are the assist functions for const vector permutation support. */
+static bool
+loongarch_is_quad_duplicate (struct expand_vec_perm_d *d)
+{
+ if (d->perm[0] >= d->nelt / 2)
+ return false;
+
+ bool result = true;
+ unsigned char lhs = d->perm[0];
+ unsigned char rhs = d->perm[d->nelt / 2];
+
+ if ((rhs - lhs) != d->nelt / 2)
+ return false;
+
+ for (int i = 1; i < d->nelt; i += 1)
+ {
+ if ((i < d->nelt / 2) && (d->perm[i] != lhs))
+ {
+ result = false;
+ break;
+ }
+ if ((i > d->nelt / 2) && (d->perm[i] != rhs))
+ {
+ result = false;
+ break;
+ }
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_double_duplicate (struct expand_vec_perm_d *d)
+{
+ if (!d->one_vector_p)
+ return false;
+
+ if (d->nelt < 8)
+ return false;
+
+ bool result = true;
+ unsigned char buf = d->perm[0];
+
+ for (int i = 1; i < d->nelt; i += 2)
+ {
+ if (d->perm[i] != buf)
+ {
+ result = false;
+ break;
+ }
+ if (d->perm[i - 1] != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += d->nelt / 4;
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_odd_extraction (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+ unsigned char buf = 1;
+
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 2;
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_even_extraction (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+ unsigned char buf = 0;
+
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 1;
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_extraction_permutation (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+ unsigned char buf = d->perm[0];
+
+ if (buf != 0 || buf != d->nelt)
+ return false;
+
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 2;
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_center_extraction (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+ unsigned buf = d->nelt / 2;
+
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 1;
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_reversing_permutation (struct expand_vec_perm_d *d)
+{
+ if (!d->one_vector_p)
+ return false;
+
+ bool result = true;
+ unsigned char buf = d->nelt - 1;
+
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (d->perm[i] != buf)
+ {
+ result = false;
+ break;
+ }
+
+ buf -= 1;
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_di_misalign_extract (struct expand_vec_perm_d *d)
+{
+ if (d->nelt != 4 && d->nelt != 8)
+ return false;
+
+ bool result = true;
+ unsigned char buf;
+
+ if (d->nelt == 4)
+ {
+ buf = 1;
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+
+ buf += 1;
+ }
+ }
+ else if (d->nelt == 8)
+ {
+ buf = 2;
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+
+ buf += 1;
+ }
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_si_misalign_extract (struct expand_vec_perm_d *d)
+{
+ if (d->vmode != E_V8SImode && d->vmode != E_V8SFmode)
+ return false;
+ bool result = true;
+ unsigned char buf = 1;
+
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 1;
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_lasx_lowpart_interleave (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+ unsigned char buf = 0;
+
+ for (int i = 0;i < d->nelt; i += 2)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 1;
+ }
+
+ if (result)
+ {
+ buf = d->nelt;
+ for (int i = 1; i < d->nelt; i += 2)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 1;
+ }
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_lasx_lowpart_interleave_2 (struct expand_vec_perm_d *d)
+{
+ if (d->vmode != E_V32QImode)
+ return false;
+ bool result = true;
+ unsigned char buf = 0;
+
+#define COMPARE_SELECTOR(INIT, BEGIN, END) \
+ buf = INIT; \
+ for (int i = BEGIN; i < END && result; i += 1) \
+ { \
+ if (buf != d->perm[i]) \
+ { \
+ result = false; \
+ break; \
+ } \
+ buf += 1; \
+ }
+
+ COMPARE_SELECTOR (0, 0, 8);
+ COMPARE_SELECTOR (32, 8, 16);
+ COMPARE_SELECTOR (8, 16, 24);
+ COMPARE_SELECTOR (40, 24, 32);
+
+#undef COMPARE_SELECTOR
+ return result;
+}
+
+static bool
+loongarch_is_lasx_lowpart_extract (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+ unsigned char buf = 0;
+
+ for (int i = 0; i < d->nelt / 2; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 1;
+ }
+
+ if (result)
+ {
+ buf = d->nelt;
+ for (int i = d->nelt / 2; i < d->nelt; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 1;
+ }
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_lasx_highpart_interleave (expand_vec_perm_d *d)
+{
+ bool result = true;
+ unsigned char buf = d->nelt / 2;
+
+ for (int i = 0; i < d->nelt; i += 2)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 1;
+ }
+
+ if (result)
+ {
+ buf = d->nelt + d->nelt / 2;
+ for (int i = 1; i < d->nelt;i += 2)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ buf += 1;
+ }
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_lasx_highpart_interleave_2 (struct expand_vec_perm_d *d)
+{
+ if (d->vmode != E_V32QImode)
+ return false;
+
+ bool result = true;
+ unsigned char buf = 0;
+
+#define COMPARE_SELECTOR(INIT, BEGIN, END) \
+ buf = INIT; \
+ for (int i = BEGIN; i < END && result; i += 1) \
+ { \
+ if (buf != d->perm[i]) \
+ { \
+ result = false; \
+ break; \
+ } \
+ buf += 1; \
+ }
+
+ COMPARE_SELECTOR (16, 0, 8);
+ COMPARE_SELECTOR (48, 8, 16);
+ COMPARE_SELECTOR (24, 16, 24);
+ COMPARE_SELECTOR (56, 24, 32);
+
+#undef COMPARE_SELECTOR
+ return result;
+}
+
+static bool
+loongarch_is_elem_duplicate (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+ unsigned char buf = d->perm[0];
+
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (buf != d->perm[i])
+ {
+ result = false;
+ break;
+ }
+ }
+
+ return result;
+}
+
+inline bool
+loongarch_is_op_reverse_perm (struct expand_vec_perm_d *d)
+{
+ return (d->vmode == E_V4DFmode)
+ && d->perm[0] == 2 && d->perm[1] == 3
+ && d->perm[2] == 0 && d->perm[3] == 1;
+}
+
+static bool
+loongarch_is_single_op_perm (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+
+ for (int i = 0; i < d->nelt; i += 1)
+ {
+ if (d->perm[i] >= d->nelt)
+ {
+ result = false;
+ break;
+ }
+ }
+
+ return result;
+}
+
+static bool
+loongarch_is_divisible_perm (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+
+ for (int i = 0; i < d->nelt / 2; i += 1)
+ {
+ if (d->perm[i] >= d->nelt)
+ {
+ result = false;
+ break;
+ }
+ }
+
+ if (result)
+ {
+ for (int i = d->nelt / 2; i < d->nelt; i += 1)
+ {
+ if (d->perm[i] < d->nelt)
+ {
+ result = false;
+ break;
+ }
+ }
+ }
+
+ return result;
+}
+
+inline bool
+loongarch_is_triple_stride_extract (struct expand_vec_perm_d *d)
+{
+ return (d->vmode == E_V4DImode || d->vmode == E_V4DFmode)
+ && d->perm[0] == 1 && d->perm[1] == 4
+ && d->perm[2] == 7 && d->perm[3] == 0;
+}
+
+/* In LASX, some permutation insn does not have the behavior that gcc expects
+ * when compiler wants to emit a vector permutation.
+ *
+ * 1. What GCC provides via vectorize_vec_perm_const ()'s parameter:
+ * When GCC wants to perform a vector permutation, it provides two op
+ * registers, one target register, and a selector.
+ * In const vector permutation case, GCC provides selector as a char array
+ * that contains the original values; in variable vector permutation
+ * (performed via vec_perm<mode> insn template), it provides a vector register.
+ * We assume that nelt is the elements numbers inside single vector in current
+ * 256bit vector mode.
+ *
+ * 2. What GCC expects to perform:
+ * Two op registers (op0, op1) will "combine" into a 512bit temp vector storage
+ * that has 2*nelt elements inside it; the low 256bit is op0, and high 256bit
+ * is op1, then the elements are indexed as below:
+ * 0 ~ nelt - 1 nelt ~ 2 * nelt - 1
+ * |-------------------------|-------------------------|
+ * Low 256bit (op0) High 256bit (op1)
+ * For example, the second element in op1 (V8SImode) will be indexed with 9.
+ * Selector is a vector that has the same mode and number of elements with
+ * op0,op1 and target, it's look like this:
+ * 0 ~ nelt - 1
+ * |-------------------------|
+ * 256bit (selector)
+ * It describes which element from 512bit temp vector storage will fit into
+ * target's every element slot.
+ * GCC expects that every element in selector can be ANY indices of 512bit
+ * vector storage (Selector can pick literally any element from op0 and op1, and
+ * then fits into any place of target register). This is also what LSX 128bit
+ * vshuf.* instruction do similarly, so we can handle 128bit vector permutation
+ * by single instruction easily.
+ *
+ * 3. What LASX permutation instruction does:
+ * In short, it just executes two independent 128bit vector permutations, and
+ * it's the reason that we need to do the jobs below. We will explain it.
+ * op0, op1, target, and selector will be separated into high 128bit and low
+ * 128bit, and do permutation as the description below:
+ *
+ * a) op0's low 128bit and op1's low 128bit "combines" into a 256bit temp
+ * vector storage (TVS1), elements are indexed as below:
+ * 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1
+ * |---------------------|---------------------| TVS1
+ * op0's low 128bit op1's low 128bit
+ * op0's high 128bit and op1's high 128bit are "combined" into TVS2 in the
+ * same way.
+ * 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1
+ * |---------------------|---------------------| TVS2
+ * op0's high 128bit op1's high 128bit
+ * b) Selector's low 128bit describes which elements from TVS1 will fit into
+ * target vector's low 128bit. No TVS2 elements are allowed.
+ * c) Selector's high 128bit describes which elements from TVS2 will fit into
+ * target vector's high 128bit. No TVS1 elements are allowed.
+ *
+ * As we can see, if we want to handle vector permutation correctly, we can
+ * achieve it in three ways:
+ * a) Modify selector's elements, to make sure that every elements can inform
+ * correct value that will put into target vector.
+ b) Generate extra instruction before/after permutation instruction, for
+ adjusting op vector or target vector, to make sure target vector's value is
+ what GCC expects.
+ c) Use other instructions to process op and put correct result into target.
+ */
+
+/* Implementation of constant vector permutation.  This function identifies
+ * recognized patterns of the permutation selector argument, and uses one or
+ * more instructions to finish the permutation job correctly.  For unsupported
+ * patterns, it will return false. */
+
+static bool
+loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
+{
+ /* Although we have the LSX vec_perm<mode> template, there's still some
+     128bit vector permutation operations sent to vectorize_vec_perm_const.
+     In this case, we just simply wrap them with a single vshuf.* instruction,
+     because the LSX vshuf.* instruction has the same behavior that GCC
+ expects. */
+ if (GET_MODE_SIZE (d->vmode) == 16)
+ return loongarch_try_expand_lsx_vshuf_const (d);
+ else
+ return false;
+
+ bool ok = false, reverse_hi_lo = false, extract_ev_od = false,
+ use_alt_op = false;
+ unsigned char idx;
+ int i;
+ rtx target, op0, op1, sel, tmp;
+ rtx op0_alt = NULL_RTX, op1_alt = NULL_RTX;
+ rtx rperm[MAX_VECT_LEN];
+ unsigned int remapped[MAX_VECT_LEN];
+
+ /* Try to figure out whether is a recognized permutation selector pattern, if
+ yes, we will reassign some elements with new value in selector argument,
+ and in some cases we will generate some assist insn to complete the
+ permutation. (Even in some cases, we use other insn to impl permutation
+ instead of xvshuf!)
+
+     Make sure to check d->testing_p is false every time you want to emit a new
+ insn, unless you want to crash into ICE directly. */
+ if (loongarch_is_quad_duplicate (d))
+ {
+ /* Selector example: E_V8SImode, { 0, 0, 0, 0, 4, 4, 4, 4 }
+ copy first elem from original selector to all elem in new selector. */
+ idx = d->perm[0];
+ for (i = 0; i < d->nelt; i += 1)
+ {
+ remapped[i] = idx;
+ }
+ /* Selector after: { 0, 0, 0, 0, 0, 0, 0, 0 }. */
+ }
+ else if (loongarch_is_double_duplicate (d))
+ {
+ /* Selector example: E_V8SImode, { 1, 1, 3, 3, 5, 5, 7, 7 }
+ one_vector_p == true. */
+ for (i = 0; i < d->nelt / 2; i += 1)
+ {
+ idx = d->perm[i];
+ remapped[i] = idx;
+ remapped[i + d->nelt / 2] = idx;
+ }
+ /* Selector after: { 1, 1, 3, 3, 1, 1, 3, 3 }. */
+ }
+ else if (loongarch_is_odd_extraction (d)
+ || loongarch_is_even_extraction (d))
+ {
+ /* Odd extraction selector sample: E_V4DImode, { 1, 3, 5, 7 }
+ Selector after: { 1, 3, 1, 3 }.
+ Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 }
+ Selector after: { 0, 2, 0, 2 }. */
+ for (i = 0; i < d->nelt / 2; i += 1)
+ {
+ idx = d->perm[i];
+ remapped[i] = idx;
+ remapped[i + d->nelt / 2] = idx;
+ }
+ /* Additional insn is required for correct result. See codes below. */
+ extract_ev_od = true;
+ }
+ else if (loongarch_is_extraction_permutation (d))
+ {
+ /* Selector sample: E_V8SImode, { 0, 1, 2, 3, 4, 5, 6, 7 }. */
+ if (d->perm[0] == 0)
+ {
+ for (i = 0; i < d->nelt / 2; i += 1)
+ {
+ remapped[i] = i;
+ remapped[i + d->nelt / 2] = i;
+ }
+ }
+ else
+ {
+ /* { 8, 9, 10, 11, 12, 13, 14, 15 }. */
+ for (i = 0; i < d->nelt / 2; i += 1)
+ {
+ idx = i + d->nelt / 2;
+ remapped[i] = idx;
+ remapped[i + d->nelt / 2] = idx;
+ }
+ }
+ /* Selector after: { 0, 1, 2, 3, 0, 1, 2, 3 }
+ { 8, 9, 10, 11, 8, 9, 10, 11 } */
+ }
+ else if (loongarch_is_center_extraction (d))
+ {
+ /* sample: E_V4DImode, { 2, 3, 4, 5 }
+ In this condition, we can just copy high 128bit of op0 and low 128bit
+ of op1 to the target register by using xvpermi.q insn. */
+ if (!d->testing_p)
+ {
+ emit_move_insn (d->target, d->op1);
+ switch (d->vmode)
+ {
+ case E_V4DImode:
+ emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target,
+ d->op0, GEN_INT (0x21)));
+ break;
+ case E_V4DFmode:
+ emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target,
+ d->op0, GEN_INT (0x21)));
+ break;
+ case E_V8SImode:
+ emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target,
+ d->op0, GEN_INT (0x21)));
+ break;
+ case E_V8SFmode:
+ emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target,
+ d->op0, GEN_INT (0x21)));
+ break;
+ case E_V16HImode:
+ emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target,
+ d->op0, GEN_INT (0x21)));
+ break;
+ case E_V32QImode:
+ emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target,
+ d->op0, GEN_INT (0x21)));
+ break;
+ default:
+ break;
+ }
+ }
+ ok = true;
+      /* Finish the function directly. */
+ goto expand_perm_const_2_end;
+ }
+ else if (loongarch_is_reversing_permutation (d))
+ {
+ /* Selector sample: E_V8SImode, { 7, 6, 5, 4, 3, 2, 1, 0 }
+ one_vector_p == true */
+ idx = d->nelt / 2 - 1;
+ for (i = 0; i < d->nelt / 2; i += 1)
+ {
+ remapped[i] = idx;
+ remapped[i + d->nelt / 2] = idx;
+ idx -= 1;
+ }
+ /* Selector after: { 3, 2, 1, 0, 3, 2, 1, 0 }
+ Additional insn will be generated to swap hi and lo 128bit of target
+ register. */
+ reverse_hi_lo = true;
+ }
+ else if (loongarch_is_di_misalign_extract (d)
+ || loongarch_is_si_misalign_extract (d))
+ {
+ /* Selector Sample:
+ DI misalign: E_V4DImode, { 1, 2, 3, 4 }
+ SI misalign: E_V8SImode, { 1, 2, 3, 4, 5, 6, 7, 8 } */
+ if (!d->testing_p)
+ {
+ /* Copy original op0/op1 value to new temp register.
+	     In some cases, the operand register may be used in multiple places,
+	     so we need a new register instead of modifying the original one, to
+	     avoid runtime crashes or wrong values after execution. */
+ use_alt_op = true;
+ op1_alt = gen_reg_rtx (d->vmode);
+ emit_move_insn (op1_alt, d->op1);
+
+ /* Adjust op1 for selecting correct value in high 128bit of target
+ register.
+ op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }. */
+ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
+ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+ conv_op0, GEN_INT (0x21)));
+
+ for (i = 0; i < d->nelt / 2; i += 1)
+ {
+ remapped[i] = d->perm[i];
+ remapped[i + d->nelt / 2] = d->perm[i];
+ }
+ /* Selector after:
+ DI misalign: { 1, 2, 1, 2 }
+ SI misalign: { 1, 2, 3, 4, 1, 2, 3, 4 } */
+ }
+ }
+ else if (loongarch_is_lasx_lowpart_interleave (d))
+ {
+      /* Elements from op0's low 128bit and op1's low 128bit are inserted into
+ target register alternately.
+ sample: E_V4DImode, { 0, 4, 1, 5 } */
+ if (!d->testing_p)
+ {
+ /* Prepare temp register instead of modify original op. */
+ use_alt_op = true;
+ op1_alt = gen_reg_rtx (d->vmode);
+ op0_alt = gen_reg_rtx (d->vmode);
+ emit_move_insn (op1_alt, d->op1);
+ emit_move_insn (op0_alt, d->op0);
+
+ /* Generate subreg for fitting into insn gen function. */
+ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
+ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
+
+ /* Adjust op value in temp register.
+ op0 = {0,1,2,3}, op1 = {4,5,0,1} */
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+ conv_op0, GEN_INT (0x02)));
+ /* op0 = {0,1,4,5}, op1 = {4,5,0,1} */
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0,
+ conv_op1, GEN_INT (0x01)));
+
+ /* Remap indices in selector based on the location of index inside
+ selector, and vector element numbers in current vector mode. */
+
+ /* Filling low 128bit of new selector. */
+ for (i = 0; i < d->nelt / 2; i += 1)
+ {
+ /* value in odd-indexed slot of low 128bit part of selector
+ vector. */
+ remapped[i] = i % 2 != 0 ? d->perm[i] - d->nelt / 2 : d->perm[i];
+ }
+ /* Then filling the high 128bit. */
+ for (i = d->nelt / 2; i < d->nelt; i += 1)
+ {
+ /* value in even-indexed slot of high 128bit part of
+ selector vector. */
+ remapped[i] = i % 2 == 0
+ ? d->perm[i] + (d->nelt / 2) * 3 : d->perm[i];
+ }
+ }
+ }
+ else if (loongarch_is_lasx_lowpart_interleave_2 (d))
+ {
+ /* Special lowpart interleave case in V32QI vector mode. It does the same
+	 thing as we can see in the if branch above this line.
+ Selector sample: E_V32QImode,
+ {0, 1, 2, 3, 4, 5, 6, 7, 32, 33, 34, 35, 36, 37, 38, 39, 8,
+ 9, 10, 11, 12, 13, 14, 15, 40, 41, 42, 43, 44, 45, 46, 47} */
+ if (!d->testing_p)
+ {
+	  /* Solution for this case is very simple - convert op into V4DI mode,
+ and do same thing as previous if branch. */
+ op1_alt = gen_reg_rtx (d->vmode);
+ op0_alt = gen_reg_rtx (d->vmode);
+ emit_move_insn (op1_alt, d->op1);
+ emit_move_insn (op0_alt, d->op0);
+
+ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
+ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
+ rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
+
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+ conv_op0, GEN_INT (0x02)));
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0,
+ conv_op1, GEN_INT (0x01)));
+ remapped[0] = 0;
+ remapped[1] = 4;
+ remapped[2] = 1;
+ remapped[3] = 5;
+
+ for (i = 0; i < d->nelt; i += 1)
+ {
+ rperm[i] = GEN_INT (remapped[i]);
+ }
+
+ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (4, rperm));
+ sel = force_reg (E_V4DImode, sel);
+ emit_insn (gen_lasx_xvshuf_d (conv_target, sel,
+ conv_op1, conv_op0));
+ }
+
+ ok = true;
+ goto expand_perm_const_2_end;
+ }
+ else if (loongarch_is_lasx_lowpart_extract (d))
+ {
+ /* Copy op0's low 128bit to target's low 128bit, and copy op1's low
+ 128bit to target's high 128bit.
+ Selector sample: E_V4DImode, { 0, 1, 4 ,5 } */
+ if (!d->testing_p)
+ {
+ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
+ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
+ rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
+
+	  /* We can achieve the expectation by using a simple xvpermi.q insn. */
+ emit_move_insn (conv_target, conv_op1);
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_target, conv_target,
+ conv_op0, GEN_INT (0x20)));
+ }
+
+ ok = true;
+ goto expand_perm_const_2_end;
+ }
+ else if (loongarch_is_lasx_highpart_interleave (d))
+ {
+ /* Similar to lowpart interleave, elements from op0's high 128bit and
+	 op1's high 128bit are inserted into target register alternately.
+ Selector sample: E_V8SImode, { 4, 12, 5, 13, 6, 14, 7, 15 } */
+ if (!d->testing_p)
+ {
+ /* Prepare temp op register. */
+ use_alt_op = true;
+ op1_alt = gen_reg_rtx (d->vmode);
+ op0_alt = gen_reg_rtx (d->vmode);
+ emit_move_insn (op1_alt, d->op1);
+ emit_move_insn (op0_alt, d->op0);
+
+ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
+ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
+ /* Adjust op value in temp regiter.
+ op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } */
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+ conv_op0, GEN_INT (0x13)));
+ /* op0 = { 2, 3, 6, 7 }, op1 = { 6, 7, 2, 3 } */
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0,
+ conv_op1, GEN_INT (0x01)));
+ /* Remap indices in selector based on the location of index inside
+ selector, and vector element numbers in current vector mode. */
+
+ /* Filling low 128bit of new selector. */
+ for (i = 0; i < d->nelt / 2; i += 1)
+ {
+ /* value in even-indexed slot of low 128bit part of selector
+ vector. */
+ remapped[i] = i % 2 == 0 ? d->perm[i] - d->nelt / 2 : d->perm[i];
+ }
+ /* Then filling the high 128bit. */
+ for (i = d->nelt / 2; i < d->nelt; i += 1)
+ {
+ /* value in odd-indexed slot of high 128bit part of selector
+ vector. */
+ remapped[i] = i % 2 != 0
+ ? d->perm[i] - (d->nelt / 2) * 3 : d->perm[i];
+ }
+ }
+ }
+ else if (loongarch_is_lasx_highpart_interleave_2 (d))
+ {
+ /* Special highpart interleave case in V32QI vector mode. It does the
+ same thing as the normal version above.
+ Selector sample: E_V32QImode,
+ {16, 17, 18, 19, 20, 21, 22, 23, 48, 49, 50, 51, 52, 53, 54, 55,
+ 24, 25, 26, 27, 28, 29, 30, 31, 56, 57, 58, 59, 60, 61, 62, 63}
+ */
+ if (!d->testing_p)
+ {
+ /* Convert op into V4DImode and do the things. */
+ op1_alt = gen_reg_rtx (d->vmode);
+ op0_alt = gen_reg_rtx (d->vmode);
+ emit_move_insn (op1_alt, d->op1);
+ emit_move_insn (op0_alt, d->op0);
+
+ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
+ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
+ rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
+
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+ conv_op0, GEN_INT (0x13)));
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0,
+ conv_op1, GEN_INT (0x01)));
+ remapped[0] = 2;
+ remapped[1] = 6;
+ remapped[2] = 3;
+ remapped[3] = 7;
+
+ for (i = 0; i < d->nelt; i += 1)
+ {
+ rperm[i] = GEN_INT (remapped[i]);
+ }
+
+ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (4, rperm));
+ sel = force_reg (E_V4DImode, sel);
+ emit_insn (gen_lasx_xvshuf_d (conv_target, sel,
+ conv_op1, conv_op0));
+ }
+
+ ok = true;
+ goto expand_perm_const_2_end;
+ }
+ else if (loongarch_is_elem_duplicate (d))
+ {
+ /* Brocast single element (from op0 or op1) to all slot of target
+ register.
+ Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } */
+ if (!d->testing_p)
+ {
+ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
+ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
+ rtx temp_reg = gen_reg_rtx (d->vmode);
+ rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0);
+
+ emit_move_insn (temp_reg, d->op0);
+
+ idx = d->perm[0];
+ /* We will use xvrepl128vei.* insn to achieve the result, but we need
+ to make the high/low 128bit has the same contents that contain the
+ value that we need to broardcast, because xvrepl128vei does the
+ broardcast job from every 128bit of source register to
+ corresponded part of target register! (A deep sigh.) */
+ if (/*idx >= 0 &&*/ idx < d->nelt / 2)
+ {
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
+ conv_op0, GEN_INT (0x0)));
+ }
+ else if (idx >= d->nelt / 2 && idx < d->nelt)
+ {
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
+ conv_op0, GEN_INT (0x11)));
+ idx -= d->nelt / 2;
+ }
+ else if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
+ {
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
+ conv_op1, GEN_INT (0x0)));
+ }
+ else if (idx >= (d->nelt + d->nelt / 2) && idx < d->nelt * 2)
+ {
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
+ conv_op1, GEN_INT (0x11)));
+ idx -= d->nelt / 2;
+ }
+
+ /* Then we can finally generate this insn. */
+ switch (d->vmode)
+ {
+ case E_V4DImode:
+ emit_insn (gen_lasx_xvrepl128vei_d (d->target, temp_reg,
+ GEN_INT (idx)));
+ break;
+ case E_V4DFmode:
+ emit_insn (gen_lasx_xvrepl128vei_d_f (d->target, temp_reg,
+ GEN_INT (idx)));
+ break;
+ case E_V8SImode:
+ emit_insn (gen_lasx_xvrepl128vei_w (d->target, temp_reg,
+ GEN_INT (idx)));
+ break;
+ case E_V8SFmode:
+ emit_insn (gen_lasx_xvrepl128vei_w_f (d->target, temp_reg,
+ GEN_INT (idx)));
+ break;
+ case E_V16HImode:
+ emit_insn (gen_lasx_xvrepl128vei_h (d->target, temp_reg,
+ GEN_INT (idx)));
+ break;
+ case E_V32QImode:
+ emit_insn (gen_lasx_xvrepl128vei_b (d->target, temp_reg,
+ GEN_INT (idx)));
+ break;
+ default:
+ gcc_unreachable ();
+ break;
+ }
+
+ /* finish func directly. */
+ ok = true;
+ goto expand_perm_const_2_end;
+ }
+ }
+ else if (loongarch_is_op_reverse_perm (d))
+ {
+ /* reverse high 128bit and low 128bit in op0.
+ Selector sample: E_V4DFmode, { 2, 3, 0, 1 }
+ Use xvpermi.q for doing this job. */
+ if (!d->testing_p)
+ {
+ if (d->vmode == E_V4DImode)
+ {
+ emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, d->op0,
+ GEN_INT (0x01)));
+ }
+ else if (d->vmode == E_V4DFmode)
+ {
+ emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, d->op0,
+ GEN_INT (0x01)));
+ }
+ else
+ {
+ gcc_unreachable ();
+ }
+ }
+
+ ok = true;
+ goto expand_perm_const_2_end;
+ }
+ else if (loongarch_is_single_op_perm (d))
+ {
+ /* Permutation that only select elements from op0. */
+ if (!d->testing_p)
+ {
+ /* Prepare temp register instead of modify original op. */
+ use_alt_op = true;
+ op0_alt = gen_reg_rtx (d->vmode);
+ op1_alt = gen_reg_rtx (d->vmode);
+
+ emit_move_insn (op0_alt, d->op0);
+ emit_move_insn (op1_alt, d->op1);
+
+ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
+ rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
+ rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
+
+ /* Duplicate op0's low 128bit in op0, then duplicate high 128bit
+ in op1. After this, xvshuf.* insn's selector argument can
+ access all elements we need for correct permutation result. */
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op0,
+ GEN_INT (0x00)));
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0,
+ GEN_INT (0x11)));
+
+ /* In this case, there's no need to remap selector's indices. */
+ for (i = 0; i < d->nelt; i += 1)
+ {
+ remapped[i] = d->perm[i];
+ }
+ }
+ }
+ else if (loongarch_is_divisible_perm (d))
+ {
+ /* Divisible perm:
+ Low 128bit of selector only selects elements of op0,
+ and high 128bit of selector only selects elements of op1. */
+
+ if (!d->testing_p)
+ {
+ /* Prepare temp register instead of modify original op. */
+ use_alt_op = true;
+ op0_alt = gen_reg_rtx (d->vmode);
+ op1_alt = gen_reg_rtx (d->vmode);
+
+ emit_move_insn (op0_alt, d->op0);
+ emit_move_insn (op1_alt, d->op1);
+
+ rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
+ rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
+ rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
+ rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
+
+ /* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure
+ that selector's low 128bit can access all op0's elements, and
+ selector's high 128bit can access all op1's elements. */
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op1,
+ GEN_INT (0x02)));
+ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0,
+ GEN_INT (0x31)));
+
+ /* No need to modify indices. */
+ for (i = 0; i < d->nelt;i += 1)
+ {
+ remapped[i] = d->perm[i];
+ }
+ }
+ }
+ else if (loongarch_is_triple_stride_extract (d))
+ {
+ /* Selector sample: E_V4DFmode, { 1, 4, 7, 0 }. */
+ if (!d->testing_p)
+ {
+ /* Resolve it with brute force modification. */
+ remapped[0] = 1;
+ remapped[1] = 2;
+ remapped[2] = 3;
+ remapped[3] = 0;
+ }
+ }
+ else
+ {
+ /* When all of the detections above are failed, we will try last
+ strategy.
+ The for loop tries to detect following rules based on indices' value,
+ its position inside of selector vector ,and strange behavior of
+ xvshuf.* insn; Then we take corresponding action. (Replace with new
+ value, or give up whole permutation expansion.) */
+ for (i = 0; i < d->nelt; i += 1)
+ {
+ /* % (2 * d->nelt) */
+ idx = d->perm[i];
+
+ /* if index is located in low 128bit of selector vector. */
+ if (i < d->nelt / 2)
+ {
+ /* Fail case 1: index tries to reach element that located in op0's
+ high 128bit. */
+ if (idx >= d->nelt / 2 && idx < d->nelt)
+ {
+ goto expand_perm_const_2_end;
+ }
+ /* Fail case 2: index tries to reach element that located in
+ op1's high 128bit. */
+ if (idx >= (d->nelt + d->nelt / 2))
+ {
+ goto expand_perm_const_2_end;
+ }
+
+ /* Success case: index tries to reach elements that located in
+ op1's low 128bit. Apply - (nelt / 2) offset to original
+ value. */
+ if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
+ {
+ idx -= d->nelt / 2;
+ }
+ }
+ /* if index is located in high 128bit of selector vector. */
+ else
+ {
+ /* Fail case 1: index tries to reach element that located in
+ op1's low 128bit. */
+ if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
+ {
+ goto expand_perm_const_2_end;
+ }
+ /* Fail case 2: index tries to reach element that located in
+ op0's low 128bit. */
+ if (idx < (d->nelt / 2))
+ {
+ goto expand_perm_const_2_end;
+ }
+ /* Success case: index tries to reach element that located in
+ op0's high 128bit. */
+ if (idx >= d->nelt / 2 && idx < d->nelt)
+ {
+ idx -= d->nelt / 2;
+ }
+ }
+ /* No need to process other case that we did not mentioned. */
+
+ /* Assign with original or processed value. */
+ remapped[i] = idx;
+ }
+ }
+
+ ok = true;
+ /* If testing_p is true, compiler is trying to figure out that backend can
+ handle this permutation, but doesn't want to generate actual insn. So
+ if true, exit directly. */
+ if (d->testing_p)
+ {
+ goto expand_perm_const_2_end;
+ }
+
+ /* Convert remapped selector array to RTL array. */
+ for (i = 0; i < d->nelt; i += 1)
+ {
+ rperm[i] = GEN_INT (remapped[i]);
+ }
+
+ /* Copy selector vector from memory to vector regiter for later insn gen
+ function.
+ If vector's element in floating point value, we cannot fit selector
+ argument into insn gen function directly, because of the insn template
+ definition. As a solution, generate a integral mode subreg of target,
+ then copy selector vector (that is in integral mode) to this subreg. */
+ switch (d->vmode)
+ {
+ case E_V4DFmode:
+ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm));
+ tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
+ emit_move_insn (tmp, sel);
+ break;
+ case E_V8SFmode:
+ sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm));
+ tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0);
+ emit_move_insn (tmp, sel);
+ break;
+ default:
+ sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm));
+ emit_move_insn (d->target, sel);
+ break;
+ }
+
+ target = d->target;
+ /* If temp op registers are requested in previous if branch, then use temp
+ register intead of original one. */
+ if (use_alt_op)
+ {
+ op0 = op0_alt != NULL_RTX ? op0_alt : d->op0;
+ op1 = op1_alt != NULL_RTX ? op1_alt : d->op1;
+ }
+ else
+ {
+ op0 = d->op0;
+ op1 = d->one_vector_p ? d->op0 : d->op1;
+ }
+
+ /* We FINALLY can generate xvshuf.* insn. */
+ switch (d->vmode)
+ {
+ case E_V4DFmode:
+ emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0));
+ break;
+ case E_V4DImode:
+ emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0));
+ break;
+ case E_V8SFmode:
+ emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0));
+ break;
+ case E_V8SImode:
+ emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0));
+ break;
+ case E_V16HImode:
+ emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0));
+ break;
+ case E_V32QImode:
+ emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target));
+ break;
+ default:
+ gcc_unreachable ();
+ break;
+ }
+
+ /* Extra insn for swapping the hi/lo 128bit of target vector register. */
+ if (reverse_hi_lo)
+ {
+ switch (d->vmode)
+ {
+ case E_V4DFmode:
+ emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target,
+ d->target, GEN_INT (0x1)));
+ break;
+ case E_V4DImode:
+ emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target,
+ d->target, GEN_INT (0x1)));
+ break;
+ case E_V8SFmode:
+ emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target,
+ d->target, GEN_INT (0x1)));
+ break;
+ case E_V8SImode:
+ emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target,
+ d->target, GEN_INT (0x1)));
+ break;
+ case E_V16HImode:
+ emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target,
+ d->target, GEN_INT (0x1)));
+ break;
+ case E_V32QImode:
+ emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target,
+ d->target, GEN_INT (0x1)));
+ break;
+ default:
+ break;
+ }
+ }
+ /* Extra insn required by odd/even extraction. Swapping the second and third
+ 64bit in target vector register. */
+ else if (extract_ev_od)
+ {
+ rtx converted = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
+ emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted,
+ GEN_INT (0xD8)));
+ }
+
+expand_perm_const_2_end:
+ return ok;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
@@ -7813,6 +9728,12 @@ loongarch_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
break;
}
+ // Do rounding for selector to avoid vshuf undefined behavior.
+ for (i = 0; i < d.nelt; i += 1)
+ {
+ d.perm[i] %= (d.nelt * 2);
+ }
+
if (d.testing_p)
{
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
@@ -7865,7 +9786,7 @@ loongarch_cpu_sched_reassociation_width (struct loongarch_target *target,
case CPU_LOONGARCH64:
case CPU_LA464:
/* Vector part. */
- if (LSX_SUPPORTED_MODE_P (mode))
+ if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
{
/* Integer vector instructions execute in FP unit.
The width of integer/float-point vector instructions is 3. */
@@ -7915,6 +9836,44 @@ loongarch_expand_vector_extract (rtx target, rtx vec, int elt)
case E_V16QImode:
break;
+ case E_V32QImode:
+ if (ISA_HAS_LASX)
+ {
+ if (elt >= 16)
+ {
+ tmp = gen_reg_rtx (V32QImode);
+ emit_insn (gen_lasx_xvpermi_d_v32qi (tmp, vec, GEN_INT (0xe)));
+ loongarch_expand_vector_extract (target,
+ gen_lowpart (V16QImode, tmp),
+ elt & 15);
+ }
+ else
+ loongarch_expand_vector_extract (target,
+ gen_lowpart (V16QImode, vec),
+ elt & 15);
+ return;
+ }
+ break;
+
+ case E_V16HImode:
+ if (ISA_HAS_LASX)
+ {
+ if (elt >= 8)
+ {
+ tmp = gen_reg_rtx (V16HImode);
+ emit_insn (gen_lasx_xvpermi_d_v16hi (tmp, vec, GEN_INT (0xe)));
+ loongarch_expand_vector_extract (target,
+ gen_lowpart (V8HImode, tmp),
+ elt & 7);
+ }
+ else
+ loongarch_expand_vector_extract (target,
+ gen_lowpart (V8HImode, vec),
+ elt & 7);
+ return;
+ }
+ break;
+
default:
break;
}
@@ -7953,6 +9912,31 @@ emit_reduc_half (rtx dest, rtx src, int i)
case E_V2DFmode:
tem = gen_lsx_vbsrl_d_f (dest, src, GEN_INT (8));
break;
+ case E_V8SFmode:
+ if (i == 256)
+ tem = gen_lasx_xvpermi_d_v8sf (dest, src, GEN_INT (0xe));
+ else
+ tem = gen_lasx_xvshuf4i_w_f (dest, src,
+ GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
+ break;
+ case E_V4DFmode:
+ if (i == 256)
+ tem = gen_lasx_xvpermi_d_v4df (dest, src, GEN_INT (0xe));
+ else
+ tem = gen_lasx_xvpermi_d_v4df (dest, src, const1_rtx);
+ break;
+ case E_V32QImode:
+ case E_V16HImode:
+ case E_V8SImode:
+ case E_V4DImode:
+ d = gen_reg_rtx (V4DImode);
+ if (i == 256)
+ tem = gen_lasx_xvpermi_d_v4di (d, gen_lowpart (V4DImode, src),
+ GEN_INT (0xe));
+ else
+ tem = gen_lasx_xvbsrl_d (d, gen_lowpart (V4DImode, src),
+ GEN_INT (i/16));
+ break;
case E_V16QImode:
case E_V8HImode:
case E_V4SImode:
@@ -8000,10 +9984,57 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
machine_mode imode = GET_MODE (operands[1]);
rtx (*unpack) (rtx, rtx, rtx);
+ rtx (*extend) (rtx, rtx);
rtx (*cmpFunc) (rtx, rtx, rtx);
+ rtx (*swap_hi_lo) (rtx, rtx, rtx, rtx);
rtx tmp, dest;
- if (ISA_HAS_LSX)
+ if (ISA_HAS_LASX && GET_MODE_SIZE (imode) == 32)
+ {
+ switch (imode)
+ {
+ case E_V8SImode:
+ if (unsigned_p)
+ extend = gen_lasx_vext2xv_du_wu;
+ else
+ extend = gen_lasx_vext2xv_d_w;
+ swap_hi_lo = gen_lasx_xvpermi_q_v8si;
+ break;
+
+ case E_V16HImode:
+ if (unsigned_p)
+ extend = gen_lasx_vext2xv_wu_hu;
+ else
+ extend = gen_lasx_vext2xv_w_h;
+ swap_hi_lo = gen_lasx_xvpermi_q_v16hi;
+ break;
+
+ case E_V32QImode:
+ if (unsigned_p)
+ extend = gen_lasx_vext2xv_hu_bu;
+ else
+ extend = gen_lasx_vext2xv_h_b;
+ swap_hi_lo = gen_lasx_xvpermi_q_v32qi;
+ break;
+
+ default:
+ gcc_unreachable ();
+ break;
+ }
+
+ if (high_p)
+ {
+ tmp = gen_reg_rtx (imode);
+ emit_insn (swap_hi_lo (tmp, tmp, operands[1], const1_rtx));
+ emit_insn (extend (operands[0], tmp));
+ return;
+ }
+
+ emit_insn (extend (operands[0], operands[1]));
+ return;
+
+ }
+ else if (ISA_HAS_LSX)
{
switch (imode)
{
@@ -8104,8 +10135,17 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nunits, elts));
}
+
/* Expand a vector initialization. */
+void
+loongarch_expand_vector_group_init (rtx target, rtx vals)
+{
+ rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
+ emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0],
+ ops[1])));
+}
+
void
loongarch_expand_vector_init (rtx target, rtx vals)
{
@@ -8125,6 +10165,285 @@ loongarch_expand_vector_init (rtx target, rtx vals)
all_same = false;
}
+ if (ISA_HAS_LASX && GET_MODE_SIZE (vmode) == 32)
+ {
+ if (all_same)
+ {
+ rtx same = XVECEXP (vals, 0, 0);
+ rtx temp, temp2;
+
+ if (CONST_INT_P (same) && nvar == 0
+ && loongarch_signed_immediate_p (INTVAL (same), 10, 0))
+ {
+ switch (vmode)
+ {
+ case E_V32QImode:
+ case E_V16HImode:
+ case E_V8SImode:
+ case E_V4DImode:
+ temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0));
+ emit_move_insn (target, temp);
+ return;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ temp = gen_reg_rtx (imode);
+ if (imode == GET_MODE (same))
+ temp2 = same;
+ else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
+ {
+ if (GET_CODE (same) == MEM)
+ {
+ rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
+ loongarch_emit_move (reg_tmp, same);
+ temp2 = simplify_gen_subreg (imode, reg_tmp,
+ GET_MODE (reg_tmp), 0);
+ }
+ else
+ temp2 = simplify_gen_subreg (imode, same,
+ GET_MODE (same), 0);
+ }
+ else
+ {
+ if (GET_CODE (same) == MEM)
+ {
+ rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
+ loongarch_emit_move (reg_tmp, same);
+ temp2 = lowpart_subreg (imode, reg_tmp,
+ GET_MODE (reg_tmp));
+ }
+ else
+ temp2 = lowpart_subreg (imode, same, GET_MODE (same));
+ }
+ emit_move_insn (temp, temp2);
+
+ switch (vmode)
+ {
+ case E_V32QImode:
+ case E_V16HImode:
+ case E_V8SImode:
+ case E_V4DImode:
+ loongarch_emit_move (target,
+ gen_rtx_VEC_DUPLICATE (vmode, temp));
+ break;
+
+ case E_V8SFmode:
+ emit_insn (gen_lasx_xvreplve0_w_f_scalar (target, temp));
+ break;
+
+ case E_V4DFmode:
+ emit_insn (gen_lasx_xvreplve0_d_f_scalar (target, temp));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ rtvec vec = shallow_copy_rtvec (XVEC (vals, 0));
+
+ for (i = 0; i < nelt; ++i)
+ RTVEC_ELT (vec, i) = CONST0_RTX (imode);
+
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec));
+
+ machine_mode half_mode = VOIDmode;
+ rtx target_hi, target_lo;
+
+ switch (vmode)
+ {
+ case E_V32QImode:
+ half_mode=E_V16QImode;
+ target_hi = gen_reg_rtx (half_mode);
+ target_lo = gen_reg_rtx (half_mode);
+ for (i = 0; i < nelt/2; ++i)
+ {
+ rtx temp_hi = gen_reg_rtx (imode);
+ rtx temp_lo = gen_reg_rtx (imode);
+ emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2));
+ emit_move_insn (temp_lo, XVECEXP (vals, 0, i));
+ if (i == 0)
+ {
+ emit_insn (gen_lsx_vreplvei_b_scalar (target_hi,
+ temp_hi));
+ emit_insn (gen_lsx_vreplvei_b_scalar (target_lo,
+ temp_lo));
+ }
+ else
+ {
+ emit_insn (gen_vec_setv16qi (target_hi, temp_hi,
+ GEN_INT (i)));
+ emit_insn (gen_vec_setv16qi (target_lo, temp_lo,
+ GEN_INT (i)));
+ }
+ }
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_CONCAT (vmode, target_hi,
+ target_lo)));
+ break;
+
+ case E_V16HImode:
+ half_mode=E_V8HImode;
+ target_hi = gen_reg_rtx (half_mode);
+ target_lo = gen_reg_rtx (half_mode);
+ for (i = 0; i < nelt/2; ++i)
+ {
+ rtx temp_hi = gen_reg_rtx (imode);
+ rtx temp_lo = gen_reg_rtx (imode);
+ emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2));
+ emit_move_insn (temp_lo, XVECEXP (vals, 0, i));
+ if (i == 0)
+ {
+ emit_insn (gen_lsx_vreplvei_h_scalar (target_hi,
+ temp_hi));
+ emit_insn (gen_lsx_vreplvei_h_scalar (target_lo,
+ temp_lo));
+ }
+ else
+ {
+ emit_insn (gen_vec_setv8hi (target_hi, temp_hi,
+ GEN_INT (i)));
+ emit_insn (gen_vec_setv8hi (target_lo, temp_lo,
+ GEN_INT (i)));
+ }
+ }
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_CONCAT (vmode, target_hi,
+ target_lo)));
+ break;
+
+ case E_V8SImode:
+ half_mode=V4SImode;
+ target_hi = gen_reg_rtx (half_mode);
+ target_lo = gen_reg_rtx (half_mode);
+ for (i = 0; i < nelt/2; ++i)
+ {
+ rtx temp_hi = gen_reg_rtx (imode);
+ rtx temp_lo = gen_reg_rtx (imode);
+ emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2));
+ emit_move_insn (temp_lo, XVECEXP (vals, 0, i));
+ if (i == 0)
+ {
+ emit_insn (gen_lsx_vreplvei_w_scalar (target_hi,
+ temp_hi));
+ emit_insn (gen_lsx_vreplvei_w_scalar (target_lo,
+ temp_lo));
+ }
+ else
+ {
+ emit_insn (gen_vec_setv4si (target_hi, temp_hi,
+ GEN_INT (i)));
+ emit_insn (gen_vec_setv4si (target_lo, temp_lo,
+ GEN_INT (i)));
+ }
+ }
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_CONCAT (vmode, target_hi,
+ target_lo)));
+ break;
+
+ case E_V4DImode:
+ half_mode=E_V2DImode;
+ target_hi = gen_reg_rtx (half_mode);
+ target_lo = gen_reg_rtx (half_mode);
+ for (i = 0; i < nelt/2; ++i)
+ {
+ rtx temp_hi = gen_reg_rtx (imode);
+ rtx temp_lo = gen_reg_rtx (imode);
+ emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2));
+ emit_move_insn (temp_lo, XVECEXP (vals, 0, i));
+ if (i == 0)
+ {
+ emit_insn (gen_lsx_vreplvei_d_scalar (target_hi,
+ temp_hi));
+ emit_insn (gen_lsx_vreplvei_d_scalar (target_lo,
+ temp_lo));
+ }
+ else
+ {
+ emit_insn (gen_vec_setv2di (target_hi, temp_hi,
+ GEN_INT (i)));
+ emit_insn (gen_vec_setv2di (target_lo, temp_lo,
+ GEN_INT (i)));
+ }
+ }
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_CONCAT (vmode, target_hi,
+ target_lo)));
+ break;
+
+ case E_V8SFmode:
+ half_mode=E_V4SFmode;
+ target_hi = gen_reg_rtx (half_mode);
+ target_lo = gen_reg_rtx (half_mode);
+ for (i = 0; i < nelt/2; ++i)
+ {
+ rtx temp_hi = gen_reg_rtx (imode);
+ rtx temp_lo = gen_reg_rtx (imode);
+ emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2));
+ emit_move_insn (temp_lo, XVECEXP (vals, 0, i));
+ if (i == 0)
+ {
+ emit_insn (gen_lsx_vreplvei_w_f_scalar (target_hi,
+ temp_hi));
+ emit_insn (gen_lsx_vreplvei_w_f_scalar (target_lo,
+ temp_lo));
+ }
+ else
+ {
+ emit_insn (gen_vec_setv4sf (target_hi, temp_hi,
+ GEN_INT (i)));
+ emit_insn (gen_vec_setv4sf (target_lo, temp_lo,
+ GEN_INT (i)));
+ }
+ }
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_CONCAT (vmode, target_hi,
+ target_lo)));
+ break;
+
+ case E_V4DFmode:
+ half_mode=E_V2DFmode;
+ target_hi = gen_reg_rtx (half_mode);
+ target_lo = gen_reg_rtx (half_mode);
+ for (i = 0; i < nelt/2; ++i)
+ {
+ rtx temp_hi = gen_reg_rtx (imode);
+ rtx temp_lo = gen_reg_rtx (imode);
+ emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2));
+ emit_move_insn (temp_lo, XVECEXP (vals, 0, i));
+ if (i == 0)
+ {
+ emit_insn (gen_lsx_vreplvei_d_f_scalar (target_hi,
+ temp_hi));
+ emit_insn (gen_lsx_vreplvei_d_f_scalar (target_lo,
+ temp_lo));
+ }
+ else
+ {
+ emit_insn (gen_vec_setv2df (target_hi, temp_hi,
+ GEN_INT (i)));
+ emit_insn (gen_vec_setv2df (target_lo, temp_lo,
+ GEN_INT (i)));
+ }
+ }
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_CONCAT (vmode, target_hi,
+ target_lo)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ }
+ return;
+ }
+
if (ISA_HAS_LSX)
{
if (all_same)
@@ -8372,6 +10691,38 @@ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
}
break;
+ case E_V8SFmode:
+ case E_V4DFmode:
+ switch (cond)
+ {
+ case UNORDERED:
+ case ORDERED:
+ case EQ:
+ case NE:
+ case UNEQ:
+ case UNLE:
+ case UNLT:
+ break;
+ case LTGT: cond = NE; break;
+ case UNGE: cond = UNLE; std::swap (op0, op1); break;
+ case UNGT: cond = UNLT; std::swap (op0, op1); break;
+ case LE: unspec = UNSPEC_LASX_XVFCMP_SLE; break;
+ case LT: unspec = UNSPEC_LASX_XVFCMP_SLT; break;
+ case GE: unspec = UNSPEC_LASX_XVFCMP_SLE; std::swap (op0, op1); break;
+ case GT: unspec = UNSPEC_LASX_XVFCMP_SLT; std::swap (op0, op1); break;
+ default:
+ gcc_unreachable ();
+ }
+ if (unspec < 0)
+ loongarch_emit_binary (cond, dest, op0, op1);
+ else
+ {
+ rtx x = gen_rtx_UNSPEC (GET_MODE (dest),
+ gen_rtvec (2, op0, op1), unspec);
+ emit_insn (gen_rtx_SET (dest, x));
+ }
+ break;
+
default:
gcc_unreachable ();
break;
@@ -8709,7 +11060,7 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
int misalignment,
bool is_packed)
{
- if (ISA_HAS_LSX && STRICT_ALIGNMENT)
+ if ((ISA_HAS_LSX || ISA_HAS_LASX) && STRICT_ALIGNMENT)
{
if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
return false;
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index c3ebea2f2..b2295c589 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -186,6 +186,11 @@ along with GCC; see the file COPYING3. If not see
/* Width of a LSX vector register in bits. */
#define BITS_PER_LSX_REG (UNITS_PER_LSX_REG * BITS_PER_UNIT)
+/* Width of a LASX vector register in bytes. */
+#define UNITS_PER_LASX_REG 32
+/* Width of a LASX vector register in bits. */
+#define BITS_PER_LASX_REG (UNITS_PER_LASX_REG * BITS_PER_UNIT)
+
/* For LARCH, width of a floating point register. */
#define UNITS_PER_FPREG (TARGET_DOUBLE_FLOAT ? 8 : 4)
@@ -248,10 +253,11 @@ along with GCC; see the file COPYING3. If not see
#define STRUCTURE_SIZE_BOUNDARY 8
/* There is no point aligning anything to a rounder boundary than
- LONG_DOUBLE_TYPE_SIZE, unless under LSX the bigggest alignment is
- BITS_PER_LSX_REG/.. */
+ LONG_DOUBLE_TYPE_SIZE, unless under LSX/LASX the bigggest alignment is
+ BITS_PER_LSX_REG/BITS_PER_LASX_REG/.. */
#define BIGGEST_ALIGNMENT \
- (ISA_HAS_LSX ? BITS_PER_LSX_REG : LONG_DOUBLE_TYPE_SIZE)
+ (ISA_HAS_LASX? BITS_PER_LASX_REG \
+ : (ISA_HAS_LSX ? BITS_PER_LSX_REG : LONG_DOUBLE_TYPE_SIZE))
/* All accesses must be aligned. */
#define STRICT_ALIGNMENT (TARGET_STRICT_ALIGN)
@@ -391,6 +397,10 @@ along with GCC; see the file COPYING3. If not see
#define LSX_REG_LAST FP_REG_LAST
#define LSX_REG_NUM FP_REG_NUM
+#define LASX_REG_FIRST FP_REG_FIRST
+#define LASX_REG_LAST FP_REG_LAST
+#define LASX_REG_NUM FP_REG_NUM
+
/* The DWARF 2 CFA column which tracks the return address from a
signal handler context. This means that to maintain backwards
compatibility, no hard register can be assigned this column if it
@@ -409,9 +419,12 @@ along with GCC; see the file COPYING3. If not see
((unsigned int) ((int) (REGNO) - FCC_REG_FIRST) < FCC_REG_NUM)
#define LSX_REG_P(REGNO) \
((unsigned int) ((int) (REGNO) - LSX_REG_FIRST) < LSX_REG_NUM)
+#define LASX_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - LASX_REG_FIRST) < LASX_REG_NUM)
#define FP_REG_RTX_P(X) (REG_P (X) && FP_REG_P (REGNO (X)))
#define LSX_REG_RTX_P(X) (REG_P (X) && LSX_REG_P (REGNO (X)))
+#define LASX_REG_RTX_P(X) (REG_P (X) && LASX_REG_P (REGNO (X)))
/* Select a register mode required for caller save of hard regno REGNO. */
#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
@@ -733,6 +746,13 @@ enum reg_class
&& (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \
|| GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT))
+#define LASX_SUPPORTED_MODE_P(MODE) \
+ (ISA_HAS_LASX \
+ && (GET_MODE_SIZE (MODE) == UNITS_PER_LSX_REG \
+ ||GET_MODE_SIZE (MODE) == UNITS_PER_LASX_REG) \
+ && (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \
+ || GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT))
+
/* 1 if N is a possible register number for function argument passing.
We have no FP argument registers when soft-float. */
@@ -985,7 +1005,39 @@ typedef struct {
{ "vr28", 28 + FP_REG_FIRST }, \
{ "vr29", 29 + FP_REG_FIRST }, \
{ "vr30", 30 + FP_REG_FIRST }, \
- { "vr31", 31 + FP_REG_FIRST } \
+ { "vr31", 31 + FP_REG_FIRST }, \
+ { "xr0", 0 + FP_REG_FIRST }, \
+ { "xr1", 1 + FP_REG_FIRST }, \
+ { "xr2", 2 + FP_REG_FIRST }, \
+ { "xr3", 3 + FP_REG_FIRST }, \
+ { "xr4", 4 + FP_REG_FIRST }, \
+ { "xr5", 5 + FP_REG_FIRST }, \
+ { "xr6", 6 + FP_REG_FIRST }, \
+ { "xr7", 7 + FP_REG_FIRST }, \
+ { "xr8", 8 + FP_REG_FIRST }, \
+ { "xr9", 9 + FP_REG_FIRST }, \
+ { "xr10", 10 + FP_REG_FIRST }, \
+ { "xr11", 11 + FP_REG_FIRST }, \
+ { "xr12", 12 + FP_REG_FIRST }, \
+ { "xr13", 13 + FP_REG_FIRST }, \
+ { "xr14", 14 + FP_REG_FIRST }, \
+ { "xr15", 15 + FP_REG_FIRST }, \
+ { "xr16", 16 + FP_REG_FIRST }, \
+ { "xr17", 17 + FP_REG_FIRST }, \
+ { "xr18", 18 + FP_REG_FIRST }, \
+ { "xr19", 19 + FP_REG_FIRST }, \
+ { "xr20", 20 + FP_REG_FIRST }, \
+ { "xr21", 21 + FP_REG_FIRST }, \
+ { "xr22", 22 + FP_REG_FIRST }, \
+ { "xr23", 23 + FP_REG_FIRST }, \
+ { "xr24", 24 + FP_REG_FIRST }, \
+ { "xr25", 25 + FP_REG_FIRST }, \
+ { "xr26", 26 + FP_REG_FIRST }, \
+ { "xr27", 27 + FP_REG_FIRST }, \
+ { "xr28", 28 + FP_REG_FIRST }, \
+ { "xr29", 29 + FP_REG_FIRST }, \
+ { "xr30", 30 + FP_REG_FIRST }, \
+ { "xr31", 31 + FP_REG_FIRST } \
}
/* Globalizing directive for a label. */
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index fb3828262..3dde0ceb1 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -163,7 +163,7 @@
;; Main data type used by the insn
(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF,FCC,
- V2DI,V4SI,V8HI,V16QI,V2DF,V4SF"
+ V2DI,V4SI,V8HI,V16QI,V2DF,V4SF,V4DI,V8SI,V16HI,V32QI,V4DF,V8SF"
(const_string "unknown"))
;; True if the main data type is twice the size of a word.
@@ -422,12 +422,14 @@
;; floating-point mode or vector mode.
(define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF") (V4SF "SF")
(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
- (V2DF "DF")])
+ (V2DF "DF")(V8SF "SF")(V32QI "QI")(V16HI "HI")(V8SI "SI")(V4DI "DI")(V4DF "DF")])
;; As above, but in lower case.
(define_mode_attr unitmode [(SF "sf") (DF "df") (V2SF "sf") (V4SF "sf")
(V16QI "qi") (V8QI "qi") (V8HI "hi") (V4HI "hi")
- (V4SI "si") (V2SI "si") (V2DI "di") (V2DF "df")])
+ (V4SI "si") (V2SI "si") (V2DI "di") (V2DF "df")
+ (V8SI "si") (V4DI "di") (V32QI "qi") (V16HI "hi")
+ (V8SF "sf") (V4DF "df")])
;; This attribute gives the integer mode that has half the size of
;; the controlling mode.
@@ -711,16 +713,17 @@
[(set_attr "alu_type" "sub")
(set_attr "mode" "<MODE>")])
+
(define_insn "*subsi3_extended"
- [(set (match_operand:DI 0 "register_operand" "= r")
+ [(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
- (minus:SI (match_operand:SI 1 "reg_or_0_operand" " rJ")
- (match_operand:SI 2 "register_operand" " r"))))]
+ (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "register_operand" "r"))))]
"TARGET_64BIT"
"sub.w\t%0,%z1,%2"
[(set_attr "type" "arith")
(set_attr "mode" "SI")])
-
+
;;
;; ....................
;;
@@ -3638,6 +3641,9 @@
; The LoongArch SX Instructions.
(include "lsx.md")
+; The LoongArch ASX Instructions.
+(include "lasx.md")
+
(define_c_enum "unspec" [
UNSPEC_ADDRESS_FIRST
])
--
2.33.0