openjdk-11/add-SVE-backend-feature.patch

diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
index bb9721c8e..3774dd730 100644
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
@@ -140,6 +140,12 @@ ifeq ($(call check-jvm-feature, compiler2), true)
$d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \
)))
+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
+ )))
+ endif
+
ifeq ($(call check-jvm-feature, zgc), true)
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/z/z_$(HOTSPOT_TARGET_CPU).ad \
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 617b2b8fb..eab0101b0 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -69,7 +69,7 @@ register %{
//
// r0-r7,r10-r26 volatile (caller save)
// r27-r32 system (no save, no allocate)
-// r8-r9 invisible to the allocator (so we can use them as scratch regs)
+// r8-r9 non-allocatable (so we can use them as scratch regs)
//
// as regards Java usage. we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
@@ -94,6 +94,10 @@ reg_def R6 ( SOC, SOC, Op_RegI, 6, r6->as_VMReg() );
reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() );
reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() );
reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() );
+reg_def R8 ( NS, SOC, Op_RegI, 8, r8->as_VMReg() ); // rscratch1, non-allocatable
+reg_def R8_H ( NS, SOC, Op_RegI, 8, r8->as_VMReg()->next() );
+reg_def R9 ( NS, SOC, Op_RegI, 9, r9->as_VMReg() ); // rscratch2, non-allocatable
+reg_def R9_H ( NS, SOC, Op_RegI, 9, r9->as_VMReg()->next() );
reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() );
reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() );
@@ -140,7 +144,7 @@ reg_def R31 ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg() ); // sp
reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
// ----------------------------
-// Float/Double Registers
+// Float/Double/Vector Registers
// ----------------------------
// Double Registers
@@ -161,165 +165,316 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
// the platform ABI treats v8-v15 as callee save). float registers
// v16-v31 are SOC as per the platform spec
- reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() );
- reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() );
- reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
- reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );
-
- reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() );
- reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() );
- reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
- reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );
-
- reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() );
- reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() );
- reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
- reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );
-
- reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() );
- reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() );
- reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
- reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );
-
- reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() );
- reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() );
- reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
- reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );
-
- reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() );
- reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() );
- reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
- reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );
-
- reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() );
- reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() );
- reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
- reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );
-
- reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() );
- reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() );
- reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
- reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );
-
- reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() );
- reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() );
- reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
- reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );
-
- reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() );
- reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() );
- reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
- reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );
-
- reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() );
- reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
- reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
- reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
-
- reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() );
- reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
- reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
- reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
-
- reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() );
- reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
- reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
- reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
-
- reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() );
- reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
- reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
- reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
-
- reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() );
- reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
- reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
- reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
-
- reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() );
- reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
- reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
- reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
-
- reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() );
- reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
- reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
- reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
-
- reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() );
- reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
- reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
- reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
-
- reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() );
- reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
- reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
- reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
-
- reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() );
- reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
- reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
- reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
-
- reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() );
- reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
- reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
- reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
-
- reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() );
- reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
- reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
- reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
-
- reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() );
- reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
- reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
- reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
-
- reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() );
- reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
- reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
- reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
-
- reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() );
- reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
- reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
- reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
-
- reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() );
- reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
- reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
- reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
-
- reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() );
- reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
- reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
- reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
-
- reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
- reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
- reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
- reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
-
- reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
- reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
- reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
- reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
-
- reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
- reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
- reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
- reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
-
- reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
- reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
- reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
- reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
-
- reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
- reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
- reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
- reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
+// For SVE vector registers, we simply extend vector register size to 8
+// 'logical' slots. This is nominally 256 bits but it actually covers
+// all possible 'physical' SVE vector register lengths from 128 ~ 2048
+// bits. The 'physical' SVE vector register length is detected during
+// startup, so the register allocator is able to identify the correct
+// number of bytes needed for an SVE spill/unspill.
+// Note that a vector register with 4 slots denotes a 128-bit NEON
+// register allowing it to be distinguished from the corresponding SVE
+// vector register when the SVE vector length is 128 bits.
+
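A minimal standalone sketch of the arithmetic in the comment above (not part of the patch): eight 32-bit "logical" slots give the nominal 256-bit VecA, while the spill size actually used comes from the vector length detected at startup, for which a few of the possible values are shown.

  #include <cstdio>
  int main() {
    const int slots_per_vreg = 8;        // V0 .. V0_O in the definitions below
    const int slot_bits      = 32;       // one Op_RegF slot
    std::printf("nominal VecA width: %d bits\n", slots_per_vreg * slot_bits);   // 256
    for (int bits = 128; bits <= 2048; bits *= 2)
      std::printf("physical SVE length %4d bits -> %d-byte spill\n", bits, bits / 8);
    return 0;
  }
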
+ reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() );
+ reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() );
+ reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
+ reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );
+ reg_def V0_L ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(4) );
+ reg_def V0_M ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(5) );
+ reg_def V0_N ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(6) );
+ reg_def V0_O ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(7) );
+
+ reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() );
+ reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() );
+ reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
+ reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );
+ reg_def V1_L ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(4) );
+ reg_def V1_M ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(5) );
+ reg_def V1_N ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(6) );
+ reg_def V1_O ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(7) );
+
+ reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() );
+ reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() );
+ reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
+ reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );
+ reg_def V2_L ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(4) );
+ reg_def V2_M ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(5) );
+ reg_def V2_N ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(6) );
+ reg_def V2_O ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(7) );
+
+ reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() );
+ reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() );
+ reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
+ reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );
+ reg_def V3_L ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(4) );
+ reg_def V3_M ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(5) );
+ reg_def V3_N ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(6) );
+ reg_def V3_O ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(7) );
+
+ reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() );
+ reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() );
+ reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
+ reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );
+ reg_def V4_L ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(4) );
+ reg_def V4_M ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(5) );
+ reg_def V4_N ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(6) );
+ reg_def V4_O ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(7) );
+
+ reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() );
+ reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() );
+ reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
+ reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );
+ reg_def V5_L ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(4) );
+ reg_def V5_M ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(5) );
+ reg_def V5_N ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(6) );
+ reg_def V5_O ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(7) );
+
+ reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() );
+ reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() );
+ reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
+ reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );
+ reg_def V6_L ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(4) );
+ reg_def V6_M ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(5) );
+ reg_def V6_N ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(6) );
+ reg_def V6_O ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(7) );
+
+ reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() );
+ reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() );
+ reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
+ reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );
+ reg_def V7_L ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(4) );
+ reg_def V7_M ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(5) );
+ reg_def V7_N ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(6) );
+ reg_def V7_O ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(7) );
+
+ reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() );
+ reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() );
+ reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
+ reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );
+ reg_def V8_L ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(4) );
+ reg_def V8_M ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(5) );
+ reg_def V8_N ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(6) );
+ reg_def V8_O ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(7) );
+
+ reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() );
+ reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() );
+ reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
+ reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );
+ reg_def V9_L ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(4) );
+ reg_def V9_M ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(5) );
+ reg_def V9_N ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(6) );
+ reg_def V9_O ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(7) );
+
+ reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() );
+ reg_def V10_H ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
+ reg_def V10_J ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2) );
+ reg_def V10_K ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3) );
+ reg_def V10_L ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(4) );
+ reg_def V10_M ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(5) );
+ reg_def V10_N ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(6) );
+ reg_def V10_O ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(7) );
+
+ reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() );
+ reg_def V11_H ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
+ reg_def V11_J ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2) );
+ reg_def V11_K ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3) );
+ reg_def V11_L ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(4) );
+ reg_def V11_M ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(5) );
+ reg_def V11_N ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(6) );
+ reg_def V11_O ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(7) );
+
+ reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() );
+ reg_def V12_H ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
+ reg_def V12_J ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2) );
+ reg_def V12_K ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3) );
+ reg_def V12_L ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(4) );
+ reg_def V12_M ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(5) );
+ reg_def V12_N ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(6) );
+ reg_def V12_O ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(7) );
+
+ reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() );
+ reg_def V13_H ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
+ reg_def V13_J ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2) );
+ reg_def V13_K ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3) );
+ reg_def V13_L ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(4) );
+ reg_def V13_M ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(5) );
+ reg_def V13_N ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(6) );
+ reg_def V13_O ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(7) );
+
+ reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() );
+ reg_def V14_H ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
+ reg_def V14_J ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2) );
+ reg_def V14_K ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3) );
+ reg_def V14_L ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(4) );
+ reg_def V14_M ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(5) );
+ reg_def V14_N ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(6) );
+ reg_def V14_O ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(7) );
+
+ reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() );
+ reg_def V15_H ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
+ reg_def V15_J ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2) );
+ reg_def V15_K ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3) );
+ reg_def V15_L ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(4) );
+ reg_def V15_M ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(5) );
+ reg_def V15_N ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(6) );
+ reg_def V15_O ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(7) );
+
+ reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() );
+ reg_def V16_H ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
+ reg_def V16_J ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2) );
+ reg_def V16_K ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3) );
+ reg_def V16_L ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(4) );
+ reg_def V16_M ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(5) );
+ reg_def V16_N ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(6) );
+ reg_def V16_O ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(7) );
+
+ reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() );
+ reg_def V17_H ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
+ reg_def V17_J ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2) );
+ reg_def V17_K ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3) );
+ reg_def V17_L ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(4) );
+ reg_def V17_M ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(5) );
+ reg_def V17_N ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(6) );
+ reg_def V17_O ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(7) );
+
+ reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() );
+ reg_def V18_H ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
+ reg_def V18_J ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2) );
+ reg_def V18_K ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3) );
+ reg_def V18_L ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(4) );
+ reg_def V18_M ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(5) );
+ reg_def V18_N ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(6) );
+ reg_def V18_O ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(7) );
+
+ reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() );
+ reg_def V19_H ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
+ reg_def V19_J ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2) );
+ reg_def V19_K ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3) );
+ reg_def V19_L ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(4) );
+ reg_def V19_M ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(5) );
+ reg_def V19_N ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(6) );
+ reg_def V19_O ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(7) );
+
+ reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() );
+ reg_def V20_H ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
+ reg_def V20_J ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2) );
+ reg_def V20_K ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3) );
+ reg_def V20_L ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(4) );
+ reg_def V20_M ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(5) );
+ reg_def V20_N ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(6) );
+ reg_def V20_O ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(7) );
+
+ reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() );
+ reg_def V21_H ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
+ reg_def V21_J ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2) );
+ reg_def V21_K ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3) );
+ reg_def V21_L ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(4) );
+ reg_def V21_M ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(5) );
+ reg_def V21_N ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(6) );
+ reg_def V21_O ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(7) );
+
+ reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() );
+ reg_def V22_H ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
+ reg_def V22_J ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2) );
+ reg_def V22_K ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3) );
+ reg_def V22_L ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(4) );
+ reg_def V22_M ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(5) );
+ reg_def V22_N ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(6) );
+ reg_def V22_O ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(7) );
+
+ reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() );
+ reg_def V23_H ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
+ reg_def V23_J ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2) );
+ reg_def V23_K ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3) );
+ reg_def V23_L ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(4) );
+ reg_def V23_M ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(5) );
+ reg_def V23_N ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(6) );
+ reg_def V23_O ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(7) );
+
+ reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() );
+ reg_def V24_H ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
+ reg_def V24_J ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2) );
+ reg_def V24_K ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3) );
+ reg_def V24_L ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(4) );
+ reg_def V24_M ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(5) );
+ reg_def V24_N ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(6) );
+ reg_def V24_O ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(7) );
+
+ reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() );
+ reg_def V25_H ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
+ reg_def V25_J ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2) );
+ reg_def V25_K ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3) );
+ reg_def V25_L ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(4) );
+ reg_def V25_M ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(5) );
+ reg_def V25_N ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(6) );
+ reg_def V25_O ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(7) );
+
+ reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() );
+ reg_def V26_H ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
+ reg_def V26_J ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2) );
+ reg_def V26_K ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3) );
+ reg_def V26_L ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(4) );
+ reg_def V26_M ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(5) );
+ reg_def V26_N ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(6) );
+ reg_def V26_O ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(7) );
+
+ reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
+ reg_def V27_H ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
+ reg_def V27_J ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2) );
+ reg_def V27_K ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3) );
+ reg_def V27_L ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(4) );
+ reg_def V27_M ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(5) );
+ reg_def V27_N ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(6) );
+ reg_def V27_O ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(7) );
+
+ reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
+ reg_def V28_H ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
+ reg_def V28_J ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2) );
+ reg_def V28_K ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3) );
+ reg_def V28_L ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(4) );
+ reg_def V28_M ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(5) );
+ reg_def V28_N ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(6) );
+ reg_def V28_O ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(7) );
+
+ reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
+ reg_def V29_H ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
+ reg_def V29_J ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2) );
+ reg_def V29_K ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3) );
+ reg_def V29_L ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(4) );
+ reg_def V29_M ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(5) );
+ reg_def V29_N ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(6) );
+ reg_def V29_O ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(7) );
+
+ reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
+ reg_def V30_H ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
+ reg_def V30_J ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2) );
+ reg_def V30_K ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3) );
+ reg_def V30_L ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(4) );
+ reg_def V30_M ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(5) );
+ reg_def V30_N ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(6) );
+ reg_def V30_O ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(7) );
+
+ reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
+ reg_def V31_H ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
+ reg_def V31_J ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2) );
+ reg_def V31_K ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3) );
+ reg_def V31_L ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(4) );
+ reg_def V31_M ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(5) );
+ reg_def V31_N ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(6) );
+ reg_def V31_O ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(7) );
+
+
+// ----------------------------
+// SVE Predicate Registers
+// ----------------------------
+ reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg());
+ reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg());
+ reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg());
+ reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg());
+ reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg());
+ reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg());
+ reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg());
+ reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg());
// ----------------------------
// Special Registers
@@ -333,7 +488,6 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
-
// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
@@ -381,50 +535,64 @@ alloc_class chunk0(
R29, R29_H, // fp
R30, R30_H, // lr
R31, R31_H, // sp
+ R8, R8_H, // rscratch1
+ R9, R9_H, // rscratch2
);
alloc_class chunk1(
// no save
- V16, V16_H, V16_J, V16_K,
- V17, V17_H, V17_J, V17_K,
- V18, V18_H, V18_J, V18_K,
- V19, V19_H, V19_J, V19_K,
- V20, V20_H, V20_J, V20_K,
- V21, V21_H, V21_J, V21_K,
- V22, V22_H, V22_J, V22_K,
- V23, V23_H, V23_J, V23_K,
- V24, V24_H, V24_J, V24_K,
- V25, V25_H, V25_J, V25_K,
- V26, V26_H, V26_J, V26_K,
- V27, V27_H, V27_J, V27_K,
- V28, V28_H, V28_J, V28_K,
- V29, V29_H, V29_J, V29_K,
- V30, V30_H, V30_J, V30_K,
- V31, V31_H, V31_J, V31_K,
+ V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O,
+ V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O,
+ V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O,
+ V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O,
+ V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O,
+ V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O,
+ V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O,
+ V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O,
+ V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O,
+ V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O,
+ V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O,
+ V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O,
+ V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O,
+ V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O,
+ V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O,
+ V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O,
// arg registers
- V0, V0_H, V0_J, V0_K,
- V1, V1_H, V1_J, V1_K,
- V2, V2_H, V2_J, V2_K,
- V3, V3_H, V3_J, V3_K,
- V4, V4_H, V4_J, V4_K,
- V5, V5_H, V5_J, V5_K,
- V6, V6_H, V6_J, V6_K,
- V7, V7_H, V7_J, V7_K,
+ V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O,
+ V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O,
+ V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O,
+ V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O,
+ V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O,
+ V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O,
+ V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O,
+ V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O,
// non-volatiles
- V8, V8_H, V8_J, V8_K,
- V9, V9_H, V9_J, V9_K,
- V10, V10_H, V10_J, V10_K,
- V11, V11_H, V11_J, V11_K,
- V12, V12_H, V12_J, V12_K,
- V13, V13_H, V13_J, V13_K,
- V14, V14_H, V14_J, V14_K,
- V15, V15_H, V15_J, V15_K,
+ V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O,
+ V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O,
+ V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O,
+ V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O,
+ V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O,
+ V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O,
+ V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O,
+ V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O,
+);
+
+alloc_class chunk2 (
+ P0,
+ P1,
+ P2,
+ P3,
+ P4,
+ P5,
+ P6,
+ P7,
+ // Only use P0~P7 here for performance
);
-alloc_class chunk2(RFLAGS);
+alloc_class chunk3(RFLAGS);
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
@@ -865,6 +1033,42 @@ reg_class double_reg(
V31, V31_H
);
+// Class for all SVE vector registers.
+reg_class vectora_reg (
+ V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O,
+ V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O,
+ V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O,
+ V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O,
+ V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O,
+ V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O,
+ V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O,
+ V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O,
+ V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O,
+ V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O,
+ V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O,
+ V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O,
+ V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O,
+ V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O,
+ V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O,
+ V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O,
+ V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O,
+ V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O,
+ V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O,
+ V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O,
+ V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O,
+ V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O,
+ V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O,
+ V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O,
+ V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O,
+ V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O,
+ V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O,
+ V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O,
+ V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O,
+ V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O,
+ V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O,
+ V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O,
+);
+
// Class for all 64bit vector registers
reg_class vectord_reg(
V0, V0_H,
@@ -1097,6 +1301,31 @@ reg_class v31_reg(
V31, V31_H
);
+// Class for all SVE predicate registers.
+reg_class pr_reg (
+ P0,
+ P1,
+ P2,
+ P3,
+ P4,
+ P5,
+ P6,
+ // P7, non-allocatable, preserved with all elements preset to TRUE.
+);
+
+// Class for SVE governing predicate registers, which are used
+// to determine the active elements of a predicated instruction.
+reg_class gov_pr (
+ P0,
+ P1,
+ P2,
+ P3,
+ P4,
+ P5,
+ P6,
+ // P7, non-allocatable, preserved with all elements preset to TRUE.
+);
+
// Singleton class for condition codes
reg_class int_flags(RFLAGS);
@@ -1758,6 +1987,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
// branch if we need to invalidate the method later
__ nop();
+ if (UseSVE > 0 && C->max_vector_size() >= 16) {
+ __ reinitialize_ptrue();
+ }
+
int bangsize = C->bang_size_in_bytes();
if (C->need_stack_bang(bangsize) && UseStackBanging)
__ generate_stack_overflow_check(bangsize);
@@ -1859,7 +2092,7 @@ int MachEpilogNode::safepoint_offset() const {
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
-enum RC { rc_bad, rc_int, rc_float, rc_stack };
+enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
static enum RC rc_class(OptoReg::Name reg) {
@@ -1867,19 +2100,25 @@ static enum RC rc_class(OptoReg::Name reg) {
return rc_bad;
}
- // we have 30 int registers * 2 halves
- // (rscratch1 and rscratch2 are omitted)
+ // we have 32 int registers * 2 halves
+ int slots_of_int_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers;
- if (reg < 60) {
+ if (reg < slots_of_int_registers) {
return rc_int;
}
- // we have 32 float register * 2 halves
- if (reg < 60 + 128) {
+ // we have 32 float register * 8 halves
+ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
+ if (reg < slots_of_int_registers + slots_of_float_registers) {
return rc_float;
}
- // Between float regs & stack is the flags regs.
+ int slots_of_predicate_registers = PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers;
+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_predicate_registers) {
+ return rc_predicate;
+ }
+
+ // Between predicate regs & stack is the flags.
assert(OptoReg::is_stack(reg), "blow up if spilling flags");
return rc_stack;
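
A standalone sketch of the slot ranges the new rc_class() walks (not from the patch): the integer and float counts come from the comments above, while the predicate slot count is an assumption here based on the P0..P7 definitions earlier in this file.

  #include <cstdio>
  enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
  static RC rc_class_sketch(int reg) {
    const int int_slots   = 32 * 2;   // "32 int registers * 2 halves"
    const int float_slots = 32 * 8;   // "32 float register * 8 halves"
    const int pred_slots  = 8;        // assumed: one slot each for P0..P7
    if (reg < 0)                                     return rc_bad;
    if (reg < int_slots)                             return rc_int;
    if (reg < int_slots + float_slots)               return rc_float;
    if (reg < int_slots + float_slots + pred_slots)  return rc_predicate;
    return rc_stack;                  // flags, then real stack slots
  }
  int main() {
    // prints "1 2 3": an int half, a float/vector slot, and a predicate slot
    std::printf("%d %d %d\n", rc_class_sketch(10), rc_class_sketch(100), rc_class_sketch(322));
  }
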
@@ -1918,8 +2157,28 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
if (bottom_type()->isa_vect() != NULL) {
uint ireg = ideal_reg();
- assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
- if (cbuf) {
+ if (ireg == Op_VecA && cbuf) {
+ MacroAssembler _masm(cbuf);
+ int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
+ // stack->stack
+ __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset,
+ sve_vector_reg_size_in_bytes);
+ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
+ __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
+ sve_vector_reg_size_in_bytes);
+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
+ __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
+ sve_vector_reg_size_in_bytes);
+ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
+ __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
+ } else {
+ ShouldNotReachHere();
+ }
+ } else if (cbuf) {
+ assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
MacroAssembler _masm(cbuf);
assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
@@ -1937,12 +2196,12 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
as_FloatRegister(Matcher::_regEncode[src_lo]));
} else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
__ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
- ireg == Op_VecD ? __ D : __ Q,
- ra_->reg2offset(dst_lo));
+ ireg == Op_VecD ? __ D : __ Q,
+ ra_->reg2offset(dst_lo));
} else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
__ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
- ireg == Op_VecD ? __ D : __ Q,
- ra_->reg2offset(src_lo));
+ ireg == Op_VecD ? __ D : __ Q,
+ ra_->reg2offset(src_lo));
} else {
ShouldNotReachHere();
}
@@ -2027,9 +2286,24 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
st->print("%s", Matcher::regName[dst_lo]);
}
if (bottom_type()->isa_vect() != NULL) {
- st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
+ int vsize = 0;
+ switch (ideal_reg()) {
+ case Op_VecD:
+ vsize = 64;
+ break;
+ case Op_VecX:
+ vsize = 128;
+ break;
+ case Op_VecA:
+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
+ break;
+ default:
+ assert(false, "bad register type for spill");
+ ShouldNotReachHere();
+ }
+ st->print("\t# vector spill size = %d", vsize);
} else {
- st->print("\t# spill size = %d", is64 ? 64:32);
+ st->print("\t# spill size = %d", is64 ? 64 : 32);
}
}
@@ -2188,19 +2462,32 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
-
- // TODO
- // identify extra cases that we might want to provide match rules for
- // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
- bool ret_value = match_rule_supported(opcode);
- // Add rules here.
-
- return ret_value; // Per default match rules are supported.
+ // Identify extra cases that we might want to provide match rules for vector nodes and
+ // other intrinsics guarded with vector length (vlen) and element type (bt).
+ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
+ return false;
+ }
+ int bit_size = vlen * type2aelembytes(bt) * 8;
+ if (UseSVE == 0 && bit_size > 128) {
+ return false;
+ }
+ if (UseSVE > 0) {
+ return op_sve_supported(opcode);
+ } else { // NEON
+ // Special cases
+ switch (opcode) {
+ case Op_MulVL:
+ return false;
+ default:
+ break;
+ }
+ }
+ return true; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
- return false;
+ return UseSVE > 0;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
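
A standalone sketch of the gate added in match_rule_supported_vector above (not from the patch): anything wider than 128 bits requires SVE, SVE defers per-opcode support to op_sve_supported(), and NEON keeps rejecting MulVL. The boolean parameters stand in for those two checks.

  #include <cstdio>
  static bool vector_rule_supported_sketch(int use_sve, int vector_bits,
                                           bool sve_ok, bool is_mul_vl) {
    if (use_sve == 0 && vector_bits > 128) return false;  // NEON tops out at 128 bits
    if (use_sve > 0)                       return sve_ok; // per-opcode SVE check
    return !is_mul_vl;                                    // MulVL has no NEON rule
  }
  int main() {
    std::printf("%d\n", vector_rule_supported_sketch(0, 256, true, false));  // 0
    std::printf("%d\n", vector_rule_supported_sketch(1, 256, true, false));  // 1
  }
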
@@ -2236,7 +2523,8 @@ const bool Matcher::convL2FSupported(void) {
// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
- int size = MIN2(16,(int)MaxVectorSize);
+ // The MaxVectorSize should have been set by detecting SVE max vector register size.
+ int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize);
// Minimum 2 values in vector
if (size < 2*type2aelembytes(bt)) size = 0;
// But never < 4
@@ -2249,14 +2537,32 @@ const int Matcher::max_vector_size(const BasicType bt) {
return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
-// For the moment limit the vector size to 8 bytes
+ int max_size = max_vector_size(bt);
+ if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
+ // Currently vector length less than SVE vector register size is not supported.
+ return max_size;
+ } else {
+ // For the moment limit the vector size to 8 bytes with NEON.
int size = 8 / type2aelembytes(bt);
if (size < 2) size = 2;
return size;
+ }
+}
+
+const bool Matcher::supports_scalable_vector() {
+ return UseSVE > 0;
+}
+
+// Actual max scalable vector register length.
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return Matcher::max_vector_size(bt);
}
// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int len) {
+ if (UseSVE > 0 && 16 <= len && len <= 256) {
+ return Op_VecA;
+ }
switch(len) {
case 8: return Op_VecD;
case 16: return Op_VecX;
@@ -2266,6 +2572,9 @@ const uint Matcher::vector_ideal_reg(int len) {
}
const uint Matcher::vector_shift_count_ideal_reg(int size) {
+ if (UseSVE > 0 && 16 <= size && size <= 256) {
+ return Op_VecA;
+ }
switch(size) {
case 8: return Op_VecD;
case 16: return Op_VecX;
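
A standalone sketch of the ideal-register selection added above (not from the patch): with SVE enabled, any vector length between 16 and 256 bytes maps to the scalable Op_VecA, otherwise the existing 8/16-byte NEON mapping applies.

  #include <cstdio>
  enum IdealReg { Bad, VecD, VecX, VecA };
  static IdealReg vector_ideal_reg_sketch(int use_sve, int len_in_bytes) {
    if (use_sve > 0 && 16 <= len_in_bytes && len_in_bytes <= 256) return VecA;
    switch (len_in_bytes) {
      case 8:  return VecD;
      case 16: return VecX;
      default: return Bad;
    }
  }
  int main() {
    std::printf("%d\n", vector_ideal_reg_sketch(1, 32));  // VecA: 256-bit SVE vectors
    std::printf("%d\n", vector_ideal_reg_sketch(0, 16));  // VecX: 128-bit NEON vectors
  }
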
@@ -3419,6 +3728,11 @@ encode %{
if (call == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
return;
+ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+ // Only non uncommon_trap calls need to reinitialize ptrue.
+ if (uncommon_trap_request() == 0) {
+ __ reinitialize_ptrue();
+ }
}
%}
@@ -3429,6 +3743,8 @@ encode %{
if (call == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
return;
+ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+ __ reinitialize_ptrue();
}
%}
@@ -3465,6 +3781,9 @@ encode %{
__ bind(retaddr);
__ add(sp, sp, 2 * wordSize);
}
+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+ __ reinitialize_ptrue();
+ }
%}
enc_class aarch64_enc_rethrow() %{
@@ -3474,6 +3793,11 @@ encode %{
enc_class aarch64_enc_ret() %{
MacroAssembler _masm(&cbuf);
+#ifdef ASSERT
+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+ __ verify_ptrue();
+ }
+#endif
__ ret(lr);
%}
@@ -4203,6 +4527,41 @@ operand immLoffset16()
interface(CONST_INTER);
%}
+// 8 bit signed value.
+operand immI8()
+%{
+ predicate(n->get_int() <= 127 && n->get_int() >= -128);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 8 bit signed value (simm8), or #simm8 LSL 8.
+operand immI8_shift8()
+%{
+ predicate((n->get_int() <= 127 && n->get_int() >= -128) ||
+ (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 8 bit signed value (simm8), or #simm8 LSL 8.
+operand immL8_shift8()
+%{
+ predicate((n->get_long() <= 127 && n->get_long() >= -128) ||
+ (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
@@ -4832,6 +5191,18 @@ operand vRegD()
interface(REG_INTER);
%}
+// Generic vector class. This will be used for
+// all vector operands, including NEON and SVE,
+// but currently only used for SVE VecA.
+operand vReg()
+%{
+ constraint(ALLOC_IN_RC(vectora_reg));
+ match(VecA);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
operand vecD()
%{
constraint(ALLOC_IN_RC(vectord_reg));
@@ -5140,6 +5511,15 @@ operand vRegD_V31()
interface(REG_INTER);
%}
+operand pRegGov()
+%{
+ constraint(ALLOC_IN_RC(gov_pr));
+ match(RegVMask);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Flags register, used as output of signed compare instructions
// note that on AArch64 we also use this register as the output for
@@ -15477,7 +15857,7 @@ instruct loadV8(vecD dst, vmem8 mem)
// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
- predicate(n->as_LoadVector()->memory_size() == 16);
+ predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16);
match(Set dst (LoadVector mem));
ins_cost(4 * INSN_COST);
format %{ "ldrq $dst,$mem\t# vector (128 bits)" %}
@@ -15533,7 +15913,7 @@ instruct replicate8B(vecD dst, iRegIorL2I src)
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
- predicate(n->as_Vector()->length() == 16);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
match(Set dst (ReplicateB src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (16B)" %}
@@ -15558,7 +15938,7 @@ instruct replicate8B_imm(vecD dst, immI con)
instruct replicate16B_imm(vecX dst, immI con)
%{
- predicate(n->as_Vector()->length() == 16);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
match(Set dst (ReplicateB con));
ins_cost(INSN_COST);
format %{ "movi $dst, $con\t# vector(16B)" %}
@@ -15583,7 +15963,7 @@ instruct replicate4S(vecD dst, iRegIorL2I src)
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
- predicate(n->as_Vector()->length() == 8);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
match(Set dst (ReplicateS src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (8S)" %}
@@ -15608,7 +15988,7 @@ instruct replicate4S_imm(vecD dst, immI con)
instruct replicate8S_imm(vecX dst, immI con)
%{
- predicate(n->as_Vector()->length() == 8);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
match(Set dst (ReplicateS con));
ins_cost(INSN_COST);
format %{ "movi $dst, $con\t# vector(8H)" %}
@@ -15632,7 +16012,7 @@ instruct replicate2I(vecD dst, iRegIorL2I src)
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
match(Set dst (ReplicateI src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (4I)" %}
@@ -15656,7 +16036,7 @@ instruct replicate2I_imm(vecD dst, immI con)
instruct replicate4I_imm(vecX dst, immI con)
%{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
match(Set dst (ReplicateI con));
ins_cost(INSN_COST);
format %{ "movi $dst, $con\t# vector(4I)" %}
@@ -15668,7 +16048,7 @@ instruct replicate4I_imm(vecX dst, immI con)
instruct replicate2L(vecX dst, iRegL src)
%{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
match(Set dst (ReplicateL src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (2L)" %}
@@ -15680,7 +16060,7 @@ instruct replicate2L(vecX dst, iRegL src)
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
match(Set dst (ReplicateI zero));
ins_cost(INSN_COST);
format %{ "movi $dst, $zero\t# vector(4I)" %}
@@ -15707,7 +16087,7 @@ instruct replicate2F(vecD dst, vRegF src)
instruct replicate4F(vecX dst, vRegF src)
%{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
match(Set dst (ReplicateF src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (4F)" %}
@@ -15720,7 +16100,7 @@ instruct replicate4F(vecX dst, vRegF src)
instruct replicate2D(vecX dst, vRegD src)
%{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
match(Set dst (ReplicateD src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (2D)" %}
@@ -17496,6 +17876,43 @@ instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
ins_pipe(vshift128_imm);
%}
+instruct vpopcount4I(vecX dst, vecX src) %{
+ predicate(UsePopCountInstruction && n->as_Vector()->length() == 4);
+ match(Set dst (PopCountVI src));
+ format %{
+ "cnt $dst, $src\t# vector (16B)\n\t"
+ "uaddlp $dst, $dst\t# vector (16B)\n\t"
+ "uaddlp $dst, $dst\t# vector (8H)"
+ %}
+ ins_encode %{
+ __ cnt(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($dst$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vpopcount2I(vecD dst, vecD src) %{
+ predicate(UsePopCountInstruction && n->as_Vector()->length() == 2);
+ match(Set dst (PopCountVI src));
+ format %{
+ "cnt $dst, $src\t# vector (8B)\n\t"
+ "uaddlp $dst, $dst\t# vector (8B)\n\t"
+ "uaddlp $dst, $dst\t# vector (4H)"
+ %}
+ ins_encode %{
+ __ cnt(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($dst$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
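
The two rules above compute PopCountVI in three instructions: cnt produces per-byte popcounts and each uaddlp halves the lane count while widening, so two of them fold the byte counts into one count per 32-bit lane. A standalone sketch for a single lane (not part of the patch; uses the GCC/Clang __builtin_popcount intrinsic):

  #include <cstdio>
  int main() {
    unsigned x = 0xF00F0001u;                               // one 32-bit lane
    unsigned b[4];
    for (int i = 0; i < 4; i++)                             // cnt: popcount per byte
      b[i] = __builtin_popcount((x >> (8 * i)) & 0xffu);
    unsigned h[2] = { b[0] + b[1], b[2] + b[3] };           // uaddlp: bytes -> halfwords
    unsigned s    = h[0] + h[1];                            // uaddlp: halfwords -> words
    std::printf("%u\n", s);                                 // 9 == popcount(0xF00F0001)
  }
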
//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad
new file mode 100644
index 000000000..8d80cb37a
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad
@@ -0,0 +1,1366 @@
+//
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
+
+// AArch64 SVE Architecture Description File
+
+
+// 4 bit signed offset -- for predicated load/store
+
+operand vmemA_immIOffset4()
+%{
+ predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4,
+ Matcher::scalable_vector_reg_size(T_BYTE)));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand vmemA_immLOffset4()
+%{
+ predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4,
+ Matcher::scalable_vector_reg_size(T_BYTE)));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+
+operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off, MUL VL]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off, MUL VL]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
+
+source_hpp %{
+ bool op_sve_supported(int opcode);
+%}
+
+source %{
+
+ static inline BasicType vector_element_basic_type(const MachNode* n) {
+ const TypeVect* vt = n->bottom_type()->is_vect();
+ return vt->element_basic_type();
+ }
+
+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
+ int def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ const TypeVect* vt = def->bottom_type()->is_vect();
+ return vt->element_basic_type();
+ }
+
+ typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
+ PRegister Pg, const Address &adr);
+
+ // Predicated load/store, with optional ptrue to all elements of given predicate register.
+ static void loadStoreA_predicate(MacroAssembler masm, bool is_store,
+ FloatRegister reg, PRegister pg, BasicType bt,
+ int opcode, Register base, int index, int size, int disp) {
+ sve_mem_insn_predicate insn = NULL;
+ Assembler::SIMD_RegVariant type = Assembler::B;
+ int esize = type2aelembytes(bt);
+ if (index == -1) {
+ assert(size == 0, "unsupported address mode: scale size = %d", size);
+ switch(esize) {
+ case 1:
+ insn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b;
+ type = Assembler::B;
+ break;
+ case 2:
+ insn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h;
+ type = Assembler::H;
+ break;
+ case 4:
+ insn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w;
+ type = Assembler::S;
+ break;
+ case 8:
+ insn = is_store ? &MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d;
+ type = Assembler::D;
+ break;
+ default:
+ assert(false, "unsupported");
+ ShouldNotReachHere();
+ }
+ (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
+ } else {
+ assert(false, "unimplemented");
+ ShouldNotReachHere();
+ }
+ }
+
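The immediate in the "[$reg, $off, MUL VL]" form of the vmemA operands counts whole vector registers, which is why loadStoreA_predicate above divides the byte displacement by the runtime vector length. A standalone sketch (not from the patch), assuming a 128-bit SVE implementation for the numbers:

  #include <cstdio>
  int main() {
    const int vl_bytes = 16;   // assumed 128-bit vector length
    for (int disp = 0; disp <= 64; disp += 16)
      std::printf("byte offset %2d -> [base, #%d, MUL VL]\n", disp, disp / vl_bytes);
  }
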
+ bool op_sve_supported(int opcode) {
+ switch (opcode) {
+ // No multiply reduction instructions
+ case Op_MulReductionVD:
+ case Op_MulReductionVF:
+ case Op_MulReductionVI:
+ case Op_MulReductionVL:
+ // Others
+ case Op_Extract:
+ case Op_ExtractB:
+ case Op_ExtractC:
+ case Op_ExtractD:
+ case Op_ExtractF:
+ case Op_ExtractI:
+ case Op_ExtractL:
+ case Op_ExtractS:
+ case Op_ExtractUB:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+%}
+
+definitions %{
+ int_def SVE_COST (200, 200);
+%}
+
+
+
+
+// All SVE instructions
+
+// vector load/store
+
+// Use predicated vector load/store
+instruct loadV(vReg dst, vmemA mem) %{
+ predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
+ match(Set dst (LoadVector mem));
+ ins_cost(SVE_COST);
+ format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
+ ins_encode %{
+ FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+ loadStoreA_predicate(MacroAssembler(&cbuf), false, dst_reg, ptrue,
+ vector_element_basic_type(this), $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storeV(vReg src, vmemA mem) %{
+ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
+ match(Set mem (StoreVector mem src));
+ ins_cost(SVE_COST);
+ format %{ "sve_str $mem, $src\t # vector (sve)" %}
+ ins_encode %{
+ FloatRegister src_reg = as_FloatRegister($src$$reg);
+ loadStoreA_predicate(MacroAssembler(&cbuf), true, src_reg, ptrue,
+ vector_element_basic_type(this, $src), $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector add
+
+instruct vaddB(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (AddVB src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_add(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddS(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (AddVS src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_add(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddI(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (AddVI src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_add(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddL(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (AddVL src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_add(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddF(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (AddVF src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fadd(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddD(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (AddVD src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fadd(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector and
+
+instruct vand(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (AndV src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_and $dst, $src1, $src2\t# vector (sve)" %}
+ ins_encode %{
+ __ sve_and(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector or
+
+instruct vor(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (OrV src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_orr $dst, $src1, $src2\t# vector (sve)" %}
+ ins_encode %{
+ __ sve_orr(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector xor
+
+instruct vxor(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (XorV src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_eor $dst, $src1, $src2\t# vector (sve)" %}
+ ins_encode %{
+ __ sve_eor(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector float div
+
+instruct vdivF(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (DivVF dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vdivD(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (DivVD dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fmla
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fmls
+
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fnmla
+
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fnmls
+
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector mla
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector mls
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+
+// vector mul
+
+instruct vmulS(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst_src1 (MulVS dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulI(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (MulVI dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulL(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (MulVL dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulF(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (MulVF src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fmul(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulD(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (MulVD src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fmul(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fneg
+
+instruct vnegF(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (NegVF src));
+ ins_cost(SVE_COST);
+ format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fneg(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vnegD(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (NegVD src));
+ ins_cost(SVE_COST);
+ format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fneg(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// popcount vector
+
+instruct vpopcountI(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (PopCountVI src));
+ format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
+ ins_encode %{
+ __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector add reduction
+
+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (S)\n\t"
+ "umov $dst, $tmp, S, 0\n\t"
+ "addw $dst, $dst, $src1\t # add reduction S" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
+ __ addw($dst$$Register, $dst$$Register, $src1$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+ match(Set dst (AddReductionVL src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (D)\n\t"
+ "umov $dst, $tmp, D, 0\n\t"
+ "add $dst, $dst, $src1\t # add reduction D" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
+ __ add($dst$$Register, $dst$$Register, $src1$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addF(vRegF src1_dst, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set src1_dst (AddReductionVF src1_dst src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addD(vRegD src1_dst, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set src1_dst (AddReductionVD src1_dst src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector replicate
+
+instruct replicateB(vReg dst, iRegIorL2I src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (ReplicateB src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateS(vReg dst, iRegIorL2I src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (ReplicateS src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateI(vReg dst, iRegIorL2I src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (ReplicateI src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateL(vReg dst, iRegL src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (ReplicateL src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+
+instruct replicateB_imm8(vReg dst, immI8 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (ReplicateB con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateS_imm8(vReg dst, immI8_shift8 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (ReplicateS con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ H, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateI_imm8(vReg dst, immI8_shift8 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (ReplicateI con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ S, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateL_imm8(vReg dst, immL8_shift8 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (ReplicateL con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ D, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+
+instruct replicateF(vReg dst, vRegF src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (ReplicateF src));
+ ins_cost(SVE_COST);
+ format %{ "sve_cpy $dst, $src\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateD(vReg dst, vRegD src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (ReplicateD src));
+ ins_cost(SVE_COST);
+ format %{ "sve_cpy $dst, $src\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_cpy(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector shift
+
+instruct vasrB(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (RShiftVB dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_asr(as_FloatRegister($dst$$reg), __ B,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrS(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (RShiftVS dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_asr(as_FloatRegister($dst$$reg), __ H,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrI(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (RShiftVI dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_asr(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrL(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (RShiftVL dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_asr(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslB(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (LShiftVB dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslS(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (LShiftVS dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslI(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (LShiftVI dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslL(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (LShiftVL dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (URShiftVB dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (URShiftVS dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (URShiftVI dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (URShiftVL dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (RShiftVB src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ if (con >= 8) con = 7;
+ __ sve_asr(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (RShiftVS src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ if (con >= 16) con = 15;
+ __ sve_asr(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (RShiftVI src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_asr(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (RShiftVL src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_asr(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (URShiftVB src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (URShiftVS src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (URShiftVI src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (URShiftVL src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (LShiftVB src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (LShiftVS src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (LShiftVI src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (LShiftVL src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+ (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR)));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector sqrt
+
+instruct vsqrtF(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (SqrtVF src));
+ ins_cost(SVE_COST);
+ format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsqrtD(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (SqrtVD src));
+ ins_cost(SVE_COST);
+ format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector sub
+
+instruct vsubB(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (SubVB src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_sub(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubS(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (SubVS src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_sub(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubI(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (SubVI src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_sub(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubL(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (SubVL src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_sub(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubF(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (SubVF src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fsub(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubD(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (SubVD src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fsub(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
new file mode 100644
index 000000000..0323f2f8c
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
@@ -0,0 +1,727 @@
+//
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+dnl Generate the warning
+// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
+dnl
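+dnl Note: the checked-in aarch64_sve.ad is the m4 output of this file; keep the two in sync when editing.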
+
+// AArch64 SVE Architecture Description File
+
+dnl
+dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1, $2, $3 )
+dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len)
+define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', `
+operand vmemA_imm$1Offset$3()
+%{
+ predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3,
+ Matcher::scalable_vector_reg_size(T_BYTE)));
+ match(Con$1);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}')
+dnl
+// 4 bit signed offset -- for predicated load/store
+OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4)
+OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4)
+dnl
+dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1, $2 )
+dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len)
+define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', `
+operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off, MUL VL]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ `index'(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}')
+dnl
+OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4)
+OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4)
+
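+// Address operands accepted by the predicated SVE load/store rules: a plain base
+// register, or base plus a 4-bit signed immediate offset in multiples of the
+// vector length (MUL VL).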
+opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
+
+source_hpp %{
+ bool op_sve_supported(int opcode);
+%}
+
+source %{
+
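+  // Return the vector element basic type, either of this node's own value
+  // or of the node that defines the given input operand.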
+ static inline BasicType vector_element_basic_type(const MachNode* n) {
+ const TypeVect* vt = n->bottom_type()->is_vect();
+ return vt->element_basic_type();
+ }
+
+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
+ int def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ const TypeVect* vt = def->bottom_type()->is_vect();
+ return vt->element_basic_type();
+ }
+
+ typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
+ PRegister Pg, const Address &adr);
+
+  // Predicated load/store. The governing predicate register may simply be ptrue, enabling all elements.
+ static void loadStoreA_predicate(MacroAssembler masm, bool is_store,
+ FloatRegister reg, PRegister pg, BasicType bt,
+ int opcode, Register base, int index, int size, int disp) {
+    sve_mem_insn_predicate insn = NULL;
+    Assembler::SIMD_RegVariant type = Assembler::B;
+ int esize = type2aelembytes(bt);
+ if (index == -1) {
+ assert(size == 0, "unsupported address mode: scale size = %d", size);
+ switch(esize) {
+ case 1:
+ insn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b;
+ type = Assembler::B;
+ break;
+ case 2:
+ insn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h;
+ type = Assembler::H;
+ break;
+ case 4:
+ insn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w;
+ type = Assembler::S;
+ break;
+ case 8:
+ insn = is_store ? &MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d;
+ type = Assembler::D;
+ break;
+ default:
+ assert(false, "unsupported");
+ ShouldNotReachHere();
+ }
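+      // The immediate offset of SVE contiguous ld1/st1 is counted in multiples of the
+      // vector register length (MUL VL), so scale the byte displacement accordingly.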
+ (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
+ } else {
+ assert(false, "unimplemented");
+ ShouldNotReachHere();
+ }
+ }
+
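+  // Return whether the given vector opcode has an SVE implementation in this backend.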
+ bool op_sve_supported(int opcode) {
+ switch (opcode) {
+ // No multiply reduction instructions
+ case Op_MulReductionVD:
+ case Op_MulReductionVF:
+ case Op_MulReductionVI:
+ case Op_MulReductionVL:
+ // Others
+ case Op_Extract:
+ case Op_ExtractB:
+ case Op_ExtractC:
+ case Op_ExtractD:
+ case Op_ExtractF:
+ case Op_ExtractI:
+ case Op_ExtractL:
+ case Op_ExtractS:
+ case Op_ExtractUB:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+%}
+
+definitions %{
+ int_def SVE_COST (200, 200);
+%}
+
+
+dnl
+dnl ELEMENT_SHORT_CHAR($1, $2)
+dnl ELEMENT_SHORT_CHAR(etype, node)
+define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
+ `($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+ ($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
+ `($2->bottom_type()->is_vect()->element_basic_type() == $1)')')
+dnl
+
+// All SVE instructions
+
+// vector load/store
+
+// Use predicated vector load/store
+instruct loadV(vReg dst, vmemA mem) %{
+ predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
+ match(Set dst (LoadVector mem));
+ ins_cost(SVE_COST);
+ format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
+ ins_encode %{
+ FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+ loadStoreA_predicate(MacroAssembler(&cbuf), false, dst_reg, ptrue,
+ vector_element_basic_type(this), $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storeV(vReg src, vmemA mem) %{
+ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
+ match(Set mem (StoreVector mem src));
+ ins_cost(SVE_COST);
+ format %{ "sve_str $mem, $src\t # vector (sve)" %}
+ ins_encode %{
+ FloatRegister src_reg = as_FloatRegister($src$$reg);
+ loadStoreA_predicate(MacroAssembler(&cbuf), true, src_reg, ptrue,
+ vector_element_basic_type(this, $src), $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+dnl
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
+define(`UNARY_OP_TRUE_PREDICATE_ETYPE', `
+instruct $1(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
+ n->bottom_type()->is_vect()->element_basic_type() == $3);
+ match(Set dst ($2 src));
+ ins_cost(SVE_COST);
+ format %{ "$6 $dst, $src\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst$$reg), __ $4,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+dnl
+dnl BINARY_OP_UNPREDICATED($1, $2, $3, $4, $5 )
+dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn)
+define(`BINARY_OP_UNPREDICATED', `
+instruct $1(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+ match(Set dst ($2 src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector add
+BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add)
+BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8, sve_add)
+BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4, sve_add)
+BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2, sve_add)
+BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4, sve_fadd)
+BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2, sve_fadd)
+dnl
+dnl BINARY_OP_UNSIZED($1, $2, $3, $4 )
+dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn)
+define(`BINARY_OP_UNSIZED', `
+instruct $1(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $3);
+ match(Set dst ($2 src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "$4 $dst, $src1, $src2\t# vector (sve)" %}
+ ins_encode %{
+ __ $4(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector and
+BINARY_OP_UNSIZED(vand, AndV, 16, sve_and)
+
+// vector or
+BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr)
+
+// vector xor
+BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor)
+dnl
+dnl VDIVF($1, $2 , $3 )
+dnl VDIVF(name_suffix, size, min_vec_len)
+define(`VDIVF', `
+instruct vdiv$1(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (DivV$1 dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector float div
+VDIVF(F, S, 4)
+VDIVF(D, D, 2)
+
+dnl
+dnl BINARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
+dnl BINARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
+define(`BINARY_OP_TRUE_PREDICATE_ETYPE', `
+instruct $1(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
+ n->bottom_type()->is_vect()->element_basic_type() == $3);
+ match(Set dst_src1 ($2 dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "$6 $dst_src1, $dst_src1, $src2\t # vector (sve) ($4)" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst_src1$$reg), __ $4,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+
+dnl
+dnl VFMLA($1 $2 $3 )
+dnl VFMLA(name_suffix, size, min_vec_len)
+define(`VFMLA', `
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fmla
+VFMLA(F, S, 4)
+VFMLA(D, D, 2)
+
+dnl
+dnl VFMLS($1 $2 $3 )
+dnl VFMLS(name_suffix, size, min_vec_len)
+define(`VFMLS', `
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
+ match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fmls
+VFMLS(F, S, 4)
+VFMLS(D, D, 2)
+
+dnl
+dnl VFNMLA($1 $2 $3 )
+dnl VFNMLA(name_suffix, size, min_vec_len)
+define(`VFNMLA', `
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
+ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fnmla
+VFNMLA(F, S, 4)
+VFNMLA(D, D, 2)
+
+dnl
+dnl VFNMLS($1 $2 $3 )
+dnl VFNMLS(name_suffix, size, min_vec_len)
+define(`VFNMLS', `
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fnmls
+VFNMLS(F, S, 4)
+VFNMLS(D, D, 2)
+
+dnl
+dnl VMLA($1 $2 $3 )
+dnl VMLA(name_suffix, size, min_vec_len)
+define(`VMLA', `
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector mla
+VMLA(B, B, 16)
+VMLA(S, H, 8)
+VMLA(I, S, 4)
+VMLA(L, D, 2)
+
+dnl
+dnl VMLS($1 $2 $3 )
+dnl VMLS(name_suffix, size, min_vec_len)
+define(`VMLS', `
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector mls
+VMLS(B, B, 16)
+VMLS(S, H, 8)
+VMLS(I, S, 4)
+VMLS(L, D, 2)
+
+dnl
+dnl BINARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
+dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`BINARY_OP_TRUE_PREDICATE', `
+instruct $1(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+ match(Set dst_src1 ($2 dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst_src1$$reg), __ $3,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector mul
+BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8, sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4, sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2, sve_mul)
+BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul)
+BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul)
+
+dnl
+dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
+dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn)
+define(`UNARY_OP_TRUE_PREDICATE', `
+instruct $1(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $4);
+ match(Set dst ($2 src));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst, $src\t# vector (sve) ($3)" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fneg
+UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, sve_fneg)
+UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg)
+
+// popcount vector
+
+instruct vpopcountI(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (PopCountVI src));
+ format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
+ ins_encode %{
+ __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+dnl
+dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 )
+dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
+define(`REDUCE_ADD', `
+instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ ELEMENT_SHORT_CHAR($6, n->in(2)));
+ match(Set dst ($2 src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
+ "umov $dst, $tmp, $5, 0\n\t"
+ "$7 $dst, $dst, $src1\t # add reduction $5" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
+ __ $7($dst$$Register, $dst$$Register, $src1$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REDUCE_ADDF($1, $2, $3, $4 )
+dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
+define(`REDUCE_ADDF', `
+instruct $1($3 src1_dst, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set src1_dst ($2 src1_dst src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector add reduction
+REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw)
+REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add)
+REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S)
+REDUCE_ADDF(reduce_addD, AddReductionVD, vRegD, D)
+
+dnl
+dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 )
+dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
+define(`REDUCE_FMINMAX', `
+instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
+ n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "sve_f$1v $dst, $src2 # vector (sve) (S)\n\t"
+ "f$1s $dst, $dst, $src1\t # $1 reduction $2" %}
+ ins_encode %{
+ __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+// vector max reduction
+REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF)
+REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
+
+// vector min reduction
+REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF)
+REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
+
+dnl
+dnl REPLICATE($1, $2, $3, $4, $5 )
+dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
+define(`REPLICATE', `
+instruct $1(vReg dst, $3 src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+ match(Set dst ($2 src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REPLICATE_IMM8($1, $2, $3, $4, $5 )
+dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
+define(`REPLICATE_IMM8', `
+instruct $1(vReg dst, $3 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+ match(Set dst ($2 con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl FREPLICATE($1, $2, $3, $4, $5 )
+dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
+define(`FREPLICATE', `
+instruct $1(vReg dst, $3 src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+ match(Set dst ($2 src));
+ ins_cost(SVE_COST);
+ format %{ "sve_cpy $dst, $src\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector replicate
+REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
+REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
+REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
+REPLICATE(replicateL, ReplicateL, iRegL, D, 2)
+
+REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16)
+REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
+REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
+REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
+
+FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
+FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
+dnl
+dnl VSHIFT_TRUE_PREDICATE($1, $2, $3, $4, $5 )
+dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`VSHIFT_TRUE_PREDICATE', `
+instruct $1(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+ match(Set dst ($2 dst shift));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 )
+dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`VSHIFT_IMM_UNPREDICATE', `
+instruct $1(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+ match(Set dst ($2 src shift));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;dnl
+ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }')dnl
+ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
+ if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, `
+ if (con >= 16) con = 15;')')dnl
+ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, `
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }')
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl VSHIFT_COUNT($1, $2, $3, $4 )
+dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
+define(`VSHIFT_COUNT', `
+instruct $1(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3 &&
+ ELEMENT_SHORT_CHAR($4, n));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector shift
+VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB, B, 16, sve_asr)
+VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS, H, 8, sve_asr)
+VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI, S, 4, sve_asr)
+VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL, D, 2, sve_asr)
+VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB, B, 16, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS, H, 8, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI, S, 4, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL, D, 2, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
+VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr)
+VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr)
+VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl)
+VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
+VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT)
+VSHIFT_COUNT(vshiftcntI, S, 4, T_INT)
+VSHIFT_COUNT(vshiftcntL, D, 2, T_LONG)
+
+// vector sqrt
+UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt)
+UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt)
+
+// vector sub
+BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub)
+BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub)
+BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub)
+BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub)
+BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub)
+BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub)
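
Note on the floating-point add reductions defined above: sve_fadda is a strictly-ordered reduction that folds the vector elements into the scalar accumulator one at a time, rather than as a reassociating tree. A minimal C++ sketch of the behaviour the reduce_addF/reduce_addD rules are relying on (the function and variable names below are illustrative only, not part of the patch):

    // Reference semantics for a strictly-ordered FP add reduction: elements
    // are accumulated left-to-right, so rounding matches a sequential loop.
    float fadda_reference(float acc, const float* elems, int n) {
      for (int i = 0; i < n; i++) {
        acc += elems[i];   // evaluation order is observable for FP addition
      }
      return acc;
    }

This sequential ordering is presumably why these rules use the predicated fadda form rather than an unordered pairwise reduction.
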
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
index 586743eb9..441ea4066 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
@@ -98,531 +98,617 @@ void entry(CodeBuffer *cb) {
__ bind(back);
// ArithOp
- __ add(r19, r22, r7, Assembler::LSL, 28); // add x19, x22, x7, LSL #28
- __ sub(r16, r11, r10, Assembler::LSR, 13); // sub x16, x11, x10, LSR #13
- __ adds(r27, r13, r28, Assembler::ASR, 2); // adds x27, x13, x28, ASR #2
- __ subs(r20, r28, r26, Assembler::ASR, 41); // subs x20, x28, x26, ASR #41
- __ addw(r8, r19, r19, Assembler::ASR, 19); // add w8, w19, w19, ASR #19
- __ subw(r4, r9, r10, Assembler::LSL, 14); // sub w4, w9, w10, LSL #14
- __ addsw(r8, r11, r30, Assembler::LSL, 13); // adds w8, w11, w30, LSL #13
- __ subsw(r0, r25, r19, Assembler::LSL, 9); // subs w0, w25, w19, LSL #9
- __ andr(r20, r0, r21, Assembler::LSL, 19); // and x20, x0, x21, LSL #19
- __ orr(r21, r14, r20, Assembler::LSL, 17); // orr x21, x14, x20, LSL #17
- __ eor(r25, r28, r1, Assembler::LSL, 51); // eor x25, x28, x1, LSL #51
- __ ands(r10, r27, r11, Assembler::ASR, 15); // ands x10, x27, x11, ASR #15
- __ andw(r25, r5, r12, Assembler::ASR, 23); // and w25, w5, w12, ASR #23
- __ orrw(r18, r14, r10, Assembler::LSR, 4); // orr w18, w14, w10, LSR #4
- __ eorw(r4, r21, r5, Assembler::ASR, 22); // eor w4, w21, w5, ASR #22
- __ andsw(r21, r0, r5, Assembler::ASR, 29); // ands w21, w0, w5, ASR #29
- __ bic(r26, r30, r6, Assembler::ASR, 37); // bic x26, x30, x6, ASR #37
- __ orn(r3, r1, r13, Assembler::LSR, 29); // orn x3, x1, x13, LSR #29
- __ eon(r0, r28, r9, Assembler::LSL, 47); // eon x0, x28, x9, LSL #47
- __ bics(r29, r5, r28, Assembler::LSL, 46); // bics x29, x5, x28, LSL #46
- __ bicw(r9, r18, r7, Assembler::LSR, 20); // bic w9, w18, w7, LSR #20
- __ ornw(r26, r13, r25, Assembler::ASR, 24); // orn w26, w13, w25, ASR #24
- __ eonw(r25, r4, r19, Assembler::LSL, 6); // eon w25, w4, w19, LSL #6
- __ bicsw(r5, r26, r4, Assembler::LSR, 24); // bics w5, w26, w4, LSR #24
+ __ add(r26, r23, r13, Assembler::LSL, 32); // add x26, x23, x13, LSL #32
+ __ sub(r12, r24, r9, Assembler::LSR, 37); // sub x12, x24, x9, LSR #37
+ __ adds(r28, r15, r8, Assembler::ASR, 39); // adds x28, x15, x8, ASR #39
+ __ subs(r7, r28, r30, Assembler::ASR, 57); // subs x7, x28, x30, ASR #57
+ __ addw(r9, r22, r27, Assembler::ASR, 15); // add w9, w22, w27, ASR #15
+ __ subw(r3, r13, r18, Assembler::ASR, 30); // sub w3, w13, w18, ASR #30
+ __ addsw(r14, r26, r8, Assembler::ASR, 17); // adds w14, w26, w8, ASR #17
+ __ subsw(r0, r22, r12, Assembler::ASR, 21); // subs w0, w22, w12, ASR #21
+ __ andr(r0, r15, r26, Assembler::LSL, 20); // and x0, x15, x26, LSL #20
+ __ orr(r26, r5, r17, Assembler::LSL, 61); // orr x26, x5, x17, LSL #61
+ __ eor(r24, r13, r2, Assembler::LSL, 32); // eor x24, x13, x2, LSL #32
+ __ ands(r28, r3, r17, Assembler::ASR, 35); // ands x28, x3, x17, ASR #35
+ __ andw(r25, r16, r29, Assembler::LSR, 18); // and w25, w16, w29, LSR #18
+ __ orrw(r13, r18, r11, Assembler::LSR, 9); // orr w13, w18, w11, LSR #9
+ __ eorw(r5, r5, r18, Assembler::LSR, 15); // eor w5, w5, w18, LSR #15
+ __ andsw(r2, r23, r27, Assembler::ASR, 26); // ands w2, w23, w27, ASR #26
+ __ bic(r27, r28, r16, Assembler::LSR, 45); // bic x27, x28, x16, LSR #45
+ __ orn(r8, r25, r26, Assembler::ASR, 37); // orn x8, x25, x26, ASR #37
+ __ eon(r29, r17, r13, Assembler::LSR, 63); // eon x29, x17, x13, LSR #63
+ __ bics(r28, r24, r2, Assembler::LSR, 31); // bics x28, x24, x2, LSR #31
+ __ bicw(r19, r26, r7, Assembler::ASR, 3); // bic w19, w26, w7, ASR #3
+ __ ornw(r6, r24, r10, Assembler::ASR, 3); // orn w6, w24, w10, ASR #3
+ __ eonw(r4, r21, r1, Assembler::LSR, 29); // eon w4, w21, w1, LSR #29
+ __ bicsw(r16, r21, r0, Assembler::LSR, 19); // bics w16, w21, w0, LSR #19
// AddSubImmOp
- __ addw(r7, r19, 340u); // add w7, w19, #340
- __ addsw(r8, r0, 401u); // adds w8, w0, #401
- __ subw(r29, r20, 163u); // sub w29, w20, #163
- __ subsw(r8, r23, 759u); // subs w8, w23, #759
- __ add(r1, r12, 523u); // add x1, x12, #523
- __ adds(r2, r11, 426u); // adds x2, x11, #426
- __ sub(r14, r29, 716u); // sub x14, x29, #716
- __ subs(r11, r5, 582u); // subs x11, x5, #582
+ __ addw(r17, r12, 379u); // add w17, w12, #379
+ __ addsw(r30, r1, 22u); // adds w30, w1, #22
+ __ subw(r29, r5, 126u); // sub w29, w5, #126
+ __ subsw(r6, r24, 960u); // subs w6, w24, #960
+ __ add(r0, r13, 104u); // add x0, x13, #104
+ __ adds(r8, r6, 663u); // adds x8, x6, #663
+ __ sub(r10, r5, 516u); // sub x10, x5, #516
+ __ subs(r1, r3, 1012u); // subs x1, x3, #1012
// LogicalImmOp
- __ andw(r23, r22, 32768ul); // and w23, w22, #0x8000
- __ orrw(r4, r10, 4042322160ul); // orr w4, w10, #0xf0f0f0f0
- __ eorw(r0, r24, 4042322160ul); // eor w0, w24, #0xf0f0f0f0
- __ andsw(r19, r29, 2139127680ul); // ands w19, w29, #0x7f807f80
- __ andr(r5, r10, 4503599627354112ul); // and x5, x10, #0xfffffffffc000
- __ orr(r12, r30, 18445618178097414144ul); // orr x12, x30, #0xfffc0000fffc0000
- __ eor(r30, r5, 262128ul); // eor x30, x5, #0x3fff0
- __ ands(r26, r23, 4194300ul); // ands x26, x23, #0x3ffffc
+ __ andw(r6, r11, 4294049777ull); // and w6, w11, #0xfff1fff1
+ __ orrw(r28, r5, 4294966791ull); // orr w28, w5, #0xfffffe07
+ __ eorw(r1, r20, 134217216ull); // eor w1, w20, #0x7fffe00
+ __ andsw(r7, r18, 1048576ull); // ands w7, w18, #0x100000
+ __ andr(r14, r12, 9223372036854775808ull); // and x14, x12, #0x8000000000000000
+ __ orr(r9, r11, 562675075514368ull); // orr x9, x11, #0x1ffc000000000
+ __ eor(r17, r0, 18014398509481728ull); // eor x17, x0, #0x3fffffffffff00
+ __ ands(r1, r8, 18446744073705357315ull); // ands x1, x8, #0xffffffffffc00003
// AbsOp
- __ b(__ pc()); // b .
- __ b(back); // b back
- __ b(forth); // b forth
- __ bl(__ pc()); // bl .
- __ bl(back); // bl back
- __ bl(forth); // bl forth
+ __ b(__ pc()); // b .
+ __ b(back); // b back
+ __ b(forth); // b forth
+ __ bl(__ pc()); // bl .
+ __ bl(back); // bl back
+ __ bl(forth); // bl forth
// RegAndAbsOp
- __ cbzw(r12, __ pc()); // cbz w12, .
- __ cbzw(r12, back); // cbz w12, back
- __ cbzw(r12, forth); // cbz w12, forth
- __ cbnzw(r20, __ pc()); // cbnz w20, .
- __ cbnzw(r20, back); // cbnz w20, back
- __ cbnzw(r20, forth); // cbnz w20, forth
- __ cbz(r12, __ pc()); // cbz x12, .
- __ cbz(r12, back); // cbz x12, back
- __ cbz(r12, forth); // cbz x12, forth
- __ cbnz(r24, __ pc()); // cbnz x24, .
- __ cbnz(r24, back); // cbnz x24, back
- __ cbnz(r24, forth); // cbnz x24, forth
- __ adr(r6, __ pc()); // adr x6, .
- __ adr(r6, back); // adr x6, back
- __ adr(r6, forth); // adr x6, forth
- __ _adrp(r21, __ pc()); // adrp x21, .
+ __ cbzw(r10, __ pc()); // cbz w10, .
+ __ cbzw(r10, back); // cbz w10, back
+ __ cbzw(r10, forth); // cbz w10, forth
+ __ cbnzw(r8, __ pc()); // cbnz w8, .
+ __ cbnzw(r8, back); // cbnz w8, back
+ __ cbnzw(r8, forth); // cbnz w8, forth
+ __ cbz(r11, __ pc()); // cbz x11, .
+ __ cbz(r11, back); // cbz x11, back
+ __ cbz(r11, forth); // cbz x11, forth
+ __ cbnz(r29, __ pc()); // cbnz x29, .
+ __ cbnz(r29, back); // cbnz x29, back
+ __ cbnz(r29, forth); // cbnz x29, forth
+ __ adr(r19, __ pc()); // adr x19, .
+ __ adr(r19, back); // adr x19, back
+ __ adr(r19, forth); // adr x19, forth
+ __ _adrp(r19, __ pc()); // adrp x19, .
// RegImmAbsOp
- __ tbz(r1, 1, __ pc()); // tbz x1, #1, .
- __ tbz(r1, 1, back); // tbz x1, #1, back
- __ tbz(r1, 1, forth); // tbz x1, #1, forth
- __ tbnz(r8, 9, __ pc()); // tbnz x8, #9, .
- __ tbnz(r8, 9, back); // tbnz x8, #9, back
- __ tbnz(r8, 9, forth); // tbnz x8, #9, forth
+ __ tbz(r22, 6, __ pc()); // tbz x22, #6, .
+ __ tbz(r22, 6, back); // tbz x22, #6, back
+ __ tbz(r22, 6, forth); // tbz x22, #6, forth
+ __ tbnz(r12, 11, __ pc()); // tbnz x12, #11, .
+ __ tbnz(r12, 11, back); // tbnz x12, #11, back
+ __ tbnz(r12, 11, forth); // tbnz x12, #11, forth
// MoveWideImmOp
- __ movnw(r12, 23175, 0); // movn w12, #23175, lsl 0
- __ movzw(r11, 20476, 16); // movz w11, #20476, lsl 16
- __ movkw(r21, 3716, 0); // movk w21, #3716, lsl 0
- __ movn(r29, 28661, 48); // movn x29, #28661, lsl 48
- __ movz(r3, 6927, 0); // movz x3, #6927, lsl 0
- __ movk(r22, 9828, 16); // movk x22, #9828, lsl 16
+ __ movnw(r0, 6301, 0); // movn w0, #6301, lsl 0
+ __ movzw(r7, 20886, 0); // movz w7, #20886, lsl 0
+ __ movkw(r27, 18617, 0); // movk w27, #18617, lsl 0
+ __ movn(r12, 22998, 16); // movn x12, #22998, lsl 16
+ __ movz(r20, 1532, 16); // movz x20, #1532, lsl 16
+ __ movk(r8, 5167, 32); // movk x8, #5167, lsl 32
// BitfieldOp
- __ sbfm(r12, r8, 6, 22); // sbfm x12, x8, #6, #22
- __ bfmw(r19, r25, 25, 19); // bfm w19, w25, #25, #19
- __ ubfmw(r9, r12, 29, 15); // ubfm w9, w12, #29, #15
- __ sbfm(r28, r25, 16, 16); // sbfm x28, x25, #16, #16
- __ bfm(r12, r5, 4, 25); // bfm x12, x5, #4, #25
- __ ubfm(r0, r10, 6, 8); // ubfm x0, x10, #6, #8
+ __ sbfm(r15, r17, 24, 28); // sbfm x15, x17, #24, #28
+ __ bfmw(r15, r9, 14, 25); // bfm w15, w9, #14, #25
+ __ ubfmw(r27, r25, 6, 31); // ubfm w27, w25, #6, #31
+ __ sbfm(r19, r2, 23, 31); // sbfm x19, x2, #23, #31
+ __ bfm(r12, r21, 10, 6); // bfm x12, x21, #10, #6
+ __ ubfm(r22, r0, 26, 16); // ubfm x22, x0, #26, #16
// ExtractOp
- __ extrw(r4, r13, r26, 24); // extr w4, w13, w26, #24
- __ extr(r23, r30, r24, 31); // extr x23, x30, x24, #31
+ __ extrw(r3, r3, r20, 27); // extr w3, w3, w20, #27
+ __ extr(r8, r30, r3, 54); // extr x8, x30, x3, #54
// CondBranchOp
- __ br(Assembler::EQ, __ pc()); // b.EQ .
- __ br(Assembler::EQ, back); // b.EQ back
- __ br(Assembler::EQ, forth); // b.EQ forth
- __ br(Assembler::NE, __ pc()); // b.NE .
- __ br(Assembler::NE, back); // b.NE back
- __ br(Assembler::NE, forth); // b.NE forth
- __ br(Assembler::HS, __ pc()); // b.HS .
- __ br(Assembler::HS, back); // b.HS back
- __ br(Assembler::HS, forth); // b.HS forth
- __ br(Assembler::CS, __ pc()); // b.CS .
- __ br(Assembler::CS, back); // b.CS back
- __ br(Assembler::CS, forth); // b.CS forth
- __ br(Assembler::LO, __ pc()); // b.LO .
- __ br(Assembler::LO, back); // b.LO back
- __ br(Assembler::LO, forth); // b.LO forth
- __ br(Assembler::CC, __ pc()); // b.CC .
- __ br(Assembler::CC, back); // b.CC back
- __ br(Assembler::CC, forth); // b.CC forth
- __ br(Assembler::MI, __ pc()); // b.MI .
- __ br(Assembler::MI, back); // b.MI back
- __ br(Assembler::MI, forth); // b.MI forth
- __ br(Assembler::PL, __ pc()); // b.PL .
- __ br(Assembler::PL, back); // b.PL back
- __ br(Assembler::PL, forth); // b.PL forth
- __ br(Assembler::VS, __ pc()); // b.VS .
- __ br(Assembler::VS, back); // b.VS back
- __ br(Assembler::VS, forth); // b.VS forth
- __ br(Assembler::VC, __ pc()); // b.VC .
- __ br(Assembler::VC, back); // b.VC back
- __ br(Assembler::VC, forth); // b.VC forth
- __ br(Assembler::HI, __ pc()); // b.HI .
- __ br(Assembler::HI, back); // b.HI back
- __ br(Assembler::HI, forth); // b.HI forth
- __ br(Assembler::LS, __ pc()); // b.LS .
- __ br(Assembler::LS, back); // b.LS back
- __ br(Assembler::LS, forth); // b.LS forth
- __ br(Assembler::GE, __ pc()); // b.GE .
- __ br(Assembler::GE, back); // b.GE back
- __ br(Assembler::GE, forth); // b.GE forth
- __ br(Assembler::LT, __ pc()); // b.LT .
- __ br(Assembler::LT, back); // b.LT back
- __ br(Assembler::LT, forth); // b.LT forth
- __ br(Assembler::GT, __ pc()); // b.GT .
- __ br(Assembler::GT, back); // b.GT back
- __ br(Assembler::GT, forth); // b.GT forth
- __ br(Assembler::LE, __ pc()); // b.LE .
- __ br(Assembler::LE, back); // b.LE back
- __ br(Assembler::LE, forth); // b.LE forth
- __ br(Assembler::AL, __ pc()); // b.AL .
- __ br(Assembler::AL, back); // b.AL back
- __ br(Assembler::AL, forth); // b.AL forth
- __ br(Assembler::NV, __ pc()); // b.NV .
- __ br(Assembler::NV, back); // b.NV back
- __ br(Assembler::NV, forth); // b.NV forth
+ __ br(Assembler::EQ, __ pc()); // b.EQ .
+ __ br(Assembler::EQ, back); // b.EQ back
+ __ br(Assembler::EQ, forth); // b.EQ forth
+ __ br(Assembler::NE, __ pc()); // b.NE .
+ __ br(Assembler::NE, back); // b.NE back
+ __ br(Assembler::NE, forth); // b.NE forth
+ __ br(Assembler::HS, __ pc()); // b.HS .
+ __ br(Assembler::HS, back); // b.HS back
+ __ br(Assembler::HS, forth); // b.HS forth
+ __ br(Assembler::CS, __ pc()); // b.CS .
+ __ br(Assembler::CS, back); // b.CS back
+ __ br(Assembler::CS, forth); // b.CS forth
+ __ br(Assembler::LO, __ pc()); // b.LO .
+ __ br(Assembler::LO, back); // b.LO back
+ __ br(Assembler::LO, forth); // b.LO forth
+ __ br(Assembler::CC, __ pc()); // b.CC .
+ __ br(Assembler::CC, back); // b.CC back
+ __ br(Assembler::CC, forth); // b.CC forth
+ __ br(Assembler::MI, __ pc()); // b.MI .
+ __ br(Assembler::MI, back); // b.MI back
+ __ br(Assembler::MI, forth); // b.MI forth
+ __ br(Assembler::PL, __ pc()); // b.PL .
+ __ br(Assembler::PL, back); // b.PL back
+ __ br(Assembler::PL, forth); // b.PL forth
+ __ br(Assembler::VS, __ pc()); // b.VS .
+ __ br(Assembler::VS, back); // b.VS back
+ __ br(Assembler::VS, forth); // b.VS forth
+ __ br(Assembler::VC, __ pc()); // b.VC .
+ __ br(Assembler::VC, back); // b.VC back
+ __ br(Assembler::VC, forth); // b.VC forth
+ __ br(Assembler::HI, __ pc()); // b.HI .
+ __ br(Assembler::HI, back); // b.HI back
+ __ br(Assembler::HI, forth); // b.HI forth
+ __ br(Assembler::LS, __ pc()); // b.LS .
+ __ br(Assembler::LS, back); // b.LS back
+ __ br(Assembler::LS, forth); // b.LS forth
+ __ br(Assembler::GE, __ pc()); // b.GE .
+ __ br(Assembler::GE, back); // b.GE back
+ __ br(Assembler::GE, forth); // b.GE forth
+ __ br(Assembler::LT, __ pc()); // b.LT .
+ __ br(Assembler::LT, back); // b.LT back
+ __ br(Assembler::LT, forth); // b.LT forth
+ __ br(Assembler::GT, __ pc()); // b.GT .
+ __ br(Assembler::GT, back); // b.GT back
+ __ br(Assembler::GT, forth); // b.GT forth
+ __ br(Assembler::LE, __ pc()); // b.LE .
+ __ br(Assembler::LE, back); // b.LE back
+ __ br(Assembler::LE, forth); // b.LE forth
+ __ br(Assembler::AL, __ pc()); // b.AL .
+ __ br(Assembler::AL, back); // b.AL back
+ __ br(Assembler::AL, forth); // b.AL forth
+ __ br(Assembler::NV, __ pc()); // b.NV .
+ __ br(Assembler::NV, back); // b.NV back
+ __ br(Assembler::NV, forth); // b.NV forth
// ImmOp
- __ svc(12729); // svc #12729
- __ hvc(6788); // hvc #6788
- __ smc(1535); // smc #1535
- __ brk(16766); // brk #16766
- __ hlt(9753); // hlt #9753
+ __ svc(12999); // svc #12999
+ __ hvc(2665); // hvc #2665
+ __ smc(9002); // smc #9002
+ __ brk(14843); // brk #14843
+ __ hlt(25964); // hlt #25964
// Op
- __ nop(); // nop
- __ eret(); // eret
- __ drps(); // drps
- __ isb(); // isb
+ __ nop(); // nop
+ __ eret(); // eret
+ __ drps(); // drps
+ __ isb(); // isb
// SystemOp
- __ dsb(Assembler::SY); // dsb SY
- __ dmb(Assembler::ISHST); // dmb ISHST
+ __ dsb(Assembler::ST); // dsb ST
+ __ dmb(Assembler::OSHST); // dmb OSHST
// OneRegOp
- __ br(r2); // br x2
- __ blr(r5); // blr x5
+ __ br(r16); // br x16
+ __ blr(r20); // blr x20
// LoadStoreExclusiveOp
- __ stxr(r20, r21, r2); // stxr w20, x21, [x2]
- __ stlxr(r5, r29, r7); // stlxr w5, x29, [x7]
- __ ldxr(r5, r16); // ldxr x5, [x16]
- __ ldaxr(r27, r29); // ldaxr x27, [x29]
- __ stlr(r0, r29); // stlr x0, [x29]
- __ ldar(r21, r28); // ldar x21, [x28]
+ __ stxr(r10, r27, r8); // stxr w10, x27, [x8]
+ __ stlxr(r0, r1, r21); // stlxr w0, x1, [x21]
+ __ ldxr(r17, r29); // ldxr x17, [x29]
+ __ ldaxr(r29, r28); // ldaxr x29, [x28]
+ __ stlr(r1, r23); // stlr x1, [x23]
+ __ ldar(r21, r20); // ldar x21, [x20]
// LoadStoreExclusiveOp
- __ stxrw(r21, r24, r7); // stxr w21, w24, [x7]
- __ stlxrw(r21, r26, r28); // stlxr w21, w26, [x28]
- __ ldxrw(r21, r6); // ldxr w21, [x6]
- __ ldaxrw(r15, r30); // ldaxr w15, [x30]
- __ stlrw(r19, r3); // stlr w19, [x3]
- __ ldarw(r22, r2); // ldar w22, [x2]
+ __ stxrw(r22, r27, r19); // stxr w22, w27, [x19]
+ __ stlxrw(r11, r16, r6); // stlxr w11, w16, [x6]
+ __ ldxrw(r18, r0); // ldxr w18, [x0]
+ __ ldaxrw(r4, r10); // ldaxr w4, [x10]
+ __ stlrw(r24, r22); // stlr w24, [x22]
+ __ ldarw(r10, r19); // ldar w10, [x19]
// LoadStoreExclusiveOp
- __ stxrh(r18, r15, r0); // stxrh w18, w15, [x0]
- __ stlxrh(r11, r5, r28); // stlxrh w11, w5, [x28]
- __ ldxrh(r29, r6); // ldxrh w29, [x6]
- __ ldaxrh(r18, r7); // ldaxrh w18, [x7]
- __ stlrh(r25, r28); // stlrh w25, [x28]
- __ ldarh(r2, r19); // ldarh w2, [x19]
+ __ stxrh(r1, r5, r30); // stxrh w1, w5, [x30]
+ __ stlxrh(r8, r12, r17); // stlxrh w8, w12, [x17]
+ __ ldxrh(r9, r14); // ldxrh w9, [x14]
+ __ ldaxrh(r7, r1); // ldaxrh w7, [x1]
+ __ stlrh(r5, r16); // stlrh w5, [x16]
+ __ ldarh(r2, r12); // ldarh w2, [x12]
// LoadStoreExclusiveOp
- __ stxrb(r10, r30, r1); // stxrb w10, w30, [x1]
- __ stlxrb(r20, r21, r22); // stlxrb w20, w21, [x22]
- __ ldxrb(r25, r2); // ldxrb w25, [x2]
- __ ldaxrb(r24, r5); // ldaxrb w24, [x5]
- __ stlrb(r16, r3); // stlrb w16, [x3]
- __ ldarb(r22, r29); // ldarb w22, [x29]
+ __ stxrb(r10, r12, r3); // stxrb w10, w12, [x3]
+ __ stlxrb(r28, r14, r26); // stlxrb w28, w14, [x26]
+ __ ldxrb(r30, r10); // ldxrb w30, [x10]
+ __ ldaxrb(r14, r21); // ldaxrb w14, [x21]
+ __ stlrb(r13, r9); // stlrb w13, [x9]
+ __ ldarb(r22, r27); // ldarb w22, [x27]
// LoadStoreExclusiveOp
- __ ldxp(r8, r2, r19); // ldxp x8, x2, [x19]
- __ ldaxp(r7, r19, r14); // ldaxp x7, x19, [x14]
- __ stxp(r8, r27, r28, r5); // stxp w8, x27, x28, [x5]
- __ stlxp(r5, r8, r14, r6); // stlxp w5, x8, x14, [x6]
+ __ ldxp(r28, r19, r11); // ldxp x28, x19, [x11]
+ __ ldaxp(r30, r19, r2); // ldaxp x30, x19, [x2]
+ __ stxp(r2, r23, r1, r0); // stxp w2, x23, x1, [x0]
+ __ stlxp(r12, r16, r13, r15); // stlxp w12, x16, x13, [x15]
// LoadStoreExclusiveOp
- __ ldxpw(r25, r4, r22); // ldxp w25, w4, [x22]
- __ ldaxpw(r13, r14, r15); // ldaxp w13, w14, [x15]
- __ stxpw(r20, r26, r8, r10); // stxp w20, w26, w8, [x10]
- __ stlxpw(r23, r18, r18, r18); // stlxp w23, w18, w18, [x18]
+ __ ldxpw(r18, r21, r13); // ldxp w18, w21, [x13]
+ __ ldaxpw(r11, r30, r8); // ldaxp w11, w30, [x8]
+ __ stxpw(r24, r13, r11, r1); // stxp w24, w13, w11, [x1]
+ __ stlxpw(r26, r21, r27, r13); // stlxp w26, w21, w27, [x13]
-// base_plus_unscaled_offset
+// base_plus_unscaled_offset
// LoadStoreOp
- __ str(r30, Address(r11, 99)); // str x30, [x11, 99]
- __ strw(r23, Address(r25, -77)); // str w23, [x25, -77]
- __ strb(r2, Address(r14, 3)); // strb w2, [x14, 3]
- __ strh(r9, Address(r10, 5)); // strh w9, [x10, 5]
- __ ldr(r20, Address(r15, 57)); // ldr x20, [x15, 57]
- __ ldrw(r12, Address(r16, -78)); // ldr w12, [x16, -78]
- __ ldrb(r22, Address(r26, -3)); // ldrb w22, [x26, -3]
- __ ldrh(r30, Address(r19, -47)); // ldrh w30, [x19, -47]
- __ ldrsb(r9, Address(r10, -12)); // ldrsb x9, [x10, -12]
- __ ldrsh(r28, Address(r17, 14)); // ldrsh x28, [x17, 14]
- __ ldrshw(r3, Address(r5, 10)); // ldrsh w3, [x5, 10]
- __ ldrsw(r17, Address(r17, -91)); // ldrsw x17, [x17, -91]
- __ ldrd(v2, Address(r20, -17)); // ldr d2, [x20, -17]
- __ ldrs(v22, Address(r7, -10)); // ldr s22, [x7, -10]
- __ strd(v30, Address(r18, -223)); // str d30, [x18, -223]
- __ strs(v13, Address(r22, 21)); // str s13, [x22, 21]
-
-// pre
+ __ str(r11, Address(r20, -103)); // str x11, [x20, -103]
+ __ strw(r28, Address(r16, 62)); // str w28, [x16, 62]
+ __ strb(r27, Address(r9, -9)); // strb w27, [x9, -9]
+ __ strh(r2, Address(r25, -50)); // strh w2, [x25, -50]
+ __ ldr(r4, Address(r2, -241)); // ldr x4, [x2, -241]
+ __ ldrw(r30, Address(r20, -31)); // ldr w30, [x20, -31]
+ __ ldrb(r18, Address(r23, -23)); // ldrb w18, [x23, -23]
+ __ ldrh(r29, Address(r26, -1)); // ldrh w29, [x26, -1]
+ __ ldrsb(r1, Address(r9, 6)); // ldrsb x1, [x9, 6]
+ __ ldrsh(r11, Address(r12, 19)); // ldrsh x11, [x12, 19]
+ __ ldrshw(r11, Address(r1, -50)); // ldrsh w11, [x1, -50]
+ __ ldrsw(r19, Address(r24, 41)); // ldrsw x19, [x24, 41]
+ __ ldrd(v24, Address(r24, 95)); // ldr d24, [x24, 95]
+ __ ldrs(v15, Address(r5, -43)); // ldr s15, [x5, -43]
+ __ strd(v21, Address(r27, 1)); // str d21, [x27, 1]
+ __ strs(v23, Address(r13, -107)); // str s23, [x13, -107]
+
+// pre
// LoadStoreOp
- __ str(r9, Address(__ pre(r18, -112))); // str x9, [x18, -112]!
- __ strw(r29, Address(__ pre(r23, 11))); // str w29, [x23, 11]!
- __ strb(r18, Address(__ pre(r12, -1))); // strb w18, [x12, -1]!
- __ strh(r16, Address(__ pre(r20, -23))); // strh w16, [x20, -23]!
- __ ldr(r3, Address(__ pre(r29, 9))); // ldr x3, [x29, 9]!
- __ ldrw(r25, Address(__ pre(r3, 19))); // ldr w25, [x3, 19]!
- __ ldrb(r1, Address(__ pre(r29, -1))); // ldrb w1, [x29, -1]!
- __ ldrh(r8, Address(__ pre(r29, -57))); // ldrh w8, [x29, -57]!
- __ ldrsb(r5, Address(__ pre(r14, -13))); // ldrsb x5, [x14, -13]!
- __ ldrsh(r10, Address(__ pre(r27, 1))); // ldrsh x10, [x27, 1]!
- __ ldrshw(r11, Address(__ pre(r10, 25))); // ldrsh w11, [x10, 25]!
- __ ldrsw(r4, Address(__ pre(r22, -92))); // ldrsw x4, [x22, -92]!
- __ ldrd(v11, Address(__ pre(r23, 8))); // ldr d11, [x23, 8]!
- __ ldrs(v25, Address(__ pre(r19, 54))); // ldr s25, [x19, 54]!
- __ strd(v1, Address(__ pre(r7, -174))); // str d1, [x7, -174]!
- __ strs(v8, Address(__ pre(r25, 54))); // str s8, [x25, 54]!
-
-// post
+ __ str(r11, Address(__ pre(r0, 8))); // str x11, [x0, 8]!
+ __ strw(r3, Address(__ pre(r0, 29))); // str w3, [x0, 29]!
+ __ strb(r11, Address(__ pre(r14, 9))); // strb w11, [x14, 9]!
+ __ strh(r29, Address(__ pre(r24, -3))); // strh w29, [x24, -3]!
+ __ ldr(r13, Address(__ pre(r17, -144))); // ldr x13, [x17, -144]!
+ __ ldrw(r12, Address(__ pre(r22, -6))); // ldr w12, [x22, -6]!
+ __ ldrb(r13, Address(__ pre(r12, -10))); // ldrb w13, [x12, -10]!
+ __ ldrh(r0, Address(__ pre(r21, -21))); // ldrh w0, [x21, -21]!
+ __ ldrsb(r23, Address(__ pre(r7, 4))); // ldrsb x23, [x7, 4]!
+ __ ldrsh(r3, Address(__ pre(r7, -53))); // ldrsh x3, [x7, -53]!
+ __ ldrshw(r28, Address(__ pre(r5, -7))); // ldrsh w28, [x5, -7]!
+ __ ldrsw(r24, Address(__ pre(r9, -18))); // ldrsw x24, [x9, -18]!
+ __ ldrd(v14, Address(__ pre(r11, 12))); // ldr d14, [x11, 12]!
+ __ ldrs(v19, Address(__ pre(r12, -67))); // ldr s19, [x12, -67]!
+ __ strd(v20, Address(__ pre(r0, -253))); // str d20, [x0, -253]!
+ __ strs(v8, Address(__ pre(r0, 64))); // str s8, [x0, 64]!
+
+// post
// LoadStoreOp
- __ str(r5, Address(__ post(r11, 37))); // str x5, [x11], 37
- __ strw(r24, Address(__ post(r15, 19))); // str w24, [x15], 19
- __ strb(r15, Address(__ post(r26, -1))); // strb w15, [x26], -1
- __ strh(r18, Address(__ post(r18, -6))); // strh w18, [x18], -6
- __ ldr(r7, Address(__ post(r2, -230))); // ldr x7, [x2], -230
- __ ldrw(r27, Address(__ post(r11, -27))); // ldr w27, [x11], -27
- __ ldrb(r18, Address(__ post(r3, -25))); // ldrb w18, [x3], -25
- __ ldrh(r10, Address(__ post(r24, -32))); // ldrh w10, [x24], -32
- __ ldrsb(r22, Address(__ post(r10, 4))); // ldrsb x22, [x10], 4
- __ ldrsh(r17, Address(__ post(r12, 25))); // ldrsh x17, [x12], 25
- __ ldrshw(r8, Address(__ post(r7, -62))); // ldrsh w8, [x7], -62
- __ ldrsw(r23, Address(__ post(r22, -51))); // ldrsw x23, [x22], -51
- __ ldrd(v24, Address(__ post(r25, 48))); // ldr d24, [x25], 48
- __ ldrs(v21, Address(__ post(r12, -10))); // ldr s21, [x12], -10
- __ strd(v18, Address(__ post(r13, -222))); // str d18, [x13], -222
- __ strs(v16, Address(__ post(r1, -41))); // str s16, [x1], -41
-
-// base_plus_reg
+ __ str(r4, Address(__ post(r28, -94))); // str x4, [x28], -94
+ __ strw(r12, Address(__ post(r7, -54))); // str w12, [x7], -54
+ __ strb(r27, Address(__ post(r10, -24))); // strb w27, [x10], -24
+ __ strh(r6, Address(__ post(r8, 27))); // strh w6, [x8], 27
+ __ ldr(r14, Address(__ post(r10, -202))); // ldr x14, [x10], -202
+ __ ldrw(r16, Address(__ post(r5, -41))); // ldr w16, [x5], -41
+ __ ldrb(r2, Address(__ post(r14, 9))); // ldrb w2, [x14], 9
+ __ ldrh(r28, Address(__ post(r13, -20))); // ldrh w28, [x13], -20
+ __ ldrsb(r9, Address(__ post(r13, -31))); // ldrsb x9, [x13], -31
+ __ ldrsh(r3, Address(__ post(r24, -36))); // ldrsh x3, [x24], -36
+ __ ldrshw(r20, Address(__ post(r3, 6))); // ldrsh w20, [x3], 6
+ __ ldrsw(r7, Address(__ post(r19, -1))); // ldrsw x7, [x19], -1
+ __ ldrd(v30, Address(__ post(r8, -130))); // ldr d30, [x8], -130
+ __ ldrs(v25, Address(__ post(r15, 21))); // ldr s25, [x15], 21
+ __ strd(v14, Address(__ post(r23, 90))); // str d14, [x23], 90
+ __ strs(v8, Address(__ post(r0, -33))); // str s8, [x0], -33
+
+// base_plus_reg
// LoadStoreOp
- __ str(r2, Address(r22, r15, Address::sxtw(0))); // str x2, [x22, w15, sxtw #0]
- __ strw(r2, Address(r16, r29, Address::lsl(0))); // str w2, [x16, x29, lsl #0]
- __ strb(r20, Address(r18, r14, Address::uxtw(0))); // strb w20, [x18, w14, uxtw #0]
- __ strh(r6, Address(r19, r20, Address::sxtx(1))); // strh w6, [x19, x20, sxtx #1]
- __ ldr(r14, Address(r29, r14, Address::sxtw(0))); // ldr x14, [x29, w14, sxtw #0]
- __ ldrw(r16, Address(r20, r12, Address::sxtw(2))); // ldr w16, [x20, w12, sxtw #2]
- __ ldrb(r9, Address(r12, r0, Address::sxtw(0))); // ldrb w9, [x12, w0, sxtw #0]
- __ ldrh(r12, Address(r17, r3, Address::lsl(1))); // ldrh w12, [x17, x3, lsl #1]
- __ ldrsb(r2, Address(r17, r3, Address::sxtx(0))); // ldrsb x2, [x17, x3, sxtx #0]
- __ ldrsh(r7, Address(r1, r17, Address::uxtw(1))); // ldrsh x7, [x1, w17, uxtw #1]
- __ ldrshw(r25, Address(r15, r18, Address::sxtw(1))); // ldrsh w25, [x15, w18, sxtw #1]
- __ ldrsw(r23, Address(r21, r12, Address::lsl(0))); // ldrsw x23, [x21, x12, lsl #0]
- __ ldrd(v5, Address(r13, r8, Address::lsl(3))); // ldr d5, [x13, x8, lsl #3]
- __ ldrs(v3, Address(r10, r22, Address::lsl(2))); // ldr s3, [x10, x22, lsl #2]
- __ strd(v14, Address(r2, r27, Address::sxtw(0))); // str d14, [x2, w27, sxtw #0]
- __ strs(v20, Address(r6, r25, Address::lsl(0))); // str s20, [x6, x25, lsl #0]
-
-// base_plus_scaled_offset
+ __ str(r10, Address(r18, r21, Address::sxtw(3))); // str x10, [x18, w21, sxtw #3]
+ __ strw(r4, Address(r13, r22, Address::sxtw(2))); // str w4, [x13, w22, sxtw #2]
+ __ strb(r13, Address(r0, r19, Address::uxtw(0))); // strb w13, [x0, w19, uxtw #0]
+ __ strh(r12, Address(r27, r6, Address::sxtw(0))); // strh w12, [x27, w6, sxtw #0]
+ __ ldr(r0, Address(r8, r16, Address::lsl(0))); // ldr x0, [x8, x16, lsl #0]
+ __ ldrw(r0, Address(r4, r26, Address::sxtx(0))); // ldr w0, [x4, x26, sxtx #0]
+ __ ldrb(r14, Address(r25, r5, Address::sxtw(0))); // ldrb w14, [x25, w5, sxtw #0]
+ __ ldrh(r9, Address(r4, r18, Address::uxtw(0))); // ldrh w9, [x4, w18, uxtw #0]
+ __ ldrsb(r27, Address(r4, r7, Address::lsl(0))); // ldrsb x27, [x4, x7, lsl #0]
+ __ ldrsh(r15, Address(r17, r30, Address::sxtw(0))); // ldrsh x15, [x17, w30, sxtw #0]
+ __ ldrshw(r16, Address(r0, r22, Address::sxtw(0))); // ldrsh w16, [x0, w22, sxtw #0]
+ __ ldrsw(r22, Address(r10, r30, Address::sxtx(2))); // ldrsw x22, [x10, x30, sxtx #2]
+ __ ldrd(v29, Address(r21, r10, Address::sxtx(3))); // ldr d29, [x21, x10, sxtx #3]
+ __ ldrs(v3, Address(r11, r19, Address::uxtw(0))); // ldr s3, [x11, w19, uxtw #0]
+ __ strd(v13, Address(r28, r29, Address::uxtw(3))); // str d13, [x28, w29, uxtw #3]
+ __ strs(v23, Address(r29, r5, Address::sxtx(2))); // str s23, [x29, x5, sxtx #2]
+
+// base_plus_scaled_offset
// LoadStoreOp
- __ str(r30, Address(r7, 16256)); // str x30, [x7, 16256]
- __ strw(r15, Address(r8, 7588)); // str w15, [x8, 7588]
- __ strb(r11, Address(r0, 1866)); // strb w11, [x0, 1866]
- __ strh(r3, Address(r17, 3734)); // strh w3, [x17, 3734]
- __ ldr(r2, Address(r7, 14224)); // ldr x2, [x7, 14224]
- __ ldrw(r5, Address(r9, 7396)); // ldr w5, [x9, 7396]
- __ ldrb(r28, Address(r9, 1721)); // ldrb w28, [x9, 1721]
- __ ldrh(r2, Address(r20, 3656)); // ldrh w2, [x20, 3656]
- __ ldrsb(r22, Address(r14, 1887)); // ldrsb x22, [x14, 1887]
- __ ldrsh(r8, Address(r0, 4080)); // ldrsh x8, [x0, 4080]
- __ ldrshw(r0, Address(r30, 3916)); // ldrsh w0, [x30, 3916]
- __ ldrsw(r24, Address(r19, 6828)); // ldrsw x24, [x19, 6828]
- __ ldrd(v24, Address(r12, 13032)); // ldr d24, [x12, 13032]
- __ ldrs(v8, Address(r8, 7452)); // ldr s8, [x8, 7452]
- __ strd(v10, Address(r15, 15992)); // str d10, [x15, 15992]
- __ strs(v26, Address(r19, 6688)); // str s26, [x19, 6688]
-
-// pcrel
+ __ str(r5, Address(r8, 12600)); // str x5, [x8, 12600]
+ __ strw(r29, Address(r24, 7880)); // str w29, [x24, 7880]
+ __ strb(r19, Address(r17, 1566)); // strb w19, [x17, 1566]
+ __ strh(r13, Address(r19, 3984)); // strh w13, [x19, 3984]
+ __ ldr(r19, Address(r23, 13632)); // ldr x19, [x23, 13632]
+ __ ldrw(r23, Address(r29, 6264)); // ldr w23, [x29, 6264]
+ __ ldrb(r22, Address(r11, 2012)); // ldrb w22, [x11, 2012]
+ __ ldrh(r3, Address(r10, 3784)); // ldrh w3, [x10, 3784]
+ __ ldrsb(r8, Address(r16, 1951)); // ldrsb x8, [x16, 1951]
+ __ ldrsh(r23, Address(r20, 3346)); // ldrsh x23, [x20, 3346]
+ __ ldrshw(r2, Address(r1, 3994)); // ldrsh w2, [x1, 3994]
+ __ ldrsw(r4, Address(r17, 7204)); // ldrsw x4, [x17, 7204]
+ __ ldrd(v20, Address(r27, 14400)); // ldr d20, [x27, 14400]
+ __ ldrs(v25, Address(r14, 8096)); // ldr s25, [x14, 8096]
+ __ strd(v26, Address(r10, 15024)); // str d26, [x10, 15024]
+ __ strs(v9, Address(r3, 6936)); // str s9, [x3, 6936]
+
+// pcrel
// LoadStoreOp
- __ ldr(r10, forth); // ldr x10, forth
- __ ldrw(r3, __ pc()); // ldr w3, .
+ __ ldr(r27, forth); // ldr x27, forth
+ __ ldrw(r11, __ pc()); // ldr w11, .
// LoadStoreOp
- __ prfm(Address(r23, 9)); // prfm PLDL1KEEP, [x23, 9]
+ __ prfm(Address(r3, -187)); // prfm PLDL1KEEP, [x3, -187]
// LoadStoreOp
- __ prfm(back); // prfm PLDL1KEEP, back
+ __ prfm(__ pc()); // prfm PLDL1KEEP, .
// LoadStoreOp
- __ prfm(Address(r3, r8, Address::uxtw(0))); // prfm PLDL1KEEP, [x3, w8, uxtw #0]
+ __ prfm(Address(r29, r14, Address::lsl(0))); // prfm PLDL1KEEP, [x29, x14, lsl #0]
// LoadStoreOp
- __ prfm(Address(r11, 15080)); // prfm PLDL1KEEP, [x11, 15080]
+ __ prfm(Address(r4, 13312)); // prfm PLDL1KEEP, [x4, 13312]
// AddSubCarryOp
- __ adcw(r13, r9, r28); // adc w13, w9, w28
- __ adcsw(r27, r19, r28); // adcs w27, w19, w28
- __ sbcw(r19, r18, r6); // sbc w19, w18, w6
- __ sbcsw(r14, r20, r3); // sbcs w14, w20, w3
- __ adc(r16, r14, r8); // adc x16, x14, x8
- __ adcs(r0, r29, r8); // adcs x0, x29, x8
- __ sbc(r8, r24, r20); // sbc x8, x24, x20
- __ sbcs(r12, r28, r0); // sbcs x12, x28, x0
+ __ adcw(r21, r1, r7); // adc w21, w1, w7
+ __ adcsw(r8, r5, r7); // adcs w8, w5, w7
+ __ sbcw(r7, r27, r14); // sbc w7, w27, w14
+ __ sbcsw(r27, r4, r17); // sbcs w27, w4, w17
+ __ adc(r0, r28, r0); // adc x0, x28, x0
+ __ adcs(r12, r24, r30); // adcs x12, x24, x30
+ __ sbc(r0, r25, r15); // sbc x0, x25, x15
+ __ sbcs(r1, r24, r3); // sbcs x1, x24, x3
// AddSubExtendedOp
- __ addw(r23, r6, r16, ext::uxtb, 4); // add w23, w6, w16, uxtb #4
- __ addsw(r25, r25, r23, ext::sxth, 2); // adds w25, w25, w23, sxth #2
- __ sub(r26, r22, r4, ext::uxtx, 1); // sub x26, x22, x4, uxtx #1
- __ subsw(r17, r29, r19, ext::sxtx, 3); // subs w17, w29, w19, sxtx #3
- __ add(r11, r30, r21, ext::uxtb, 3); // add x11, x30, x21, uxtb #3
- __ adds(r16, r19, r0, ext::sxtb, 2); // adds x16, x19, x0, sxtb #2
- __ sub(r11, r9, r25, ext::sxtx, 1); // sub x11, x9, x25, sxtx #1
- __ subs(r17, r20, r12, ext::sxtb, 4); // subs x17, x20, x12, sxtb #4
+ __ addw(r18, r24, r20, ext::uxtb, 2); // add w18, w24, w20, uxtb #2
+ __ addsw(r13, r28, r10, ext::uxth, 1); // adds w13, w28, w10, uxth #1
+ __ sub(r15, r16, r2, ext::sxth, 2); // sub x15, x16, x2, sxth #2
+ __ subsw(r29, r13, r13, ext::uxth, 2); // subs w29, w13, w13, uxth #2
+ __ add(r12, r20, r12, ext::sxtw, 3); // add x12, x20, x12, sxtw #3
+ __ adds(r30, r27, r11, ext::sxtb, 1); // adds x30, x27, x11, sxtb #1
+ __ sub(r14, r7, r1, ext::sxtw, 2); // sub x14, x7, x1, sxtw #2
+ __ subs(r29, r3, r27, ext::sxth, 1); // subs x29, x3, x27, sxth #1
// ConditionalCompareOp
- __ ccmnw(r13, r11, 3u, Assembler::LE); // ccmn w13, w11, #3, LE
- __ ccmpw(r13, r12, 2u, Assembler::HI); // ccmp w13, w12, #2, HI
- __ ccmn(r3, r2, 12u, Assembler::NE); // ccmn x3, x2, #12, NE
- __ ccmp(r7, r21, 3u, Assembler::VS); // ccmp x7, x21, #3, VS
+ __ ccmnw(r0, r13, 14u, Assembler::MI); // ccmn w0, w13, #14, MI
+ __ ccmpw(r22, r18, 6u, Assembler::CC); // ccmp w22, w18, #6, CC
+ __ ccmn(r18, r30, 14u, Assembler::VS); // ccmn x18, x30, #14, VS
+ __ ccmp(r10, r19, 12u, Assembler::HI); // ccmp x10, x19, #12, HI
// ConditionalCompareImmedOp
- __ ccmnw(r2, 14, 4, Assembler::CC); // ccmn w2, #14, #4, CC
- __ ccmpw(r17, 17, 6, Assembler::PL); // ccmp w17, #17, #6, PL
- __ ccmn(r10, 12, 0, Assembler::CS); // ccmn x10, #12, #0, CS
- __ ccmp(r21, 18, 14, Assembler::GE); // ccmp x21, #18, #14, GE
+ __ ccmnw(r6, 18, 2, Assembler::LE); // ccmn w6, #18, #2, LE
+ __ ccmpw(r9, 13, 4, Assembler::HI); // ccmp w9, #13, #4, HI
+ __ ccmn(r21, 11, 11, Assembler::LO); // ccmn x21, #11, #11, LO
+ __ ccmp(r4, 13, 2, Assembler::VC); // ccmp x4, #13, #2, VC
// ConditionalSelectOp
- __ cselw(r21, r13, r12, Assembler::GT); // csel w21, w13, w12, GT
- __ csincw(r10, r27, r15, Assembler::LS); // csinc w10, w27, w15, LS
- __ csinvw(r0, r13, r9, Assembler::HI); // csinv w0, w13, w9, HI
- __ csnegw(r18, r4, r26, Assembler::VS); // csneg w18, w4, w26, VS
- __ csel(r12, r29, r7, Assembler::LS); // csel x12, x29, x7, LS
- __ csinc(r6, r7, r20, Assembler::VC); // csinc x6, x7, x20, VC
- __ csinv(r22, r21, r3, Assembler::LE); // csinv x22, x21, x3, LE
- __ csneg(r19, r12, r27, Assembler::LS); // csneg x19, x12, x27, LS
+ __ cselw(r12, r2, r22, Assembler::HI); // csel w12, w2, w22, HI
+ __ csincw(r24, r16, r17, Assembler::HS); // csinc w24, w16, w17, HS
+ __ csinvw(r6, r7, r16, Assembler::LT); // csinv w6, w7, w16, LT
+ __ csnegw(r11, r27, r22, Assembler::LS); // csneg w11, w27, w22, LS
+ __ csel(r10, r3, r29, Assembler::LT); // csel x10, x3, x29, LT
+ __ csinc(r12, r26, r27, Assembler::CC); // csinc x12, x26, x27, CC
+ __ csinv(r15, r10, r21, Assembler::GT); // csinv x15, x10, x21, GT
+ __ csneg(r30, r23, r9, Assembler::GT); // csneg x30, x23, x9, GT
// TwoRegOp
- __ rbitw(r0, r16); // rbit w0, w16
- __ rev16w(r17, r23); // rev16 w17, w23
- __ revw(r17, r14); // rev w17, w14
- __ clzw(r24, r30); // clz w24, w30
- __ clsw(r24, r22); // cls w24, w22
- __ rbit(r3, r17); // rbit x3, x17
- __ rev16(r12, r13); // rev16 x12, x13
- __ rev32(r9, r22); // rev32 x9, x22
- __ rev(r0, r0); // rev x0, x0
- __ clz(r5, r16); // clz x5, x16
- __ cls(r25, r22); // cls x25, x22
+ __ rbitw(r30, r10); // rbit w30, w10
+ __ rev16w(r29, r15); // rev16 w29, w15
+ __ revw(r29, r30); // rev w29, w30
+ __ clzw(r25, r21); // clz w25, w21
+ __ clsw(r4, r0); // cls w4, w0
+ __ rbit(r18, r21); // rbit x18, x21
+ __ rev16(r29, r16); // rev16 x29, x16
+ __ rev32(r21, r20); // rev32 x21, x20
+ __ rev(r6, r19); // rev x6, x19
+ __ clz(r30, r3); // clz x30, x3
+ __ cls(r21, r19); // cls x21, x19
// ThreeRegOp
- __ udivw(r29, r4, r0); // udiv w29, w4, w0
- __ sdivw(r0, r29, r29); // sdiv w0, w29, w29
- __ lslvw(r5, r17, r21); // lslv w5, w17, w21
- __ lsrvw(r9, r9, r18); // lsrv w9, w9, w18
- __ asrvw(r1, r27, r8); // asrv w1, w27, w8
- __ rorvw(r18, r20, r13); // rorv w18, w20, w13
- __ udiv(r8, r25, r12); // udiv x8, x25, x12
- __ sdiv(r7, r5, r28); // sdiv x7, x5, x28
- __ lslv(r5, r17, r27); // lslv x5, x17, x27
- __ lsrv(r23, r26, r20); // lsrv x23, x26, x20
- __ asrv(r28, r8, r28); // asrv x28, x8, x28
- __ rorv(r3, r29, r4); // rorv x3, x29, x4
+ __ udivw(r11, r24, r0); // udiv w11, w24, w0
+ __ sdivw(r27, r25, r14); // sdiv w27, w25, w14
+ __ lslvw(r3, r14, r18); // lslv w3, w14, w18
+ __ lsrvw(r7, r15, r24); // lsrv w7, w15, w24
+ __ asrvw(r28, r17, r25); // asrv w28, w17, w25
+ __ rorvw(r2, r26, r28); // rorv w2, w26, w28
+ __ udiv(r5, r25, r26); // udiv x5, x25, x26
+ __ sdiv(r27, r16, r18); // sdiv x27, x16, x18
+ __ lslv(r6, r21, r12); // lslv x6, x21, x12
+ __ lsrv(r0, r4, r12); // lsrv x0, x4, x12
+ __ asrv(r27, r17, r28); // asrv x27, x17, x28
+ __ rorv(r28, r2, r18); // rorv x28, x2, x18
// FourRegMulOp
- __ maddw(r17, r14, r26, r21); // madd w17, w14, w26, w21
- __ msubw(r1, r30, r11, r11); // msub w1, w30, w11, w11
- __ madd(r1, r17, r6, r28); // madd x1, x17, x6, x28
- __ msub(r30, r6, r30, r8); // msub x30, x6, x30, x8
- __ smaddl(r21, r6, r14, r8); // smaddl x21, w6, w14, x8
- __ smsubl(r10, r10, r24, r19); // smsubl x10, w10, w24, x19
- __ umaddl(r20, r18, r14, r24); // umaddl x20, w18, w14, x24
- __ umsubl(r18, r2, r5, r5); // umsubl x18, w2, w5, x5
+ __ maddw(r10, r15, r14, r14); // madd w10, w15, w14, w14
+ __ msubw(r3, r25, r15, r19); // msub w3, w25, w15, w19
+ __ madd(r14, r5, r16, r4); // madd x14, x5, x16, x4
+ __ msub(r26, r25, r4, r2); // msub x26, x25, x4, x2
+ __ smaddl(r2, r12, r29, r17); // smaddl x2, w12, w29, x17
+ __ smsubl(r8, r7, r3, r4); // smsubl x8, w7, w3, x4
+ __ umaddl(r25, r4, r26, r25); // umaddl x25, w4, w26, x25
+ __ umsubl(r4, r17, r0, r26); // umsubl x4, w17, w0, x26
// ThreeRegFloatOp
- __ fmuls(v8, v18, v13); // fmul s8, s18, s13
- __ fdivs(v2, v14, v28); // fdiv s2, s14, s28
- __ fadds(v15, v12, v28); // fadd s15, s12, s28
- __ fsubs(v0, v12, v1); // fsub s0, s12, s1
- __ fmuls(v15, v29, v4); // fmul s15, s29, s4
- __ fmuld(v12, v1, v23); // fmul d12, d1, d23
- __ fdivd(v27, v8, v18); // fdiv d27, d8, d18
- __ faddd(v23, v20, v11); // fadd d23, d20, d11
- __ fsubd(v8, v12, v18); // fsub d8, d12, d18
- __ fmuld(v26, v24, v23); // fmul d26, d24, d23
+ __ fmuls(v17, v23, v15); // fmul s17, s23, s15
+ __ fdivs(v21, v28, v17); // fdiv s21, s28, s17
+ __ fadds(v27, v10, v3); // fadd s27, s10, s3
+ __ fsubs(v0, v7, v25); // fsub s0, s7, s25
+ __ fmuls(v9, v6, v15); // fmul s9, s6, s15
+ __ fmuld(v29, v15, v10); // fmul d29, d15, d10
+ __ fdivd(v2, v17, v7); // fdiv d2, d17, d7
+ __ faddd(v11, v11, v23); // fadd d11, d11, d23
+ __ fsubd(v7, v29, v23); // fsub d7, d29, d23
+ __ fmuld(v14, v27, v11); // fmul d14, d27, d11
// FourRegFloatOp
- __ fmadds(v21, v23, v13, v25); // fmadd s21, s23, s13, s25
- __ fmsubs(v22, v10, v1, v14); // fmsub s22, s10, s1, s14
- __ fnmadds(v14, v20, v2, v30); // fnmadd s14, s20, s2, s30
- __ fnmadds(v7, v29, v22, v22); // fnmadd s7, s29, s22, s22
- __ fmaddd(v13, v5, v15, v5); // fmadd d13, d5, d15, d5
- __ fmsubd(v14, v12, v5, v10); // fmsub d14, d12, d5, d10
- __ fnmaddd(v10, v19, v0, v1); // fnmadd d10, d19, d0, d1
- __ fnmaddd(v20, v2, v2, v0); // fnmadd d20, d2, d2, d0
+ __ fmadds(v11, v4, v24, v12); // fmadd s11, s4, s24, s12
+ __ fmsubs(v15, v14, v20, v11); // fmsub s15, s14, s20, s11
+ __ fnmadds(v28, v13, v11, v12); // fnmadd s28, s13, s11, s12
+ __ fnmadds(v23, v30, v26, v14); // fnmadd s23, s30, s26, s14
+ __ fmaddd(v9, v13, v10, v7); // fmadd d9, d13, d10, d7
+ __ fmsubd(v5, v29, v15, v3); // fmsub d5, d29, d15, d3
+ __ fnmaddd(v11, v12, v15, v30); // fnmadd d11, d12, d15, d30
+ __ fnmaddd(v30, v17, v19, v20); // fnmadd d30, d17, d19, d20
// TwoRegFloatOp
- __ fmovs(v25, v9); // fmov s25, s9
- __ fabss(v20, v4); // fabs s20, s4
- __ fnegs(v3, v27); // fneg s3, s27
- __ fsqrts(v1, v2); // fsqrt s1, s2
- __ fcvts(v30, v0); // fcvt d30, s0
- __ fmovd(v12, v4); // fmov d12, d4
- __ fabsd(v1, v27); // fabs d1, d27
- __ fnegd(v8, v22); // fneg d8, d22
- __ fsqrtd(v11, v11); // fsqrt d11, d11
- __ fcvtd(v22, v28); // fcvt s22, d28
+ __ fmovs(v27, v7); // fmov s27, s7
+ __ fabss(v9, v21); // fabs s9, s21
+ __ fnegs(v2, v9); // fneg s2, s9
+ __ fsqrts(v27, v7); // fsqrt s27, s7
+ __ fcvts(v29, v30); // fcvt d29, s30
+ __ fmovd(v17, v1); // fmov d17, d1
+ __ fabsd(v2, v6); // fabs d2, d6
+ __ fnegd(v10, v3); // fneg d10, d3
+ __ fsqrtd(v24, v11); // fsqrt d24, d11
+ __ fcvtd(v7, v1); // fcvt s7, d1
// FloatConvertOp
- __ fcvtzsw(r28, v22); // fcvtzs w28, s22
- __ fcvtzs(r20, v27); // fcvtzs x20, s27
- __ fcvtzdw(r14, v0); // fcvtzs w14, d0
- __ fcvtzd(r26, v11); // fcvtzs x26, d11
- __ scvtfws(v28, r22); // scvtf s28, w22
- __ scvtfs(v16, r10); // scvtf s16, x10
- __ scvtfwd(v8, r21); // scvtf d8, w21
- __ scvtfd(v21, r28); // scvtf d21, x28
- __ fmovs(r24, v24); // fmov w24, s24
- __ fmovd(r8, v19); // fmov x8, d19
- __ fmovs(v8, r12); // fmov s8, w12
- __ fmovd(v6, r7); // fmov d6, x7
+ __ fcvtzsw(r11, v0); // fcvtzs w11, s0
+ __ fcvtzs(r3, v18); // fcvtzs x3, s18
+ __ fcvtzdw(r28, v6); // fcvtzs w28, d6
+ __ fcvtzd(r22, v6); // fcvtzs x22, d6
+ __ scvtfws(v0, r27); // scvtf s0, w27
+ __ scvtfs(v26, r2); // scvtf s26, x2
+ __ scvtfwd(v5, r7); // scvtf d5, w7
+ __ scvtfd(v28, r11); // scvtf d28, x11
+ __ fmovs(r25, v13); // fmov w25, s13
+ __ fmovd(r11, v23); // fmov x11, d23
+ __ fmovs(v19, r8); // fmov s19, w8
+ __ fmovd(v18, r21); // fmov d18, x21
// TwoRegFloatOp
- __ fcmps(v30, v16); // fcmp s30, s16
- __ fcmpd(v25, v11); // fcmp d25, d11
- __ fcmps(v11, 0.0); // fcmp s11, #0.0
- __ fcmpd(v11, 0.0); // fcmp d11, #0.0
+ __ fcmps(v25, v20); // fcmp s25, s20
+ __ fcmpd(v19, v18); // fcmp d19, d18
+ __ fcmps(v2, 0.0); // fcmp s2, #0.0
+ __ fcmpd(v29, 0.0); // fcmp d29, #0.0
// LoadStorePairOp
- __ stpw(r29, r12, Address(r17, 128)); // stp w29, w12, [x17, #128]
- __ ldpw(r22, r18, Address(r14, -96)); // ldp w22, w18, [x14, #-96]
- __ ldpsw(r11, r16, Address(r1, 64)); // ldpsw x11, x16, [x1, #64]
- __ stp(r0, r11, Address(r26, 112)); // stp x0, x11, [x26, #112]
- __ ldp(r7, r1, Address(r26, 16)); // ldp x7, x1, [x26, #16]
+ __ stpw(r8, r21, Address(r19, 16)); // stp w8, w21, [x19, #16]
+ __ ldpw(r6, r15, Address(r20, 0)); // ldp w6, w15, [x20, #0]
+ __ ldpsw(r27, r14, Address(r3, -208)); // ldpsw x27, x14, [x3, #-208]
+ __ stp(r10, r12, Address(r11, -80)); // stp x10, x12, [x11, #-80]
+ __ ldp(r7, r14, Address(r7, -32)); // ldp x7, x14, [x7, #-32]
// LoadStorePairOp
- __ stpw(r10, r7, Address(__ pre(r24, 0))); // stp w10, w7, [x24, #0]!
- __ ldpw(r7, r28, Address(__ pre(r24, -256))); // ldp w7, w28, [x24, #-256]!
- __ ldpsw(r25, r28, Address(__ pre(r21, -240))); // ldpsw x25, x28, [x21, #-240]!
- __ stp(r20, r18, Address(__ pre(r14, -16))); // stp x20, x18, [x14, #-16]!
- __ ldp(r8, r10, Address(__ pre(r13, 80))); // ldp x8, x10, [x13, #80]!
+ __ stpw(r0, r22, Address(__ pre(r12, 112))); // stp w0, w22, [x12, #112]!
+ __ ldpw(r14, r7, Address(__ pre(r8, 48))); // ldp w14, w7, [x8, #48]!
+ __ ldpsw(r16, r2, Address(__ pre(r9, 0))); // ldpsw x16, x2, [x9, #0]!
+ __ stp(r20, r29, Address(__ pre(r1, -64))); // stp x20, x29, [x1, #-64]!
+ __ ldp(r21, r12, Address(__ pre(r5, 80))); // ldp x21, x12, [x5, #80]!
// LoadStorePairOp
- __ stpw(r26, r24, Address(__ post(r2, -128))); // stp w26, w24, [x2], #-128
- __ ldpw(r2, r25, Address(__ post(r21, -192))); // ldp w2, w25, [x21], #-192
- __ ldpsw(r17, r2, Address(__ post(r21, -144))); // ldpsw x17, x2, [x21], #-144
- __ stp(r12, r10, Address(__ post(r11, 96))); // stp x12, x10, [x11], #96
- __ ldp(r24, r6, Address(__ post(r17, -32))); // ldp x24, x6, [x17], #-32
+ __ stpw(r24, r24, Address(__ post(r27, -112))); // stp w24, w24, [x27], #-112
+ __ ldpw(r28, r22, Address(__ post(r18, 16))); // ldp w28, w22, [x18], #16
+ __ ldpsw(r17, r6, Address(__ post(r13, -96))); // ldpsw x17, x6, [x13], #-96
+ __ stp(r28, r26, Address(__ post(r5, -160))); // stp x28, x26, [x5], #-160
+ __ ldp(r6, r21, Address(__ post(r26, -240))); // ldp x6, x21, [x26], #-240
// LoadStorePairOp
- __ stnpw(r3, r30, Address(r14, -224)); // stnp w3, w30, [x14, #-224]
- __ ldnpw(r15, r20, Address(r26, -144)); // ldnp w15, w20, [x26, #-144]
- __ stnp(r22, r25, Address(r12, -128)); // stnp x22, x25, [x12, #-128]
- __ ldnp(r27, r22, Address(r17, -176)); // ldnp x27, x22, [x17, #-176]
+ __ stnpw(r13, r20, Address(r30, 32)); // stnp w13, w20, [x30, #32]
+ __ ldnpw(r17, r11, Address(r5, 96)); // ldnp w17, w11, [x5, #96]
+ __ stnp(r13, r20, Address(r26, -96)); // stnp x13, x20, [x26, #-96]
+ __ ldnp(r29, r12, Address(r23, -80)); // ldnp x29, x12, [x23, #-80]
+
+// SpecialCases
+ __ sve_cpy(z0, __ S, p0, v1); // mov z0.s, p0/m, s1
+ __ sve_inc(r0, __ S); // incw x0
+ __ sve_dec(r1, __ H); // dech x1
+ __ sve_lsl(z0, __ B, z1, 7); // lsl z0.b, z1.b, #7
+ __ sve_lsl(z21, __ H, z1, 15); // lsl z21.h, z1.h, #15
+ __ sve_lsl(z0, __ S, z1, 31); // lsl z0.s, z1.s, #31
+ __ sve_lsl(z0, __ D, z1, 63); // lsl z0.d, z1.d, #63
+ __ sve_lsr(z0, __ B, z1, 7); // lsr z0.b, z1.b, #7
+ __ sve_asr(z0, __ H, z11, 15); // asr z0.h, z11.h, #15
+ __ sve_lsr(z30, __ S, z1, 31); // lsr z30.s, z1.s, #31
+ __ sve_asr(z0, __ D, z1, 63); // asr z0.d, z1.d, #63
+ __ sve_addvl(sp, r0, 31); // addvl sp, x0, #31
+ __ sve_addpl(r1, sp, -32); // addpl x1, sp, -32
+ __ sve_cntp(r8, __ B, p0, p1); // cntp x8, p0, p1.b
+ __ sve_dup(z0, __ B, 127); // dup z0.b, 127
+ __ sve_dup(z1, __ H, -128); // dup z1.h, -128
+ __ sve_dup(z2, __ S, 32512); // dup z2.s, 32512
+ __ sve_dup(z7, __ D, -32768); // dup z7.d, -32768
+ __ sve_ld1b(z0, __ B, p0, Address(sp)); // ld1b {z0.b}, p0/z, [sp]
+ __ sve_ld1h(z10, __ H, p1, Address(sp, -8)); // ld1h {z10.h}, p1/z, [sp, #-8, MUL VL]
+ __ sve_ld1w(z20, __ S, p2, Address(r0, 7)); // ld1w {z20.s}, p2/z, [x0, #7, MUL VL]
+ __ sve_ld1b(z30, __ B, p3, Address(sp, r8)); // ld1b {z30.b}, p3/z, [sp, x8]
+ __ sve_ld1w(z0, __ S, p4, Address(sp, r28)); // ld1w {z0.s}, p4/z, [sp, x28, LSL #2]
+ __ sve_ld1d(z11, __ D, p5, Address(r0, r1)); // ld1d {z11.d}, p5/z, [x0, x1, LSL #3]
+ __ sve_st1b(z22, __ B, p6, Address(sp)); // st1b {z22.b}, p6, [sp]
+ __ sve_st1b(z31, __ B, p7, Address(sp, -8)); // st1b {z31.b}, p7, [sp, #-8, MUL VL]
+ __ sve_st1w(z0, __ S, p1, Address(r0, 7)); // st1w {z0.s}, p1, [x0, #7, MUL VL]
+ __ sve_st1b(z0, __ B, p2, Address(sp, r1)); // st1b {z0.b}, p2, [sp, x1]
+ __ sve_st1h(z0, __ H, p3, Address(sp, r8)); // st1h {z0.h}, p3, [sp, x8, LSL #1]
+ __ sve_st1d(z0, __ D, p4, Address(r0, r18)); // st1d {z0.d}, p4, [x0, x18, LSL #3]
+ __ sve_ldr(z0, Address(sp)); // ldr z0, [sp]
+ __ sve_ldr(z31, Address(sp, -256)); // ldr z31, [sp, #-256, MUL VL]
+ __ sve_str(z8, Address(r8, 255)); // str z8, [x8, #255, MUL VL]
// FloatImmediateOp
- __ fmovd(v0, 2.0); // fmov d0, #2.0
- __ fmovd(v0, 2.125); // fmov d0, #2.125
- __ fmovd(v0, 4.0); // fmov d0, #4.0
- __ fmovd(v0, 4.25); // fmov d0, #4.25
- __ fmovd(v0, 8.0); // fmov d0, #8.0
- __ fmovd(v0, 8.5); // fmov d0, #8.5
- __ fmovd(v0, 16.0); // fmov d0, #16.0
- __ fmovd(v0, 17.0); // fmov d0, #17.0
- __ fmovd(v0, 0.125); // fmov d0, #0.125
- __ fmovd(v0, 0.1328125); // fmov d0, #0.1328125
- __ fmovd(v0, 0.25); // fmov d0, #0.25
- __ fmovd(v0, 0.265625); // fmov d0, #0.265625
- __ fmovd(v0, 0.5); // fmov d0, #0.5
- __ fmovd(v0, 0.53125); // fmov d0, #0.53125
- __ fmovd(v0, 1.0); // fmov d0, #1.0
- __ fmovd(v0, 1.0625); // fmov d0, #1.0625
- __ fmovd(v0, -2.0); // fmov d0, #-2.0
- __ fmovd(v0, -2.125); // fmov d0, #-2.125
- __ fmovd(v0, -4.0); // fmov d0, #-4.0
- __ fmovd(v0, -4.25); // fmov d0, #-4.25
- __ fmovd(v0, -8.0); // fmov d0, #-8.0
- __ fmovd(v0, -8.5); // fmov d0, #-8.5
- __ fmovd(v0, -16.0); // fmov d0, #-16.0
- __ fmovd(v0, -17.0); // fmov d0, #-17.0
- __ fmovd(v0, -0.125); // fmov d0, #-0.125
- __ fmovd(v0, -0.1328125); // fmov d0, #-0.1328125
- __ fmovd(v0, -0.25); // fmov d0, #-0.25
- __ fmovd(v0, -0.265625); // fmov d0, #-0.265625
- __ fmovd(v0, -0.5); // fmov d0, #-0.5
- __ fmovd(v0, -0.53125); // fmov d0, #-0.53125
- __ fmovd(v0, -1.0); // fmov d0, #-1.0
- __ fmovd(v0, -1.0625); // fmov d0, #-1.0625
+ __ fmovd(v0, 2.0); // fmov d0, #2.0
+ __ fmovd(v0, 2.125); // fmov d0, #2.125
+ __ fmovd(v0, 4.0); // fmov d0, #4.0
+ __ fmovd(v0, 4.25); // fmov d0, #4.25
+ __ fmovd(v0, 8.0); // fmov d0, #8.0
+ __ fmovd(v0, 8.5); // fmov d0, #8.5
+ __ fmovd(v0, 16.0); // fmov d0, #16.0
+ __ fmovd(v0, 17.0); // fmov d0, #17.0
+ __ fmovd(v0, 0.125); // fmov d0, #0.125
+ __ fmovd(v0, 0.1328125); // fmov d0, #0.1328125
+ __ fmovd(v0, 0.25); // fmov d0, #0.25
+ __ fmovd(v0, 0.265625); // fmov d0, #0.265625
+ __ fmovd(v0, 0.5); // fmov d0, #0.5
+ __ fmovd(v0, 0.53125); // fmov d0, #0.53125
+ __ fmovd(v0, 1.0); // fmov d0, #1.0
+ __ fmovd(v0, 1.0625); // fmov d0, #1.0625
+ __ fmovd(v0, -2.0); // fmov d0, #-2.0
+ __ fmovd(v0, -2.125); // fmov d0, #-2.125
+ __ fmovd(v0, -4.0); // fmov d0, #-4.0
+ __ fmovd(v0, -4.25); // fmov d0, #-4.25
+ __ fmovd(v0, -8.0); // fmov d0, #-8.0
+ __ fmovd(v0, -8.5); // fmov d0, #-8.5
+ __ fmovd(v0, -16.0); // fmov d0, #-16.0
+ __ fmovd(v0, -17.0); // fmov d0, #-17.0
+ __ fmovd(v0, -0.125); // fmov d0, #-0.125
+ __ fmovd(v0, -0.1328125); // fmov d0, #-0.1328125
+ __ fmovd(v0, -0.25); // fmov d0, #-0.25
+ __ fmovd(v0, -0.265625); // fmov d0, #-0.265625
+ __ fmovd(v0, -0.5); // fmov d0, #-0.5
+ __ fmovd(v0, -0.53125); // fmov d0, #-0.53125
+ __ fmovd(v0, -1.0); // fmov d0, #-1.0
+ __ fmovd(v0, -1.0625); // fmov d0, #-1.0625
+
+// SVEVectorOp
+ __ sve_add(z14, __ S, z16, z27); // add z14.s, z16.s, z27.s
+ __ sve_sub(z0, __ S, z6, z26); // sub z0.s, z6.s, z26.s
+ __ sve_fadd(z27, __ S, z12, z6); // fadd z27.s, z12.s, z6.s
+ __ sve_fmul(z30, __ S, z4, z19); // fmul z30.s, z4.s, z19.s
+ __ sve_fsub(z11, __ D, z16, z2); // fsub z11.d, z16.d, z2.d
+ __ sve_abs(z15, __ D, p0, z12); // abs z15.d, p0/m, z12.d
+ __ sve_add(z9, __ B, p5, z23); // add z9.b, p5/m, z9.b, z23.b
+ __ sve_asr(z30, __ S, p0, z26); // asr z30.s, p0/m, z30.s, z26.s
+ __ sve_cnt(z4, __ H, p2, z18); // cnt z4.h, p2/m, z18.h
+ __ sve_lsl(z25, __ S, p1, z11); // lsl z25.s, p1/m, z25.s, z11.s
+ __ sve_lsr(z10, __ B, p6, z8); // lsr z10.b, p6/m, z10.b, z8.b
+ __ sve_mul(z4, __ B, p5, z17); // mul z4.b, p5/m, z4.b, z17.b
+ __ sve_neg(z30, __ S, p3, z9); // neg z30.s, p3/m, z9.s
+ __ sve_not(z0, __ D, p3, z20); // not z0.d, p3/m, z20.d
+ __ sve_smax(z23, __ H, p7, z3); // smax z23.h, p7/m, z23.h, z3.h
+ __ sve_smin(z0, __ H, p2, z11); // smin z0.h, p2/m, z0.h, z11.h
+ __ sve_sub(z11, __ D, p6, z5); // sub z11.d, p6/m, z11.d, z5.d
+ __ sve_fabs(z16, __ S, p2, z17); // fabs z16.s, p2/m, z17.s
+ __ sve_fadd(z15, __ S, p0, z26); // fadd z15.s, p0/m, z15.s, z26.s
+ __ sve_fdiv(z10, __ S, p7, z19); // fdiv z10.s, p7/m, z10.s, z19.s
+ __ sve_fmax(z24, __ D, p0, z17); // fmax z24.d, p0/m, z24.d, z17.d
+ __ sve_fmin(z26, __ D, p4, z15); // fmin z26.d, p4/m, z26.d, z15.d
+ __ sve_fmul(z24, __ D, p2, z17); // fmul z24.d, p2/m, z24.d, z17.d
+ __ sve_fneg(z30, __ S, p5, z29); // fneg z30.s, p5/m, z29.s
+ __ sve_frintm(z18, __ S, p5, z10); // frintm z18.s, p5/m, z10.s
+ __ sve_frintn(z30, __ D, p2, z30); // frintn z30.d, p2/m, z30.d
+ __ sve_frintp(z6, __ S, p6, z30); // frintp z6.s, p6/m, z30.s
+ __ sve_fsqrt(z20, __ D, p6, z2); // fsqrt z20.d, p6/m, z2.d
+ __ sve_fsub(z9, __ S, p5, z29); // fsub z9.s, p5/m, z9.s, z29.s
+ __ sve_fmla(z18, __ D, p2, z3, z22); // fmla z18.d, p2/m, z3.d, z22.d
+ __ sve_fmls(z15, __ D, p2, z13, z12); // fmls z15.d, p2/m, z13.d, z12.d
+ __ sve_fnmla(z12, __ S, p0, z30, z30); // fnmla z12.s, p0/m, z30.s, z30.s
+ __ sve_fnmls(z7, __ D, p3, z21, z0); // fnmls z7.d, p3/m, z21.d, z0.d
+ __ sve_mla(z19, __ H, p2, z26, z20); // mla z19.h, p2/m, z26.h, z20.h
+ __ sve_mls(z16, __ D, p7, z1, z21); // mls z16.d, p7/m, z1.d, z21.d
+ __ sve_and(z21, z4, z18); // and z21.d, z4.d, z18.d
+ __ sve_eor(z12, z18, z7); // eor z12.d, z18.d, z7.d
+ __ sve_orr(z25, z15, z13); // orr z25.d, z15.d, z13.d
+
+// SVEReductionOp
+ __ sve_andv(v11, __ D, p4, z7); // andv d11, p4, z7.d
+ __ sve_orv(v11, __ D, p1, z9); // orv d11, p1, z9.d
+ __ sve_eorv(v28, __ D, p7, z0); // eorv d28, p7, z0.d
+ __ sve_smaxv(v16, __ H, p0, z7); // smaxv h16, p0, z7.h
+ __ sve_sminv(v12, __ B, p3, z29); // sminv b12, p3, z29.b
+ __ sve_fminv(v21, __ S, p6, z11); // fminv s21, p6, z11.s
+ __ sve_fmaxv(v6, __ D, p2, z4); // fmaxv d6, p2, z4.d
+ __ sve_fadda(v7, __ D, p0, z7); // fadda d7, p0, d7, z7.d
+ __ sve_uaddv(v12, __ B, p7, z29); // uaddv d12, p7, z29.b
__ bind(forth);
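
The SpecialCases, SVEVectorOp and SVEReductionOp groups above are the new hand-written test entries exercising the SVE assembler methods introduced by this patch; the hunk that follows updates the expected objdump listing to match the regenerated instruction stream. As a rough, self-contained sketch of how these new calls are driven (register choices copied from the test above; the wrapper function and its name are illustrative and assume the HotSpot aarch64 register definitions are in scope):

    // Emits a few of the new SVE instructions through the assembler interface
    // added by this patch; `masm` is assumed to be a live Assembler instance.
    void emit_sve_sample(Assembler* masm) {
    #define __ masm->
      __ sve_dup(z0, __ B, 127);                   // dup z0.b, 127
      __ sve_ld1w(z20, __ S, p2, Address(r0, 7));  // ld1w {z20.s}, p2/z, [x0, #7, MUL VL]
      __ sve_add(z14, __ S, z16, z27);             // add z14.s, z16.s, z27.s
      __ sve_fadda(v7, __ D, p0, z7);              // fadda d7, p0, d7, z7.d
    #undef __
    }
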
@@ -633,542 +719,642 @@ aarch64ops.o: file format elf64-littleaarch64
Disassembly of section .text:
0000000000000000 <back>:
- 0: 8b0772d3 add x19, x22, x7, lsl #28
- 4: cb4a3570 sub x16, x11, x10, lsr #13
- 8: ab9c09bb adds x27, x13, x28, asr #2
- c: eb9aa794 subs x20, x28, x26, asr #41
- 10: 0b934e68 add w8, w19, w19, asr #19
- 14: 4b0a3924 sub w4, w9, w10, lsl #14
- 18: 2b1e3568 adds w8, w11, w30, lsl #13
- 1c: 6b132720 subs w0, w25, w19, lsl #9
- 20: 8a154c14 and x20, x0, x21, lsl #19
- 24: aa1445d5 orr x21, x14, x20, lsl #17
- 28: ca01cf99 eor x25, x28, x1, lsl #51
- 2c: ea8b3f6a ands x10, x27, x11, asr #15
- 30: 0a8c5cb9 and w25, w5, w12, asr #23
- 34: 2a4a11d2 orr w18, w14, w10, lsr #4
- 38: 4a855aa4 eor w4, w21, w5, asr #22
- 3c: 6a857415 ands w21, w0, w5, asr #29
- 40: 8aa697da bic x26, x30, x6, asr #37
- 44: aa6d7423 orn x3, x1, x13, lsr #29
- 48: ca29bf80 eon x0, x28, x9, lsl #47
- 4c: ea3cb8bd bics x29, x5, x28, lsl #46
- 50: 0a675249 bic w9, w18, w7, lsr #20
- 54: 2ab961ba orn w26, w13, w25, asr #24
- 58: 4a331899 eon w25, w4, w19, lsl #6
- 5c: 6a646345 bics w5, w26, w4, lsr #24
- 60: 11055267 add w7, w19, #0x154
- 64: 31064408 adds w8, w0, #0x191
- 68: 51028e9d sub w29, w20, #0xa3
- 6c: 710bdee8 subs w8, w23, #0x2f7
- 70: 91082d81 add x1, x12, #0x20b
- 74: b106a962 adds x2, x11, #0x1aa
- 78: d10b33ae sub x14, x29, #0x2cc
- 7c: f10918ab subs x11, x5, #0x246
- 80: 121102d7 and w23, w22, #0x8000
- 84: 3204cd44 orr w4, w10, #0xf0f0f0f0
- 88: 5204cf00 eor w0, w24, #0xf0f0f0f0
- 8c: 72099fb3 ands w19, w29, #0x7f807f80
- 90: 92729545 and x5, x10, #0xfffffffffc000
- 94: b20e37cc orr x12, x30, #0xfffc0000fffc0000
- 98: d27c34be eor x30, x5, #0x3fff0
- 9c: f27e4efa ands x26, x23, #0x3ffffc
- a0: 14000000 b a0 <back+0xa0>
- a4: 17ffffd7 b 0 <back>
- a8: 1400017f b 6a4 <forth>
- ac: 94000000 bl ac <back+0xac>
- b0: 97ffffd4 bl 0 <back>
- b4: 9400017c bl 6a4 <forth>
- b8: 3400000c cbz w12, b8 <back+0xb8>
- bc: 34fffa2c cbz w12, 0 <back>
- c0: 34002f2c cbz w12, 6a4 <forth>
- c4: 35000014 cbnz w20, c4 <back+0xc4>
- c8: 35fff9d4 cbnz w20, 0 <back>
- cc: 35002ed4 cbnz w20, 6a4 <forth>
- d0: b400000c cbz x12, d0 <back+0xd0>
- d4: b4fff96c cbz x12, 0 <back>
- d8: b4002e6c cbz x12, 6a4 <forth>
- dc: b5000018 cbnz x24, dc <back+0xdc>
- e0: b5fff918 cbnz x24, 0 <back>
- e4: b5002e18 cbnz x24, 6a4 <forth>
- e8: 10000006 adr x6, e8 <back+0xe8>
- ec: 10fff8a6 adr x6, 0 <back>
- f0: 10002da6 adr x6, 6a4 <forth>
- f4: 90000015 adrp x21, 0 <back>
- f8: 36080001 tbz w1, #1, f8 <back+0xf8>
- fc: 360ff821 tbz w1, #1, 0 <back>
- 100: 36082d21 tbz w1, #1, 6a4 <forth>
- 104: 37480008 tbnz w8, #9, 104 <back+0x104>
- 108: 374ff7c8 tbnz w8, #9, 0 <back>
- 10c: 37482cc8 tbnz w8, #9, 6a4 <forth>
- 110: 128b50ec movn w12, #0x5a87
- 114: 52a9ff8b movz w11, #0x4ffc, lsl #16
- 118: 7281d095 movk w21, #0xe84
- 11c: 92edfebd movn x29, #0x6ff5, lsl #48
- 120: d28361e3 movz x3, #0x1b0f
- 124: f2a4cc96 movk x22, #0x2664, lsl #16
- 128: 9346590c sbfx x12, x8, #6, #17
- 12c: 33194f33 bfi w19, w25, #7, #20
- 130: 531d3d89 ubfiz w9, w12, #3, #16
- 134: 9350433c sbfx x28, x25, #16, #1
- 138: b34464ac bfxil x12, x5, #4, #22
- 13c: d3462140 ubfx x0, x10, #6, #3
- 140: 139a61a4 extr w4, w13, w26, #24
- 144: 93d87fd7 extr x23, x30, x24, #31
- 148: 54000000 b.eq 148 <back+0x148>
- 14c: 54fff5a0 b.eq 0 <back>
- 150: 54002aa0 b.eq 6a4 <forth>
- 154: 54000001 b.ne 154 <back+0x154>
- 158: 54fff541 b.ne 0 <back>
- 15c: 54002a41 b.ne 6a4 <forth>
- 160: 54000002 b.cs 160 <back+0x160>
- 164: 54fff4e2 b.cs 0 <back>
- 168: 540029e2 b.cs 6a4 <forth>
- 16c: 54000002 b.cs 16c <back+0x16c>
- 170: 54fff482 b.cs 0 <back>
- 174: 54002982 b.cs 6a4 <forth>
- 178: 54000003 b.cc 178 <back+0x178>
- 17c: 54fff423 b.cc 0 <back>
- 180: 54002923 b.cc 6a4 <forth>
- 184: 54000003 b.cc 184 <back+0x184>
- 188: 54fff3c3 b.cc 0 <back>
- 18c: 540028c3 b.cc 6a4 <forth>
- 190: 54000004 b.mi 190 <back+0x190>
- 194: 54fff364 b.mi 0 <back>
- 198: 54002864 b.mi 6a4 <forth>
- 19c: 54000005 b.pl 19c <back+0x19c>
- 1a0: 54fff305 b.pl 0 <back>
- 1a4: 54002805 b.pl 6a4 <forth>
- 1a8: 54000006 b.vs 1a8 <back+0x1a8>
- 1ac: 54fff2a6 b.vs 0 <back>
- 1b0: 540027a6 b.vs 6a4 <forth>
- 1b4: 54000007 b.vc 1b4 <back+0x1b4>
- 1b8: 54fff247 b.vc 0 <back>
- 1bc: 54002747 b.vc 6a4 <forth>
- 1c0: 54000008 b.hi 1c0 <back+0x1c0>
- 1c4: 54fff1e8 b.hi 0 <back>
- 1c8: 540026e8 b.hi 6a4 <forth>
- 1cc: 54000009 b.ls 1cc <back+0x1cc>
- 1d0: 54fff189 b.ls 0 <back>
- 1d4: 54002689 b.ls 6a4 <forth>
- 1d8: 5400000a b.ge 1d8 <back+0x1d8>
- 1dc: 54fff12a b.ge 0 <back>
- 1e0: 5400262a b.ge 6a4 <forth>
- 1e4: 5400000b b.lt 1e4 <back+0x1e4>
- 1e8: 54fff0cb b.lt 0 <back>
- 1ec: 540025cb b.lt 6a4 <forth>
- 1f0: 5400000c b.gt 1f0 <back+0x1f0>
- 1f4: 54fff06c b.gt 0 <back>
- 1f8: 5400256c b.gt 6a4 <forth>
- 1fc: 5400000d b.le 1fc <back+0x1fc>
- 200: 54fff00d b.le 0 <back>
- 204: 5400250d b.le 6a4 <forth>
- 208: 5400000e b.al 208 <back+0x208>
- 20c: 54ffefae b.al 0 <back>
- 210: 540024ae b.al 6a4 <forth>
- 214: 5400000f b.nv 214 <back+0x214>
- 218: 54ffef4f b.nv 0 <back>
- 21c: 5400244f b.nv 6a4 <forth>
- 220: d4063721 svc #0x31b9
- 224: d4035082 hvc #0x1a84
- 228: d400bfe3 smc #0x5ff
- 22c: d4282fc0 brk #0x417e
- 230: d444c320 hlt #0x2619
- 234: d503201f nop
- 238: d69f03e0 eret
- 23c: d6bf03e0 drps
- 240: d5033fdf isb
- 244: d5033f9f dsb sy
- 248: d5033abf dmb ishst
- 24c: d61f0040 br x2
- 250: d63f00a0 blr x5
- 254: c8147c55 stxr w20, x21, [x2]
- 258: c805fcfd stlxr w5, x29, [x7]
- 25c: c85f7e05 ldxr x5, [x16]
- 260: c85fffbb ldaxr x27, [x29]
- 264: c89fffa0 stlr x0, [x29]
- 268: c8dfff95 ldar x21, [x28]
- 26c: 88157cf8 stxr w21, w24, [x7]
- 270: 8815ff9a stlxr w21, w26, [x28]
- 274: 885f7cd5 ldxr w21, [x6]
- 278: 885fffcf ldaxr w15, [x30]
- 27c: 889ffc73 stlr w19, [x3]
- 280: 88dffc56 ldar w22, [x2]
- 284: 48127c0f stxrh w18, w15, [x0]
- 288: 480bff85 stlxrh w11, w5, [x28]
- 28c: 485f7cdd ldxrh w29, [x6]
- 290: 485ffcf2 ldaxrh w18, [x7]
- 294: 489fff99 stlrh w25, [x28]
- 298: 48dffe62 ldarh w2, [x19]
- 29c: 080a7c3e stxrb w10, w30, [x1]
- 2a0: 0814fed5 stlxrb w20, w21, [x22]
- 2a4: 085f7c59 ldxrb w25, [x2]
- 2a8: 085ffcb8 ldaxrb w24, [x5]
- 2ac: 089ffc70 stlrb w16, [x3]
- 2b0: 08dfffb6 ldarb w22, [x29]
- 2b4: c87f0a68 ldxp x8, x2, [x19]
- 2b8: c87fcdc7 ldaxp x7, x19, [x14]
- 2bc: c82870bb stxp w8, x27, x28, [x5]
- 2c0: c825b8c8 stlxp w5, x8, x14, [x6]
- 2c4: 887f12d9 ldxp w25, w4, [x22]
- 2c8: 887fb9ed ldaxp w13, w14, [x15]
- 2cc: 8834215a stxp w20, w26, w8, [x10]
- 2d0: 8837ca52 stlxp w23, w18, w18, [x18]
- 2d4: f806317e str x30, [x11,#99]
- 2d8: b81b3337 str w23, [x25,#-77]
- 2dc: 39000dc2 strb w2, [x14,#3]
- 2e0: 78005149 strh w9, [x10,#5]
- 2e4: f84391f4 ldr x20, [x15,#57]
- 2e8: b85b220c ldr w12, [x16,#-78]
- 2ec: 385fd356 ldrb w22, [x26,#-3]
- 2f0: 785d127e ldrh w30, [x19,#-47]
- 2f4: 389f4149 ldrsb x9, [x10,#-12]
- 2f8: 79801e3c ldrsh x28, [x17,#14]
- 2fc: 79c014a3 ldrsh w3, [x5,#10]
- 300: b89a5231 ldrsw x17, [x17,#-91]
- 304: fc5ef282 ldr d2, [x20,#-17]
- 308: bc5f60f6 ldr s22, [x7,#-10]
- 30c: fc12125e str d30, [x18,#-223]
- 310: bc0152cd str s13, [x22,#21]
- 314: f8190e49 str x9, [x18,#-112]!
- 318: b800befd str w29, [x23,#11]!
- 31c: 381ffd92 strb w18, [x12,#-1]!
- 320: 781e9e90 strh w16, [x20,#-23]!
- 324: f8409fa3 ldr x3, [x29,#9]!
- 328: b8413c79 ldr w25, [x3,#19]!
- 32c: 385fffa1 ldrb w1, [x29,#-1]!
- 330: 785c7fa8 ldrh w8, [x29,#-57]!
- 334: 389f3dc5 ldrsb x5, [x14,#-13]!
- 338: 78801f6a ldrsh x10, [x27,#1]!
- 33c: 78c19d4b ldrsh w11, [x10,#25]!
- 340: b89a4ec4 ldrsw x4, [x22,#-92]!
- 344: fc408eeb ldr d11, [x23,#8]!
- 348: bc436e79 ldr s25, [x19,#54]!
- 34c: fc152ce1 str d1, [x7,#-174]!
- 350: bc036f28 str s8, [x25,#54]!
- 354: f8025565 str x5, [x11],#37
- 358: b80135f8 str w24, [x15],#19
- 35c: 381ff74f strb w15, [x26],#-1
- 360: 781fa652 strh w18, [x18],#-6
- 364: f851a447 ldr x7, [x2],#-230
- 368: b85e557b ldr w27, [x11],#-27
- 36c: 385e7472 ldrb w18, [x3],#-25
- 370: 785e070a ldrh w10, [x24],#-32
- 374: 38804556 ldrsb x22, [x10],#4
- 378: 78819591 ldrsh x17, [x12],#25
- 37c: 78dc24e8 ldrsh w8, [x7],#-62
- 380: b89cd6d7 ldrsw x23, [x22],#-51
- 384: fc430738 ldr d24, [x25],#48
- 388: bc5f6595 ldr s21, [x12],#-10
- 38c: fc1225b2 str d18, [x13],#-222
- 390: bc1d7430 str s16, [x1],#-41
- 394: f82fcac2 str x2, [x22,w15,sxtw]
- 398: b83d6a02 str w2, [x16,x29]
- 39c: 382e5a54 strb w20, [x18,w14,uxtw #0]
- 3a0: 7834fa66 strh w6, [x19,x20,sxtx #1]
- 3a4: f86ecbae ldr x14, [x29,w14,sxtw]
- 3a8: b86cda90 ldr w16, [x20,w12,sxtw #2]
- 3ac: 3860d989 ldrb w9, [x12,w0,sxtw #0]
- 3b0: 78637a2c ldrh w12, [x17,x3,lsl #1]
- 3b4: 38a3fa22 ldrsb x2, [x17,x3,sxtx #0]
- 3b8: 78b15827 ldrsh x7, [x1,w17,uxtw #1]
- 3bc: 78f2d9f9 ldrsh w25, [x15,w18,sxtw #1]
- 3c0: b8ac6ab7 ldrsw x23, [x21,x12]
- 3c4: fc6879a5 ldr d5, [x13,x8,lsl #3]
- 3c8: bc767943 ldr s3, [x10,x22,lsl #2]
- 3cc: fc3bc84e str d14, [x2,w27,sxtw]
- 3d0: bc3968d4 str s20, [x6,x25]
- 3d4: f91fc0fe str x30, [x7,#16256]
- 3d8: b91da50f str w15, [x8,#7588]
- 3dc: 391d280b strb w11, [x0,#1866]
- 3e0: 791d2e23 strh w3, [x17,#3734]
- 3e4: f95bc8e2 ldr x2, [x7,#14224]
- 3e8: b95ce525 ldr w5, [x9,#7396]
- 3ec: 395ae53c ldrb w28, [x9,#1721]
- 3f0: 795c9282 ldrh w2, [x20,#3656]
- 3f4: 399d7dd6 ldrsb x22, [x14,#1887]
- 3f8: 799fe008 ldrsh x8, [x0,#4080]
- 3fc: 79de9bc0 ldrsh w0, [x30,#3916]
- 400: b99aae78 ldrsw x24, [x19,#6828]
- 404: fd597598 ldr d24, [x12,#13032]
- 408: bd5d1d08 ldr s8, [x8,#7452]
- 40c: fd1f3dea str d10, [x15,#15992]
- 410: bd1a227a str s26, [x19,#6688]
- 414: 5800148a ldr x10, 6a4 <forth>
- 418: 18000003 ldr w3, 418 <back+0x418>
- 41c: f88092e0 prfm pldl1keep, [x23,#9]
- 420: d8ffdf00 prfm pldl1keep, 0 <back>
- 424: f8a84860 prfm pldl1keep, [x3,w8,uxtw]
- 428: f99d7560 prfm pldl1keep, [x11,#15080]
- 42c: 1a1c012d adc w13, w9, w28
- 430: 3a1c027b adcs w27, w19, w28
- 434: 5a060253 sbc w19, w18, w6
- 438: 7a03028e sbcs w14, w20, w3
- 43c: 9a0801d0 adc x16, x14, x8
- 440: ba0803a0 adcs x0, x29, x8
- 444: da140308 sbc x8, x24, x20
- 448: fa00038c sbcs x12, x28, x0
- 44c: 0b3010d7 add w23, w6, w16, uxtb #4
- 450: 2b37ab39 adds w25, w25, w23, sxth #2
- 454: cb2466da sub x26, x22, x4, uxtx #1
- 458: 6b33efb1 subs w17, w29, w19, sxtx #3
- 45c: 8b350fcb add x11, x30, w21, uxtb #3
- 460: ab208a70 adds x16, x19, w0, sxtb #2
- 464: cb39e52b sub x11, x9, x25, sxtx #1
- 468: eb2c9291 subs x17, x20, w12, sxtb #4
- 46c: 3a4bd1a3 ccmn w13, w11, #0x3, le
- 470: 7a4c81a2 ccmp w13, w12, #0x2, hi
- 474: ba42106c ccmn x3, x2, #0xc, ne
- 478: fa5560e3 ccmp x7, x21, #0x3, vs
- 47c: 3a4e3844 ccmn w2, #0xe, #0x4, cc
- 480: 7a515a26 ccmp w17, #0x11, #0x6, pl
- 484: ba4c2940 ccmn x10, #0xc, #0x0, cs
- 488: fa52aaae ccmp x21, #0x12, #0xe, ge
- 48c: 1a8cc1b5 csel w21, w13, w12, gt
- 490: 1a8f976a csinc w10, w27, w15, ls
- 494: 5a8981a0 csinv w0, w13, w9, hi
- 498: 5a9a6492 csneg w18, w4, w26, vs
- 49c: 9a8793ac csel x12, x29, x7, ls
- 4a0: 9a9474e6 csinc x6, x7, x20, vc
- 4a4: da83d2b6 csinv x22, x21, x3, le
- 4a8: da9b9593 csneg x19, x12, x27, ls
- 4ac: 5ac00200 rbit w0, w16
- 4b0: 5ac006f1 rev16 w17, w23
- 4b4: 5ac009d1 rev w17, w14
- 4b8: 5ac013d8 clz w24, w30
- 4bc: 5ac016d8 cls w24, w22
- 4c0: dac00223 rbit x3, x17
- 4c4: dac005ac rev16 x12, x13
- 4c8: dac00ac9 rev32 x9, x22
- 4cc: dac00c00 rev x0, x0
- 4d0: dac01205 clz x5, x16
- 4d4: dac016d9 cls x25, x22
- 4d8: 1ac0089d udiv w29, w4, w0
- 4dc: 1add0fa0 sdiv w0, w29, w29
- 4e0: 1ad52225 lsl w5, w17, w21
- 4e4: 1ad22529 lsr w9, w9, w18
- 4e8: 1ac82b61 asr w1, w27, w8
- 4ec: 1acd2e92 ror w18, w20, w13
- 4f0: 9acc0b28 udiv x8, x25, x12
- 4f4: 9adc0ca7 sdiv x7, x5, x28
- 4f8: 9adb2225 lsl x5, x17, x27
- 4fc: 9ad42757 lsr x23, x26, x20
- 500: 9adc291c asr x28, x8, x28
- 504: 9ac42fa3 ror x3, x29, x4
- 508: 1b1a55d1 madd w17, w14, w26, w21
- 50c: 1b0bafc1 msub w1, w30, w11, w11
- 510: 9b067221 madd x1, x17, x6, x28
- 514: 9b1ea0de msub x30, x6, x30, x8
- 518: 9b2e20d5 smaddl x21, w6, w14, x8
- 51c: 9b38cd4a smsubl x10, w10, w24, x19
- 520: 9bae6254 umaddl x20, w18, w14, x24
- 524: 9ba59452 umsubl x18, w2, w5, x5
- 528: 1e2d0a48 fmul s8, s18, s13
- 52c: 1e3c19c2 fdiv s2, s14, s28
- 530: 1e3c298f fadd s15, s12, s28
- 534: 1e213980 fsub s0, s12, s1
- 538: 1e240baf fmul s15, s29, s4
- 53c: 1e77082c fmul d12, d1, d23
- 540: 1e72191b fdiv d27, d8, d18
- 544: 1e6b2a97 fadd d23, d20, d11
- 548: 1e723988 fsub d8, d12, d18
- 54c: 1e770b1a fmul d26, d24, d23
- 550: 1f0d66f5 fmadd s21, s23, s13, s25
- 554: 1f01b956 fmsub s22, s10, s1, s14
- 558: 1f227a8e fnmadd s14, s20, s2, s30
- 55c: 1f365ba7 fnmadd s7, s29, s22, s22
- 560: 1f4f14ad fmadd d13, d5, d15, d5
- 564: 1f45a98e fmsub d14, d12, d5, d10
- 568: 1f60066a fnmadd d10, d19, d0, d1
- 56c: 1f620054 fnmadd d20, d2, d2, d0
- 570: 1e204139 fmov s25, s9
- 574: 1e20c094 fabs s20, s4
- 578: 1e214363 fneg s3, s27
- 57c: 1e21c041 fsqrt s1, s2
- 580: 1e22c01e fcvt d30, s0
- 584: 1e60408c fmov d12, d4
- 588: 1e60c361 fabs d1, d27
- 58c: 1e6142c8 fneg d8, d22
- 590: 1e61c16b fsqrt d11, d11
- 594: 1e624396 fcvt s22, d28
- 598: 1e3802dc fcvtzs w28, s22
- 59c: 9e380374 fcvtzs x20, s27
- 5a0: 1e78000e fcvtzs w14, d0
- 5a4: 9e78017a fcvtzs x26, d11
- 5a8: 1e2202dc scvtf s28, w22
- 5ac: 9e220150 scvtf s16, x10
- 5b0: 1e6202a8 scvtf d8, w21
- 5b4: 9e620395 scvtf d21, x28
- 5b8: 1e260318 fmov w24, s24
- 5bc: 9e660268 fmov x8, d19
- 5c0: 1e270188 fmov s8, w12
- 5c4: 9e6700e6 fmov d6, x7
- 5c8: 1e3023c0 fcmp s30, s16
- 5cc: 1e6b2320 fcmp d25, d11
- 5d0: 1e202168 fcmp s11, #0.0
- 5d4: 1e602168 fcmp d11, #0.0
- 5d8: 2910323d stp w29, w12, [x17,#128]
- 5dc: 297449d6 ldp w22, w18, [x14,#-96]
- 5e0: 6948402b ldpsw x11, x16, [x1,#64]
- 5e4: a9072f40 stp x0, x11, [x26,#112]
- 5e8: a9410747 ldp x7, x1, [x26,#16]
- 5ec: 29801f0a stp w10, w7, [x24,#0]!
- 5f0: 29e07307 ldp w7, w28, [x24,#-256]!
- 5f4: 69e272b9 ldpsw x25, x28, [x21,#-240]!
- 5f8: a9bf49d4 stp x20, x18, [x14,#-16]!
- 5fc: a9c529a8 ldp x8, x10, [x13,#80]!
- 600: 28b0605a stp w26, w24, [x2],#-128
- 604: 28e866a2 ldp w2, w25, [x21],#-192
- 608: 68ee0ab1 ldpsw x17, x2, [x21],#-144
- 60c: a886296c stp x12, x10, [x11],#96
- 610: a8fe1a38 ldp x24, x6, [x17],#-32
- 614: 282479c3 stnp w3, w30, [x14,#-224]
- 618: 286e534f ldnp w15, w20, [x26,#-144]
- 61c: a8386596 stnp x22, x25, [x12,#-128]
- 620: a8755a3b ldnp x27, x22, [x17,#-176]
- 624: 1e601000 fmov d0, #2.000000000000000000e+00
- 628: 1e603000 fmov d0, #2.125000000000000000e+00
- 62c: 1e621000 fmov d0, #4.000000000000000000e+00
- 630: 1e623000 fmov d0, #4.250000000000000000e+00
- 634: 1e641000 fmov d0, #8.000000000000000000e+00
- 638: 1e643000 fmov d0, #8.500000000000000000e+00
- 63c: 1e661000 fmov d0, #1.600000000000000000e+01
- 640: 1e663000 fmov d0, #1.700000000000000000e+01
- 644: 1e681000 fmov d0, #1.250000000000000000e-01
- 648: 1e683000 fmov d0, #1.328125000000000000e-01
- 64c: 1e6a1000 fmov d0, #2.500000000000000000e-01
- 650: 1e6a3000 fmov d0, #2.656250000000000000e-01
- 654: 1e6c1000 fmov d0, #5.000000000000000000e-01
- 658: 1e6c3000 fmov d0, #5.312500000000000000e-01
- 65c: 1e6e1000 fmov d0, #1.000000000000000000e+00
- 660: 1e6e3000 fmov d0, #1.062500000000000000e+00
- 664: 1e701000 fmov d0, #-2.000000000000000000e+00
- 668: 1e703000 fmov d0, #-2.125000000000000000e+00
- 66c: 1e721000 fmov d0, #-4.000000000000000000e+00
- 670: 1e723000 fmov d0, #-4.250000000000000000e+00
- 674: 1e741000 fmov d0, #-8.000000000000000000e+00
- 678: 1e743000 fmov d0, #-8.500000000000000000e+00
- 67c: 1e761000 fmov d0, #-1.600000000000000000e+01
- 680: 1e763000 fmov d0, #-1.700000000000000000e+01
- 684: 1e781000 fmov d0, #-1.250000000000000000e-01
- 688: 1e783000 fmov d0, #-1.328125000000000000e-01
- 68c: 1e7a1000 fmov d0, #-2.500000000000000000e-01
- 690: 1e7a3000 fmov d0, #-2.656250000000000000e-01
- 694: 1e7c1000 fmov d0, #-5.000000000000000000e-01
- 698: 1e7c3000 fmov d0, #-5.312500000000000000e-01
- 69c: 1e7e1000 fmov d0, #-1.000000000000000000e+00
- 6a0: 1e7e3000 fmov d0, #-1.062500000000000000e+00
+ 0: 8b0d82fa add x26, x23, x13, lsl #32
+ 4: cb49970c sub x12, x24, x9, lsr #37
+ 8: ab889dfc adds x28, x15, x8, asr #39
+ c: eb9ee787 subs x7, x28, x30, asr #57
+ 10: 0b9b3ec9 add w9, w22, w27, asr #15
+ 14: 4b9279a3 sub w3, w13, w18, asr #30
+ 18: 2b88474e adds w14, w26, w8, asr #17
+ 1c: 6b8c56c0 subs w0, w22, w12, asr #21
+ 20: 8a1a51e0 and x0, x15, x26, lsl #20
+ 24: aa11f4ba orr x26, x5, x17, lsl #61
+ 28: ca0281b8 eor x24, x13, x2, lsl #32
+ 2c: ea918c7c ands x28, x3, x17, asr #35
+ 30: 0a5d4a19 and w25, w16, w29, lsr #18
+ 34: 2a4b264d orr w13, w18, w11, lsr #9
+ 38: 4a523ca5 eor w5, w5, w18, lsr #15
+ 3c: 6a9b6ae2 ands w2, w23, w27, asr #26
+ 40: 8a70b79b bic x27, x28, x16, lsr #45
+ 44: aaba9728 orn x8, x25, x26, asr #37
+ 48: ca6dfe3d eon x29, x17, x13, lsr #63
+ 4c: ea627f1c bics x28, x24, x2, lsr #31
+ 50: 0aa70f53 bic w19, w26, w7, asr #3
+ 54: 2aaa0f06 orn w6, w24, w10, asr #3
+ 58: 4a6176a4 eon w4, w21, w1, lsr #29
+ 5c: 6a604eb0 bics w16, w21, w0, lsr #19
+ 60: 1105ed91 add w17, w12, #0x17b
+ 64: 3100583e adds w30, w1, #0x16
+ 68: 5101f8bd sub w29, w5, #0x7e
+ 6c: 710f0306 subs w6, w24, #0x3c0
+ 70: 9101a1a0 add x0, x13, #0x68
+ 74: b10a5cc8 adds x8, x6, #0x297
+ 78: d10810aa sub x10, x5, #0x204
+ 7c: f10fd061 subs x1, x3, #0x3f4
+ 80: 120cb166 and w6, w11, #0xfff1fff1
+ 84: 321764bc orr w28, w5, #0xfffffe07
+ 88: 52174681 eor w1, w20, #0x7fffe00
+ 8c: 720c0247 ands w7, w18, #0x100000
+ 90: 9241018e and x14, x12, #0x8000000000000000
+ 94: b25a2969 orr x9, x11, #0x1ffc000000000
+ 98: d278b411 eor x17, x0, #0x3fffffffffff00
+ 9c: f26aad01 ands x1, x8, #0xffffffffffc00003
+ a0: 14000000 b a0 <back+0xa0>
+ a4: 17ffffd7 b 0 <back>
+ a8: 140001cf b 7e4 <forth>
+ ac: 94000000 bl ac <back+0xac>
+ b0: 97ffffd4 bl 0 <back>
+ b4: 940001cc bl 7e4 <forth>
+ b8: 3400000a cbz w10, b8 <back+0xb8>
+ bc: 34fffa2a cbz w10, 0 <back>
+ c0: 3400392a cbz w10, 7e4 <forth>
+ c4: 35000008 cbnz w8, c4 <back+0xc4>
+ c8: 35fff9c8 cbnz w8, 0 <back>
+ cc: 350038c8 cbnz w8, 7e4 <forth>
+ d0: b400000b cbz x11, d0 <back+0xd0>
+ d4: b4fff96b cbz x11, 0 <back>
+ d8: b400386b cbz x11, 7e4 <forth>
+ dc: b500001d cbnz x29, dc <back+0xdc>
+ e0: b5fff91d cbnz x29, 0 <back>
+ e4: b500381d cbnz x29, 7e4 <forth>
+ e8: 10000013 adr x19, e8 <back+0xe8>
+ ec: 10fff8b3 adr x19, 0 <back>
+ f0: 100037b3 adr x19, 7e4 <forth>
+ f4: 90000013 adrp x19, 0 <back>
+ f8: 36300016 tbz w22, #6, f8 <back+0xf8>
+ fc: 3637f836 tbz w22, #6, 0 <back>
+ 100: 36303736 tbz w22, #6, 7e4 <forth>
+ 104: 3758000c tbnz w12, #11, 104 <back+0x104>
+ 108: 375ff7cc tbnz w12, #11, 0 <back>
+ 10c: 375836cc tbnz w12, #11, 7e4 <forth>
+ 110: 128313a0 mov w0, #0xffffe762 // #-6302
+ 114: 528a32c7 mov w7, #0x5196 // #20886
+ 118: 7289173b movk w27, #0x48b9
+ 11c: 92ab3acc mov x12, #0xffffffffa629ffff // #-1507196929
+ 120: d2a0bf94 mov x20, #0x5fc0000 // #100401152
+ 124: f2c285e8 movk x8, #0x142f, lsl #32
+ 128: 9358722f sbfx x15, x17, #24, #5
+ 12c: 330e652f bfxil w15, w9, #14, #12
+ 130: 53067f3b lsr w27, w25, #6
+ 134: 93577c53 sbfx x19, x2, #23, #9
+ 138: b34a1aac bfi x12, x21, #54, #7
+ 13c: d35a4016 ubfiz x22, x0, #38, #17
+ 140: 13946c63 extr w3, w3, w20, #27
+ 144: 93c3dbc8 extr x8, x30, x3, #54
+ 148: 54000000 b.eq 148 <back+0x148> // b.none
+ 14c: 54fff5a0 b.eq 0 <back> // b.none
+ 150: 540034a0 b.eq 7e4 <forth> // b.none
+ 154: 54000001 b.ne 154 <back+0x154> // b.any
+ 158: 54fff541 b.ne 0 <back> // b.any
+ 15c: 54003441 b.ne 7e4 <forth> // b.any
+ 160: 54000002 b.cs 160 <back+0x160> // b.hs, b.nlast
+ 164: 54fff4e2 b.cs 0 <back> // b.hs, b.nlast
+ 168: 540033e2 b.cs 7e4 <forth> // b.hs, b.nlast
+ 16c: 54000002 b.cs 16c <back+0x16c> // b.hs, b.nlast
+ 170: 54fff482 b.cs 0 <back> // b.hs, b.nlast
+ 174: 54003382 b.cs 7e4 <forth> // b.hs, b.nlast
+ 178: 54000003 b.cc 178 <back+0x178> // b.lo, b.ul, b.last
+ 17c: 54fff423 b.cc 0 <back> // b.lo, b.ul, b.last
+ 180: 54003323 b.cc 7e4 <forth> // b.lo, b.ul, b.last
+ 184: 54000003 b.cc 184 <back+0x184> // b.lo, b.ul, b.last
+ 188: 54fff3c3 b.cc 0 <back> // b.lo, b.ul, b.last
+ 18c: 540032c3 b.cc 7e4 <forth> // b.lo, b.ul, b.last
+ 190: 54000004 b.mi 190 <back+0x190> // b.first
+ 194: 54fff364 b.mi 0 <back> // b.first
+ 198: 54003264 b.mi 7e4 <forth> // b.first
+ 19c: 54000005 b.pl 19c <back+0x19c> // b.nfrst
+ 1a0: 54fff305 b.pl 0 <back> // b.nfrst
+ 1a4: 54003205 b.pl 7e4 <forth> // b.nfrst
+ 1a8: 54000006 b.vs 1a8 <back+0x1a8>
+ 1ac: 54fff2a6 b.vs 0 <back>
+ 1b0: 540031a6 b.vs 7e4 <forth>
+ 1b4: 54000007 b.vc 1b4 <back+0x1b4>
+ 1b8: 54fff247 b.vc 0 <back>
+ 1bc: 54003147 b.vc 7e4 <forth>
+ 1c0: 54000008 b.hi 1c0 <back+0x1c0> // b.pmore
+ 1c4: 54fff1e8 b.hi 0 <back> // b.pmore
+ 1c8: 540030e8 b.hi 7e4 <forth> // b.pmore
+ 1cc: 54000009 b.ls 1cc <back+0x1cc> // b.plast
+ 1d0: 54fff189 b.ls 0 <back> // b.plast
+ 1d4: 54003089 b.ls 7e4 <forth> // b.plast
+ 1d8: 5400000a b.ge 1d8 <back+0x1d8> // b.tcont
+ 1dc: 54fff12a b.ge 0 <back> // b.tcont
+ 1e0: 5400302a b.ge 7e4 <forth> // b.tcont
+ 1e4: 5400000b b.lt 1e4 <back+0x1e4> // b.tstop
+ 1e8: 54fff0cb b.lt 0 <back> // b.tstop
+ 1ec: 54002fcb b.lt 7e4 <forth> // b.tstop
+ 1f0: 5400000c b.gt 1f0 <back+0x1f0>
+ 1f4: 54fff06c b.gt 0 <back>
+ 1f8: 54002f6c b.gt 7e4 <forth>
+ 1fc: 5400000d b.le 1fc <back+0x1fc>
+ 200: 54fff00d b.le 0 <back>
+ 204: 54002f0d b.le 7e4 <forth>
+ 208: 5400000e b.al 208 <back+0x208>
+ 20c: 54ffefae b.al 0 <back>
+ 210: 54002eae b.al 7e4 <forth>
+ 214: 5400000f b.nv 214 <back+0x214>
+ 218: 54ffef4f b.nv 0 <back>
+ 21c: 54002e4f b.nv 7e4 <forth>
+ 220: d40658e1 svc #0x32c7
+ 224: d4014d22 hvc #0xa69
+ 228: d4046543 smc #0x232a
+ 22c: d4273f60 brk #0x39fb
+ 230: d44cad80 hlt #0x656c
+ 234: d503201f nop
+ 238: d69f03e0 eret
+ 23c: d6bf03e0 drps
+ 240: d5033fdf isb
+ 244: d5033e9f dsb st
+ 248: d50332bf dmb oshst
+ 24c: d61f0200 br x16
+ 250: d63f0280 blr x20
+ 254: c80a7d1b stxr w10, x27, [x8]
+ 258: c800fea1 stlxr w0, x1, [x21]
+ 25c: c85f7fb1 ldxr x17, [x29]
+ 260: c85fff9d ldaxr x29, [x28]
+ 264: c89ffee1 stlr x1, [x23]
+ 268: c8dffe95 ldar x21, [x20]
+ 26c: 88167e7b stxr w22, w27, [x19]
+ 270: 880bfcd0 stlxr w11, w16, [x6]
+ 274: 885f7c12 ldxr w18, [x0]
+ 278: 885ffd44 ldaxr w4, [x10]
+ 27c: 889ffed8 stlr w24, [x22]
+ 280: 88dffe6a ldar w10, [x19]
+ 284: 48017fc5 stxrh w1, w5, [x30]
+ 288: 4808fe2c stlxrh w8, w12, [x17]
+ 28c: 485f7dc9 ldxrh w9, [x14]
+ 290: 485ffc27 ldaxrh w7, [x1]
+ 294: 489ffe05 stlrh w5, [x16]
+ 298: 48dffd82 ldarh w2, [x12]
+ 29c: 080a7c6c stxrb w10, w12, [x3]
+ 2a0: 081cff4e stlxrb w28, w14, [x26]
+ 2a4: 085f7d5e ldxrb w30, [x10]
+ 2a8: 085ffeae ldaxrb w14, [x21]
+ 2ac: 089ffd2d stlrb w13, [x9]
+ 2b0: 08dfff76 ldarb w22, [x27]
+ 2b4: c87f4d7c ldxp x28, x19, [x11]
+ 2b8: c87fcc5e ldaxp x30, x19, [x2]
+ 2bc: c8220417 stxp w2, x23, x1, [x0]
+ 2c0: c82cb5f0 stlxp w12, x16, x13, [x15]
+ 2c4: 887f55b2 ldxp w18, w21, [x13]
+ 2c8: 887ff90b ldaxp w11, w30, [x8]
+ 2cc: 88382c2d stxp w24, w13, w11, [x1]
+ 2d0: 883aedb5 stlxp w26, w21, w27, [x13]
+ 2d4: f819928b stur x11, [x20, #-103]
+ 2d8: b803e21c stur w28, [x16, #62]
+ 2dc: 381f713b sturb w27, [x9, #-9]
+ 2e0: 781ce322 sturh w2, [x25, #-50]
+ 2e4: f850f044 ldur x4, [x2, #-241]
+ 2e8: b85e129e ldur w30, [x20, #-31]
+ 2ec: 385e92f2 ldurb w18, [x23, #-23]
+ 2f0: 785ff35d ldurh w29, [x26, #-1]
+ 2f4: 39801921 ldrsb x1, [x9, #6]
+ 2f8: 7881318b ldursh x11, [x12, #19]
+ 2fc: 78dce02b ldursh w11, [x1, #-50]
+ 300: b8829313 ldursw x19, [x24, #41]
+ 304: fc45f318 ldur d24, [x24, #95]
+ 308: bc5d50af ldur s15, [x5, #-43]
+ 30c: fc001375 stur d21, [x27, #1]
+ 310: bc1951b7 stur s23, [x13, #-107]
+ 314: f8008c0b str x11, [x0, #8]!
+ 318: b801dc03 str w3, [x0, #29]!
+ 31c: 38009dcb strb w11, [x14, #9]!
+ 320: 781fdf1d strh w29, [x24, #-3]!
+ 324: f8570e2d ldr x13, [x17, #-144]!
+ 328: b85faecc ldr w12, [x22, #-6]!
+ 32c: 385f6d8d ldrb w13, [x12, #-10]!
+ 330: 785ebea0 ldrh w0, [x21, #-21]!
+ 334: 38804cf7 ldrsb x23, [x7, #4]!
+ 338: 789cbce3 ldrsh x3, [x7, #-53]!
+ 33c: 78df9cbc ldrsh w28, [x5, #-7]!
+ 340: b89eed38 ldrsw x24, [x9, #-18]!
+ 344: fc40cd6e ldr d14, [x11, #12]!
+ 348: bc5bdd93 ldr s19, [x12, #-67]!
+ 34c: fc103c14 str d20, [x0, #-253]!
+ 350: bc040c08 str s8, [x0, #64]!
+ 354: f81a2784 str x4, [x28], #-94
+ 358: b81ca4ec str w12, [x7], #-54
+ 35c: 381e855b strb w27, [x10], #-24
+ 360: 7801b506 strh w6, [x8], #27
+ 364: f853654e ldr x14, [x10], #-202
+ 368: b85d74b0 ldr w16, [x5], #-41
+ 36c: 384095c2 ldrb w2, [x14], #9
+ 370: 785ec5bc ldrh w28, [x13], #-20
+ 374: 389e15a9 ldrsb x9, [x13], #-31
+ 378: 789dc703 ldrsh x3, [x24], #-36
+ 37c: 78c06474 ldrsh w20, [x3], #6
+ 380: b89ff667 ldrsw x7, [x19], #-1
+ 384: fc57e51e ldr d30, [x8], #-130
+ 388: bc4155f9 ldr s25, [x15], #21
+ 38c: fc05a6ee str d14, [x23], #90
+ 390: bc1df408 str s8, [x0], #-33
+ 394: f835da4a str x10, [x18, w21, sxtw #3]
+ 398: b836d9a4 str w4, [x13, w22, sxtw #2]
+ 39c: 3833580d strb w13, [x0, w19, uxtw #0]
+ 3a0: 7826cb6c strh w12, [x27, w6, sxtw]
+ 3a4: f8706900 ldr x0, [x8, x16]
+ 3a8: b87ae880 ldr w0, [x4, x26, sxtx]
+ 3ac: 3865db2e ldrb w14, [x25, w5, sxtw #0]
+ 3b0: 78724889 ldrh w9, [x4, w18, uxtw]
+ 3b4: 38a7789b ldrsb x27, [x4, x7, lsl #0]
+ 3b8: 78beca2f ldrsh x15, [x17, w30, sxtw]
+ 3bc: 78f6c810 ldrsh w16, [x0, w22, sxtw]
+ 3c0: b8bef956 ldrsw x22, [x10, x30, sxtx #2]
+ 3c4: fc6afabd ldr d29, [x21, x10, sxtx #3]
+ 3c8: bc734963 ldr s3, [x11, w19, uxtw]
+ 3cc: fc3d5b8d str d13, [x28, w29, uxtw #3]
+ 3d0: bc25fbb7 str s23, [x29, x5, sxtx #2]
+ 3d4: f9189d05 str x5, [x8, #12600]
+ 3d8: b91ecb1d str w29, [x24, #7880]
+ 3dc: 39187a33 strb w19, [x17, #1566]
+ 3e0: 791f226d strh w13, [x19, #3984]
+ 3e4: f95aa2f3 ldr x19, [x23, #13632]
+ 3e8: b9587bb7 ldr w23, [x29, #6264]
+ 3ec: 395f7176 ldrb w22, [x11, #2012]
+ 3f0: 795d9143 ldrh w3, [x10, #3784]
+ 3f4: 399e7e08 ldrsb x8, [x16, #1951]
+ 3f8: 799a2697 ldrsh x23, [x20, #3346]
+ 3fc: 79df3422 ldrsh w2, [x1, #3994]
+ 400: b99c2624 ldrsw x4, [x17, #7204]
+ 404: fd5c2374 ldr d20, [x27, #14400]
+ 408: bd5fa1d9 ldr s25, [x14, #8096]
+ 40c: fd1d595a str d26, [x10, #15024]
+ 410: bd1b1869 str s9, [x3, #6936]
+ 414: 58001e9b ldr x27, 7e4 <forth>
+ 418: 1800000b ldr w11, 418 <back+0x418>
+ 41c: f8945060 prfum pldl1keep, [x3, #-187]
+ 420: d8000000 prfm pldl1keep, 420 <back+0x420>
+ 424: f8ae6ba0 prfm pldl1keep, [x29, x14]
+ 428: f99a0080 prfm pldl1keep, [x4, #13312]
+ 42c: 1a070035 adc w21, w1, w7
+ 430: 3a0700a8 adcs w8, w5, w7
+ 434: 5a0e0367 sbc w7, w27, w14
+ 438: 7a11009b sbcs w27, w4, w17
+ 43c: 9a000380 adc x0, x28, x0
+ 440: ba1e030c adcs x12, x24, x30
+ 444: da0f0320 sbc x0, x25, x15
+ 448: fa030301 sbcs x1, x24, x3
+ 44c: 0b340b12 add w18, w24, w20, uxtb #2
+ 450: 2b2a278d adds w13, w28, w10, uxth #1
+ 454: cb22aa0f sub x15, x16, w2, sxth #2
+ 458: 6b2d29bd subs w29, w13, w13, uxth #2
+ 45c: 8b2cce8c add x12, x20, w12, sxtw #3
+ 460: ab2b877e adds x30, x27, w11, sxtb #1
+ 464: cb21c8ee sub x14, x7, w1, sxtw #2
+ 468: eb3ba47d subs x29, x3, w27, sxth #1
+ 46c: 3a4d400e ccmn w0, w13, #0xe, mi // mi = first
+ 470: 7a5232c6 ccmp w22, w18, #0x6, cc // cc = lo, ul, last
+ 474: ba5e624e ccmn x18, x30, #0xe, vs
+ 478: fa53814c ccmp x10, x19, #0xc, hi // hi = pmore
+ 47c: 3a52d8c2 ccmn w6, #0x12, #0x2, le
+ 480: 7a4d8924 ccmp w9, #0xd, #0x4, hi // hi = pmore
+ 484: ba4b3aab ccmn x21, #0xb, #0xb, cc // cc = lo, ul, last
+ 488: fa4d7882 ccmp x4, #0xd, #0x2, vc
+ 48c: 1a96804c csel w12, w2, w22, hi // hi = pmore
+ 490: 1a912618 csinc w24, w16, w17, cs // cs = hs, nlast
+ 494: 5a90b0e6 csinv w6, w7, w16, lt // lt = tstop
+ 498: 5a96976b csneg w11, w27, w22, ls // ls = plast
+ 49c: 9a9db06a csel x10, x3, x29, lt // lt = tstop
+ 4a0: 9a9b374c csinc x12, x26, x27, cc // cc = lo, ul, last
+ 4a4: da95c14f csinv x15, x10, x21, gt
+ 4a8: da89c6fe csneg x30, x23, x9, gt
+ 4ac: 5ac0015e rbit w30, w10
+ 4b0: 5ac005fd rev16 w29, w15
+ 4b4: 5ac00bdd rev w29, w30
+ 4b8: 5ac012b9 clz w25, w21
+ 4bc: 5ac01404 cls w4, w0
+ 4c0: dac002b2 rbit x18, x21
+ 4c4: dac0061d rev16 x29, x16
+ 4c8: dac00a95 rev32 x21, x20
+ 4cc: dac00e66 rev x6, x19
+ 4d0: dac0107e clz x30, x3
+ 4d4: dac01675 cls x21, x19
+ 4d8: 1ac00b0b udiv w11, w24, w0
+ 4dc: 1ace0f3b sdiv w27, w25, w14
+ 4e0: 1ad221c3 lsl w3, w14, w18
+ 4e4: 1ad825e7 lsr w7, w15, w24
+ 4e8: 1ad92a3c asr w28, w17, w25
+ 4ec: 1adc2f42 ror w2, w26, w28
+ 4f0: 9ada0b25 udiv x5, x25, x26
+ 4f4: 9ad20e1b sdiv x27, x16, x18
+ 4f8: 9acc22a6 lsl x6, x21, x12
+ 4fc: 9acc2480 lsr x0, x4, x12
+ 500: 9adc2a3b asr x27, x17, x28
+ 504: 9ad22c5c ror x28, x2, x18
+ 508: 1b0e39ea madd w10, w15, w14, w14
+ 50c: 1b0fcf23 msub w3, w25, w15, w19
+ 510: 9b1010ae madd x14, x5, x16, x4
+ 514: 9b048b3a msub x26, x25, x4, x2
+ 518: 9b3d4582 smaddl x2, w12, w29, x17
+ 51c: 9b2390e8 smsubl x8, w7, w3, x4
+ 520: 9bba6499 umaddl x25, w4, w26, x25
+ 524: 9ba0ea24 umsubl x4, w17, w0, x26
+ 528: 1e2f0af1 fmul s17, s23, s15
+ 52c: 1e311b95 fdiv s21, s28, s17
+ 530: 1e23295b fadd s27, s10, s3
+ 534: 1e3938e0 fsub s0, s7, s25
+ 538: 1e2f08c9 fmul s9, s6, s15
+ 53c: 1e6a09fd fmul d29, d15, d10
+ 540: 1e671a22 fdiv d2, d17, d7
+ 544: 1e77296b fadd d11, d11, d23
+ 548: 1e773ba7 fsub d7, d29, d23
+ 54c: 1e6b0b6e fmul d14, d27, d11
+ 550: 1f18308b fmadd s11, s4, s24, s12
+ 554: 1f14adcf fmsub s15, s14, s20, s11
+ 558: 1f2b31bc fnmadd s28, s13, s11, s12
+ 55c: 1f3a3bd7 fnmadd s23, s30, s26, s14
+ 560: 1f4a1da9 fmadd d9, d13, d10, d7
+ 564: 1f4f8fa5 fmsub d5, d29, d15, d3
+ 568: 1f6f798b fnmadd d11, d12, d15, d30
+ 56c: 1f73523e fnmadd d30, d17, d19, d20
+ 570: 1e2040fb fmov s27, s7
+ 574: 1e20c2a9 fabs s9, s21
+ 578: 1e214122 fneg s2, s9
+ 57c: 1e21c0fb fsqrt s27, s7
+ 580: 1e22c3dd fcvt d29, s30
+ 584: 1e604031 fmov d17, d1
+ 588: 1e60c0c2 fabs d2, d6
+ 58c: 1e61406a fneg d10, d3
+ 590: 1e61c178 fsqrt d24, d11
+ 594: 1e624027 fcvt s7, d1
+ 598: 1e38000b fcvtzs w11, s0
+ 59c: 9e380243 fcvtzs x3, s18
+ 5a0: 1e7800dc fcvtzs w28, d6
+ 5a4: 9e7800d6 fcvtzs x22, d6
+ 5a8: 1e220360 scvtf s0, w27
+ 5ac: 9e22005a scvtf s26, x2
+ 5b0: 1e6200e5 scvtf d5, w7
+ 5b4: 9e62017c scvtf d28, x11
+ 5b8: 1e2601b9 fmov w25, s13
+ 5bc: 9e6602eb fmov x11, d23
+ 5c0: 1e270113 fmov s19, w8
+ 5c4: 9e6702b2 fmov d18, x21
+ 5c8: 1e342320 fcmp s25, s20
+ 5cc: 1e722260 fcmp d19, d18
+ 5d0: 1e202048 fcmp s2, #0.0
+ 5d4: 1e6023a8 fcmp d29, #0.0
+ 5d8: 29025668 stp w8, w21, [x19, #16]
+ 5dc: 29403e86 ldp w6, w15, [x20]
+ 5e0: 6966387b ldpsw x27, x14, [x3, #-208]
+ 5e4: a93b316a stp x10, x12, [x11, #-80]
+ 5e8: a97e38e7 ldp x7, x14, [x7, #-32]
+ 5ec: 298e5980 stp w0, w22, [x12, #112]!
+ 5f0: 29c61d0e ldp w14, w7, [x8, #48]!
+ 5f4: 69c00930 ldpsw x16, x2, [x9, #0]!
+ 5f8: a9bc7434 stp x20, x29, [x1, #-64]!
+ 5fc: a9c530b5 ldp x21, x12, [x5, #80]!
+ 600: 28b26378 stp w24, w24, [x27], #-112
+ 604: 28c25a5c ldp w28, w22, [x18], #16
+ 608: 68f419b1 ldpsw x17, x6, [x13], #-96
+ 60c: a8b668bc stp x28, x26, [x5], #-160
+ 610: a8f15746 ldp x6, x21, [x26], #-240
+ 614: 280453cd stnp w13, w20, [x30, #32]
+ 618: 284c2cb1 ldnp w17, w11, [x5, #96]
+ 61c: a83a534d stnp x13, x20, [x26, #-96]
+ 620: a87b32fd ldnp x29, x12, [x23, #-80]
+ 624: 05a08020 mov z0.s, p0/m, s1
+ 628: 04b0e3e0 incw x0
+ 62c: 0470e7e1 dech x1
+ 630: 042f9c20 lsl z0.b, z1.b, #7
+ 634: 043f9c35 lsl z21.h, z1.h, #15
+ 638: 047f9c20 lsl z0.s, z1.s, #31
+ 63c: 04ff9c20 lsl z0.d, z1.d, #63
+ 640: 04299420 lsr z0.b, z1.b, #7
+ 644: 04319160 asr z0.h, z11.h, #15
+ 648: 0461943e lsr z30.s, z1.s, #31
+ 64c: 04a19020 asr z0.d, z1.d, #63
+ 650: 042053ff addvl sp, x0, #31
+ 654: 047f5401 addpl x1, sp, #-32
+ 658: 25208028 cntp x8, p0, p1.b
+ 65c: 2538cfe0 mov z0.b, #127
+ 660: 2578d001 mov z1.h, #-128
+ 664: 25b8efe2 mov z2.s, #32512
+ 668: 25f8f007 mov z7.d, #-32768
+ 66c: a400a3e0 ld1b {z0.b}, p0/z, [sp]
+ 670: a4a8a7ea ld1h {z10.h}, p1/z, [sp, #-8, mul vl]
+ 674: a547a814 ld1w {z20.s}, p2/z, [x0, #7, mul vl]
+ 678: a4084ffe ld1b {z30.b}, p3/z, [sp, x8]
+ 67c: a55c53e0 ld1w {z0.s}, p4/z, [sp, x28, lsl #2]
+ 680: a5e1540b ld1d {z11.d}, p5/z, [x0, x1, lsl #3]
+ 684: e400fbf6 st1b {z22.b}, p6, [sp]
+ 688: e408ffff st1b {z31.b}, p7, [sp, #-8, mul vl]
+ 68c: e547e400 st1w {z0.s}, p1, [x0, #7, mul vl]
+ 690: e4014be0 st1b {z0.b}, p2, [sp, x1]
+ 694: e4a84fe0 st1h {z0.h}, p3, [sp, x8, lsl #1]
+ 698: e5f25000 st1d {z0.d}, p4, [x0, x18, lsl #3]
+ 69c: 858043e0 ldr z0, [sp]
+ 6a0: 85a043ff ldr z31, [sp, #-256, mul vl]
+ 6a4: e59f5d08 str z8, [x8, #255, mul vl]
+ 6a8: 1e601000 fmov d0, #2.000000000000000000e+00
+ 6ac: 1e603000 fmov d0, #2.125000000000000000e+00
+ 6b0: 1e621000 fmov d0, #4.000000000000000000e+00
+ 6b4: 1e623000 fmov d0, #4.250000000000000000e+00
+ 6b8: 1e641000 fmov d0, #8.000000000000000000e+00
+ 6bc: 1e643000 fmov d0, #8.500000000000000000e+00
+ 6c0: 1e661000 fmov d0, #1.600000000000000000e+01
+ 6c4: 1e663000 fmov d0, #1.700000000000000000e+01
+ 6c8: 1e681000 fmov d0, #1.250000000000000000e-01
+ 6cc: 1e683000 fmov d0, #1.328125000000000000e-01
+ 6d0: 1e6a1000 fmov d0, #2.500000000000000000e-01
+ 6d4: 1e6a3000 fmov d0, #2.656250000000000000e-01
+ 6d8: 1e6c1000 fmov d0, #5.000000000000000000e-01
+ 6dc: 1e6c3000 fmov d0, #5.312500000000000000e-01
+ 6e0: 1e6e1000 fmov d0, #1.000000000000000000e+00
+ 6e4: 1e6e3000 fmov d0, #1.062500000000000000e+00
+ 6e8: 1e701000 fmov d0, #-2.000000000000000000e+00
+ 6ec: 1e703000 fmov d0, #-2.125000000000000000e+00
+ 6f0: 1e721000 fmov d0, #-4.000000000000000000e+00
+ 6f4: 1e723000 fmov d0, #-4.250000000000000000e+00
+ 6f8: 1e741000 fmov d0, #-8.000000000000000000e+00
+ 6fc: 1e743000 fmov d0, #-8.500000000000000000e+00
+ 700: 1e761000 fmov d0, #-1.600000000000000000e+01
+ 704: 1e763000 fmov d0, #-1.700000000000000000e+01
+ 708: 1e781000 fmov d0, #-1.250000000000000000e-01
+ 70c: 1e783000 fmov d0, #-1.328125000000000000e-01
+ 710: 1e7a1000 fmov d0, #-2.500000000000000000e-01
+ 714: 1e7a3000 fmov d0, #-2.656250000000000000e-01
+ 718: 1e7c1000 fmov d0, #-5.000000000000000000e-01
+ 71c: 1e7c3000 fmov d0, #-5.312500000000000000e-01
+ 720: 1e7e1000 fmov d0, #-1.000000000000000000e+00
+ 724: 1e7e3000 fmov d0, #-1.062500000000000000e+00
+ 728: 04bb020e add z14.s, z16.s, z27.s
+ 72c: 04ba04c0 sub z0.s, z6.s, z26.s
+ 730: 6586019b fadd z27.s, z12.s, z6.s
+ 734: 6593089e fmul z30.s, z4.s, z19.s
+ 738: 65c2060b fsub z11.d, z16.d, z2.d
+ 73c: 04d6a18f abs z15.d, p0/m, z12.d
+ 740: 040016e9 add z9.b, p5/m, z9.b, z23.b
+ 744: 0490835e asr z30.s, p0/m, z30.s, z26.s
+ 748: 045aaa44 cnt z4.h, p2/m, z18.h
+ 74c: 04938579 lsl z25.s, p1/m, z25.s, z11.s
+ 750: 0411990a lsr z10.b, p6/m, z10.b, z8.b
+ 754: 04101624 mul z4.b, p5/m, z4.b, z17.b
+ 758: 0497ad3e neg z30.s, p3/m, z9.s
+ 75c: 04deae80 not z0.d, p3/m, z20.d
+ 760: 04481c77 smax z23.h, p7/m, z23.h, z3.h
+ 764: 044a0960 smin z0.h, p2/m, z0.h, z11.h
+ 768: 04c118ab sub z11.d, p6/m, z11.d, z5.d
+ 76c: 049caa30 fabs z16.s, p2/m, z17.s
+ 770: 6580834f fadd z15.s, p0/m, z15.s, z26.s
+ 774: 658d9e6a fdiv z10.s, p7/m, z10.s, z19.s
+ 778: 65c68238 fmax z24.d, p0/m, z24.d, z17.d
+ 77c: 65c791fa fmin z26.d, p4/m, z26.d, z15.d
+ 780: 65c28a38 fmul z24.d, p2/m, z24.d, z17.d
+ 784: 049db7be fneg z30.s, p5/m, z29.s
+ 788: 6582b552 frintm z18.s, p5/m, z10.s
+ 78c: 65c0abde frintn z30.d, p2/m, z30.d
+ 790: 6581bbc6 frintp z6.s, p6/m, z30.s
+ 794: 65cdb854 fsqrt z20.d, p6/m, z2.d
+ 798: 658197a9 fsub z9.s, p5/m, z9.s, z29.s
+ 79c: 65f60872 fmla z18.d, p2/m, z3.d, z22.d
+ 7a0: 65ec29af fmls z15.d, p2/m, z13.d, z12.d
+ 7a4: 65be43cc fnmla z12.s, p0/m, z30.s, z30.s
+ 7a8: 65e06ea7 fnmls z7.d, p3/m, z21.d, z0.d
+ 7ac: 04544b53 mla z19.h, p2/m, z26.h, z20.h
+ 7b0: 04d57c30 mls z16.d, p7/m, z1.d, z21.d
+ 7b4: 04323095 and z21.d, z4.d, z18.d
+ 7b8: 04a7324c eor z12.d, z18.d, z7.d
+ 7bc: 046d31f9 orr z25.d, z15.d, z13.d
+ 7c0: 04da30eb andv d11, p4, z7.d
+ 7c4: 04d8252b orv d11, p1, z9.d
+ 7c8: 04d93c1c eorv d28, p7, z0.d
+ 7cc: 044820f0 smaxv h16, p0, z7.h
+ 7d0: 040a2fac sminv b12, p3, z29.b
+ 7d4: 65873975 fminv s21, p6, z11.s
+ 7d8: 65c62886 fmaxv d6, p2, z4.d
+ 7dc: 65d820e7 fadda d7, p0, d7, z7.d
+ 7e0: 04013fac uaddv d12, p7, z29.b
*/
static const unsigned int insns[] =
{
- 0x8b0772d3, 0xcb4a3570, 0xab9c09bb, 0xeb9aa794,
- 0x0b934e68, 0x4b0a3924, 0x2b1e3568, 0x6b132720,
- 0x8a154c14, 0xaa1445d5, 0xca01cf99, 0xea8b3f6a,
- 0x0a8c5cb9, 0x2a4a11d2, 0x4a855aa4, 0x6a857415,
- 0x8aa697da, 0xaa6d7423, 0xca29bf80, 0xea3cb8bd,
- 0x0a675249, 0x2ab961ba, 0x4a331899, 0x6a646345,
- 0x11055267, 0x31064408, 0x51028e9d, 0x710bdee8,
- 0x91082d81, 0xb106a962, 0xd10b33ae, 0xf10918ab,
- 0x121102d7, 0x3204cd44, 0x5204cf00, 0x72099fb3,
- 0x92729545, 0xb20e37cc, 0xd27c34be, 0xf27e4efa,
- 0x14000000, 0x17ffffd7, 0x1400017f, 0x94000000,
- 0x97ffffd4, 0x9400017c, 0x3400000c, 0x34fffa2c,
- 0x34002f2c, 0x35000014, 0x35fff9d4, 0x35002ed4,
- 0xb400000c, 0xb4fff96c, 0xb4002e6c, 0xb5000018,
- 0xb5fff918, 0xb5002e18, 0x10000006, 0x10fff8a6,
- 0x10002da6, 0x90000015, 0x36080001, 0x360ff821,
- 0x36082d21, 0x37480008, 0x374ff7c8, 0x37482cc8,
- 0x128b50ec, 0x52a9ff8b, 0x7281d095, 0x92edfebd,
- 0xd28361e3, 0xf2a4cc96, 0x9346590c, 0x33194f33,
- 0x531d3d89, 0x9350433c, 0xb34464ac, 0xd3462140,
- 0x139a61a4, 0x93d87fd7, 0x54000000, 0x54fff5a0,
- 0x54002aa0, 0x54000001, 0x54fff541, 0x54002a41,
- 0x54000002, 0x54fff4e2, 0x540029e2, 0x54000002,
- 0x54fff482, 0x54002982, 0x54000003, 0x54fff423,
- 0x54002923, 0x54000003, 0x54fff3c3, 0x540028c3,
- 0x54000004, 0x54fff364, 0x54002864, 0x54000005,
- 0x54fff305, 0x54002805, 0x54000006, 0x54fff2a6,
- 0x540027a6, 0x54000007, 0x54fff247, 0x54002747,
- 0x54000008, 0x54fff1e8, 0x540026e8, 0x54000009,
- 0x54fff189, 0x54002689, 0x5400000a, 0x54fff12a,
- 0x5400262a, 0x5400000b, 0x54fff0cb, 0x540025cb,
- 0x5400000c, 0x54fff06c, 0x5400256c, 0x5400000d,
- 0x54fff00d, 0x5400250d, 0x5400000e, 0x54ffefae,
- 0x540024ae, 0x5400000f, 0x54ffef4f, 0x5400244f,
- 0xd4063721, 0xd4035082, 0xd400bfe3, 0xd4282fc0,
- 0xd444c320, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
- 0xd5033fdf, 0xd5033f9f, 0xd5033abf, 0xd61f0040,
- 0xd63f00a0, 0xc8147c55, 0xc805fcfd, 0xc85f7e05,
- 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88157cf8,
- 0x8815ff9a, 0x885f7cd5, 0x885fffcf, 0x889ffc73,
- 0x88dffc56, 0x48127c0f, 0x480bff85, 0x485f7cdd,
- 0x485ffcf2, 0x489fff99, 0x48dffe62, 0x080a7c3e,
- 0x0814fed5, 0x085f7c59, 0x085ffcb8, 0x089ffc70,
- 0x08dfffb6, 0xc87f0a68, 0xc87fcdc7, 0xc82870bb,
- 0xc825b8c8, 0x887f12d9, 0x887fb9ed, 0x8834215a,
- 0x8837ca52, 0xf806317e, 0xb81b3337, 0x39000dc2,
- 0x78005149, 0xf84391f4, 0xb85b220c, 0x385fd356,
- 0x785d127e, 0x389f4149, 0x79801e3c, 0x79c014a3,
- 0xb89a5231, 0xfc5ef282, 0xbc5f60f6, 0xfc12125e,
- 0xbc0152cd, 0xf8190e49, 0xb800befd, 0x381ffd92,
- 0x781e9e90, 0xf8409fa3, 0xb8413c79, 0x385fffa1,
- 0x785c7fa8, 0x389f3dc5, 0x78801f6a, 0x78c19d4b,
- 0xb89a4ec4, 0xfc408eeb, 0xbc436e79, 0xfc152ce1,
- 0xbc036f28, 0xf8025565, 0xb80135f8, 0x381ff74f,
- 0x781fa652, 0xf851a447, 0xb85e557b, 0x385e7472,
- 0x785e070a, 0x38804556, 0x78819591, 0x78dc24e8,
- 0xb89cd6d7, 0xfc430738, 0xbc5f6595, 0xfc1225b2,
- 0xbc1d7430, 0xf82fcac2, 0xb83d6a02, 0x382e5a54,
- 0x7834fa66, 0xf86ecbae, 0xb86cda90, 0x3860d989,
- 0x78637a2c, 0x38a3fa22, 0x78b15827, 0x78f2d9f9,
- 0xb8ac6ab7, 0xfc6879a5, 0xbc767943, 0xfc3bc84e,
- 0xbc3968d4, 0xf91fc0fe, 0xb91da50f, 0x391d280b,
- 0x791d2e23, 0xf95bc8e2, 0xb95ce525, 0x395ae53c,
- 0x795c9282, 0x399d7dd6, 0x799fe008, 0x79de9bc0,
- 0xb99aae78, 0xfd597598, 0xbd5d1d08, 0xfd1f3dea,
- 0xbd1a227a, 0x5800148a, 0x18000003, 0xf88092e0,
- 0xd8ffdf00, 0xf8a84860, 0xf99d7560, 0x1a1c012d,
- 0x3a1c027b, 0x5a060253, 0x7a03028e, 0x9a0801d0,
- 0xba0803a0, 0xda140308, 0xfa00038c, 0x0b3010d7,
- 0x2b37ab39, 0xcb2466da, 0x6b33efb1, 0x8b350fcb,
- 0xab208a70, 0xcb39e52b, 0xeb2c9291, 0x3a4bd1a3,
- 0x7a4c81a2, 0xba42106c, 0xfa5560e3, 0x3a4e3844,
- 0x7a515a26, 0xba4c2940, 0xfa52aaae, 0x1a8cc1b5,
- 0x1a8f976a, 0x5a8981a0, 0x5a9a6492, 0x9a8793ac,
- 0x9a9474e6, 0xda83d2b6, 0xda9b9593, 0x5ac00200,
- 0x5ac006f1, 0x5ac009d1, 0x5ac013d8, 0x5ac016d8,
- 0xdac00223, 0xdac005ac, 0xdac00ac9, 0xdac00c00,
- 0xdac01205, 0xdac016d9, 0x1ac0089d, 0x1add0fa0,
- 0x1ad52225, 0x1ad22529, 0x1ac82b61, 0x1acd2e92,
- 0x9acc0b28, 0x9adc0ca7, 0x9adb2225, 0x9ad42757,
- 0x9adc291c, 0x9ac42fa3, 0x1b1a55d1, 0x1b0bafc1,
- 0x9b067221, 0x9b1ea0de, 0x9b2e20d5, 0x9b38cd4a,
- 0x9bae6254, 0x9ba59452, 0x1e2d0a48, 0x1e3c19c2,
- 0x1e3c298f, 0x1e213980, 0x1e240baf, 0x1e77082c,
- 0x1e72191b, 0x1e6b2a97, 0x1e723988, 0x1e770b1a,
- 0x1f0d66f5, 0x1f01b956, 0x1f227a8e, 0x1f365ba7,
- 0x1f4f14ad, 0x1f45a98e, 0x1f60066a, 0x1f620054,
- 0x1e204139, 0x1e20c094, 0x1e214363, 0x1e21c041,
- 0x1e22c01e, 0x1e60408c, 0x1e60c361, 0x1e6142c8,
- 0x1e61c16b, 0x1e624396, 0x1e3802dc, 0x9e380374,
- 0x1e78000e, 0x9e78017a, 0x1e2202dc, 0x9e220150,
- 0x1e6202a8, 0x9e620395, 0x1e260318, 0x9e660268,
- 0x1e270188, 0x9e6700e6, 0x1e3023c0, 0x1e6b2320,
- 0x1e202168, 0x1e602168, 0x2910323d, 0x297449d6,
- 0x6948402b, 0xa9072f40, 0xa9410747, 0x29801f0a,
- 0x29e07307, 0x69e272b9, 0xa9bf49d4, 0xa9c529a8,
- 0x28b0605a, 0x28e866a2, 0x68ee0ab1, 0xa886296c,
- 0xa8fe1a38, 0x282479c3, 0x286e534f, 0xa8386596,
- 0xa8755a3b, 0x1e601000, 0x1e603000, 0x1e621000,
- 0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
- 0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
- 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
- 0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
- 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
- 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
- 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
- 0x1e7e3000,
+ 0x8b0d82fa, 0xcb49970c, 0xab889dfc, 0xeb9ee787,
+ 0x0b9b3ec9, 0x4b9279a3, 0x2b88474e, 0x6b8c56c0,
+ 0x8a1a51e0, 0xaa11f4ba, 0xca0281b8, 0xea918c7c,
+ 0x0a5d4a19, 0x2a4b264d, 0x4a523ca5, 0x6a9b6ae2,
+ 0x8a70b79b, 0xaaba9728, 0xca6dfe3d, 0xea627f1c,
+ 0x0aa70f53, 0x2aaa0f06, 0x4a6176a4, 0x6a604eb0,
+ 0x1105ed91, 0x3100583e, 0x5101f8bd, 0x710f0306,
+ 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
+ 0x120cb166, 0x321764bc, 0x52174681, 0x720c0247,
+ 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
+ 0x14000000, 0x17ffffd7, 0x140001cf, 0x94000000,
+ 0x97ffffd4, 0x940001cc, 0x3400000a, 0x34fffa2a,
+ 0x3400392a, 0x35000008, 0x35fff9c8, 0x350038c8,
+ 0xb400000b, 0xb4fff96b, 0xb400386b, 0xb500001d,
+ 0xb5fff91d, 0xb500381d, 0x10000013, 0x10fff8b3,
+ 0x100037b3, 0x90000013, 0x36300016, 0x3637f836,
+ 0x36303736, 0x3758000c, 0x375ff7cc, 0x375836cc,
+ 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
+ 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
+ 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
+ 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
+ 0x540034a0, 0x54000001, 0x54fff541, 0x54003441,
+ 0x54000002, 0x54fff4e2, 0x540033e2, 0x54000002,
+ 0x54fff482, 0x54003382, 0x54000003, 0x54fff423,
+ 0x54003323, 0x54000003, 0x54fff3c3, 0x540032c3,
+ 0x54000004, 0x54fff364, 0x54003264, 0x54000005,
+ 0x54fff305, 0x54003205, 0x54000006, 0x54fff2a6,
+ 0x540031a6, 0x54000007, 0x54fff247, 0x54003147,
+ 0x54000008, 0x54fff1e8, 0x540030e8, 0x54000009,
+ 0x54fff189, 0x54003089, 0x5400000a, 0x54fff12a,
+ 0x5400302a, 0x5400000b, 0x54fff0cb, 0x54002fcb,
+ 0x5400000c, 0x54fff06c, 0x54002f6c, 0x5400000d,
+ 0x54fff00d, 0x54002f0d, 0x5400000e, 0x54ffefae,
+ 0x54002eae, 0x5400000f, 0x54ffef4f, 0x54002e4f,
+ 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
+ 0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
+ 0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
+ 0xd63f0280, 0xc80a7d1b, 0xc800fea1, 0xc85f7fb1,
+ 0xc85fff9d, 0xc89ffee1, 0xc8dffe95, 0x88167e7b,
+ 0x880bfcd0, 0x885f7c12, 0x885ffd44, 0x889ffed8,
+ 0x88dffe6a, 0x48017fc5, 0x4808fe2c, 0x485f7dc9,
+ 0x485ffc27, 0x489ffe05, 0x48dffd82, 0x080a7c6c,
+ 0x081cff4e, 0x085f7d5e, 0x085ffeae, 0x089ffd2d,
+ 0x08dfff76, 0xc87f4d7c, 0xc87fcc5e, 0xc8220417,
+ 0xc82cb5f0, 0x887f55b2, 0x887ff90b, 0x88382c2d,
+ 0x883aedb5, 0xf819928b, 0xb803e21c, 0x381f713b,
+ 0x781ce322, 0xf850f044, 0xb85e129e, 0x385e92f2,
+ 0x785ff35d, 0x39801921, 0x7881318b, 0x78dce02b,
+ 0xb8829313, 0xfc45f318, 0xbc5d50af, 0xfc001375,
+ 0xbc1951b7, 0xf8008c0b, 0xb801dc03, 0x38009dcb,
+ 0x781fdf1d, 0xf8570e2d, 0xb85faecc, 0x385f6d8d,
+ 0x785ebea0, 0x38804cf7, 0x789cbce3, 0x78df9cbc,
+ 0xb89eed38, 0xfc40cd6e, 0xbc5bdd93, 0xfc103c14,
+ 0xbc040c08, 0xf81a2784, 0xb81ca4ec, 0x381e855b,
+ 0x7801b506, 0xf853654e, 0xb85d74b0, 0x384095c2,
+ 0x785ec5bc, 0x389e15a9, 0x789dc703, 0x78c06474,
+ 0xb89ff667, 0xfc57e51e, 0xbc4155f9, 0xfc05a6ee,
+ 0xbc1df408, 0xf835da4a, 0xb836d9a4, 0x3833580d,
+ 0x7826cb6c, 0xf8706900, 0xb87ae880, 0x3865db2e,
+ 0x78724889, 0x38a7789b, 0x78beca2f, 0x78f6c810,
+ 0xb8bef956, 0xfc6afabd, 0xbc734963, 0xfc3d5b8d,
+ 0xbc25fbb7, 0xf9189d05, 0xb91ecb1d, 0x39187a33,
+ 0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
+ 0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
+ 0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
+ 0xbd1b1869, 0x58001e9b, 0x1800000b, 0xf8945060,
+ 0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
+ 0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
+ 0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b12,
+ 0x2b2a278d, 0xcb22aa0f, 0x6b2d29bd, 0x8b2cce8c,
+ 0xab2b877e, 0xcb21c8ee, 0xeb3ba47d, 0x3a4d400e,
+ 0x7a5232c6, 0xba5e624e, 0xfa53814c, 0x3a52d8c2,
+ 0x7a4d8924, 0xba4b3aab, 0xfa4d7882, 0x1a96804c,
+ 0x1a912618, 0x5a90b0e6, 0x5a96976b, 0x9a9db06a,
+ 0x9a9b374c, 0xda95c14f, 0xda89c6fe, 0x5ac0015e,
+ 0x5ac005fd, 0x5ac00bdd, 0x5ac012b9, 0x5ac01404,
+ 0xdac002b2, 0xdac0061d, 0xdac00a95, 0xdac00e66,
+ 0xdac0107e, 0xdac01675, 0x1ac00b0b, 0x1ace0f3b,
+ 0x1ad221c3, 0x1ad825e7, 0x1ad92a3c, 0x1adc2f42,
+ 0x9ada0b25, 0x9ad20e1b, 0x9acc22a6, 0x9acc2480,
+ 0x9adc2a3b, 0x9ad22c5c, 0x1b0e39ea, 0x1b0fcf23,
+ 0x9b1010ae, 0x9b048b3a, 0x9b3d4582, 0x9b2390e8,
+ 0x9bba6499, 0x9ba0ea24, 0x1e2f0af1, 0x1e311b95,
+ 0x1e23295b, 0x1e3938e0, 0x1e2f08c9, 0x1e6a09fd,
+ 0x1e671a22, 0x1e77296b, 0x1e773ba7, 0x1e6b0b6e,
+ 0x1f18308b, 0x1f14adcf, 0x1f2b31bc, 0x1f3a3bd7,
+ 0x1f4a1da9, 0x1f4f8fa5, 0x1f6f798b, 0x1f73523e,
+ 0x1e2040fb, 0x1e20c2a9, 0x1e214122, 0x1e21c0fb,
+ 0x1e22c3dd, 0x1e604031, 0x1e60c0c2, 0x1e61406a,
+ 0x1e61c178, 0x1e624027, 0x1e38000b, 0x9e380243,
+ 0x1e7800dc, 0x9e7800d6, 0x1e220360, 0x9e22005a,
+ 0x1e6200e5, 0x9e62017c, 0x1e2601b9, 0x9e6602eb,
+ 0x1e270113, 0x9e6702b2, 0x1e342320, 0x1e722260,
+ 0x1e202048, 0x1e6023a8, 0x29025668, 0x29403e86,
+ 0x6966387b, 0xa93b316a, 0xa97e38e7, 0x298e5980,
+ 0x29c61d0e, 0x69c00930, 0xa9bc7434, 0xa9c530b5,
+ 0x28b26378, 0x28c25a5c, 0x68f419b1, 0xa8b668bc,
+ 0xa8f15746, 0x280453cd, 0x284c2cb1, 0xa83a534d,
+ 0xa87b32fd, 0x05a08020, 0x04b0e3e0, 0x0470e7e1,
+ 0x042f9c20, 0x043f9c35, 0x047f9c20, 0x04ff9c20,
+ 0x04299420, 0x04319160, 0x0461943e, 0x04a19020,
+ 0x042053ff, 0x047f5401, 0x25208028, 0x2538cfe0,
+ 0x2578d001, 0x25b8efe2, 0x25f8f007, 0xa400a3e0,
+ 0xa4a8a7ea, 0xa547a814, 0xa4084ffe, 0xa55c53e0,
+ 0xa5e1540b, 0xe400fbf6, 0xe408ffff, 0xe547e400,
+ 0xe4014be0, 0xe4a84fe0, 0xe5f25000, 0x858043e0,
+ 0x85a043ff, 0xe59f5d08, 0x1e601000, 0x1e603000,
+ 0x1e621000, 0x1e623000, 0x1e641000, 0x1e643000,
+ 0x1e661000, 0x1e663000, 0x1e681000, 0x1e683000,
+ 0x1e6a1000, 0x1e6a3000, 0x1e6c1000, 0x1e6c3000,
+ 0x1e6e1000, 0x1e6e3000, 0x1e701000, 0x1e703000,
+ 0x1e721000, 0x1e723000, 0x1e741000, 0x1e743000,
+ 0x1e761000, 0x1e763000, 0x1e781000, 0x1e783000,
+ 0x1e7a1000, 0x1e7a3000, 0x1e7c1000, 0x1e7c3000,
+ 0x1e7e1000, 0x1e7e3000, 0x04bb020e, 0x04ba04c0,
+ 0x6586019b, 0x6593089e, 0x65c2060b, 0x04d6a18f,
+ 0x040016e9, 0x0490835e, 0x045aaa44, 0x04938579,
+ 0x0411990a, 0x04101624, 0x0497ad3e, 0x04deae80,
+ 0x04481c77, 0x044a0960, 0x04c118ab, 0x049caa30,
+ 0x6580834f, 0x658d9e6a, 0x65c68238, 0x65c791fa,
+ 0x65c28a38, 0x049db7be, 0x6582b552, 0x65c0abde,
+ 0x6581bbc6, 0x65cdb854, 0x658197a9, 0x65f60872,
+ 0x65ec29af, 0x65be43cc, 0x65e06ea7, 0x04544b53,
+ 0x04d57c30, 0x04323095, 0x04a7324c, 0x046d31f9,
+ 0x04da30eb, 0x04d8252b, 0x04d93c1c, 0x044820f0,
+ 0x040a2fac, 0x65873975, 0x65c62886, 0x65d820e7,
+ 0x04013fac,
};
// END Generated code -- do not edit
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index 80ddb9b31..f554b5e15 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -139,6 +139,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21);
// Java stack pointer
REGISTER_DECLARATION(Register, esp, r20);
+// Preserved predicate register with all elements set TRUE.
+REGISTER_DECLARATION(PRegister, ptrue, p7);
+
#define assert_cond(ARG1) assert(ARG1, #ARG1)
namespace asm_util {
@@ -273,6 +276,14 @@ public:
f(r->encoding_nocheck(), lsb + 4, lsb);
}
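+
+ // prf() encodes a 4-bit SVE predicate register field (p0-p15), while
+ // pgrf() encodes a 3-bit governing predicate field (p0-p7).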
+ void prf(PRegister r, int lsb) {
+ f(r->encoding_nocheck(), lsb + 3, lsb);
+ }
+
+ void pgrf(PRegister r, int lsb) {
+ f(r->encoding_nocheck(), lsb + 2, lsb);
+ }
+
unsigned get(int msb = 31, int lsb = 0) {
int nbits = msb - lsb + 1;
unsigned mask = ((1U << nbits) - 1) << lsb;
@@ -554,6 +565,18 @@ class Address {
void lea(MacroAssembler *, Register) const;
static bool offset_ok_for_immed(long offset, uint shift);
+
+ static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) {
+ if (offset % vl == 0) {
+ // Convert address offset into sve imm offset (MUL VL).
+ int sve_offset = offset / vl;
+ if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
+ // sve_offset can be encoded
+ return true;
+ }
+ }
+ return false;
+ }
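+
+ // For example, assuming a 256-bit SVE vector length (vl = 32 bytes) and a
+ // 4-bit signed immediate field (shift = 4): offset = 64 gives sve_offset = 2,
+ // which fits in [-8, 7] and is encodable as [Xn, #2, MUL VL], while
+ // offset = 48 is rejected because it is not a multiple of vl.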
};
// Convience classes
@@ -596,7 +619,9 @@ class InternalAddress: public Address {
InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
};
-const int FPUStateSizeInWords = 32 * 2;
+const int FPUStateSizeInWords = FloatRegisterImpl::number_of_registers *
+ FloatRegisterImpl::save_slots_per_register;
+
typedef enum {
PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM,
PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM,
@@ -667,6 +692,12 @@ public:
void rf(FloatRegister reg, int lsb) {
current->rf(reg, lsb);
}
+ void prf(PRegister reg, int lsb) {
+ current->prf(reg, lsb);
+ }
+ void pgrf(PRegister reg, int lsb) {
+ current->pgrf(reg, lsb);
+ }
void fixed(unsigned value, unsigned mask) {
current->fixed(value, mask);
}
@@ -2228,21 +2259,27 @@ public:
#undef INSN
-#define INSN(NAME, opc, opc2) \
+#define INSN(NAME, opc, opc2, accepted) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \
+ guarantee(T != T1Q && T != T1D, "incorrect arrangement"); \
+ if (accepted < 3) guarantee(T != T2D, "incorrect arrangement"); \
+ if (accepted < 2) guarantee(T != T2S, "incorrect arrangement"); \
+ if (accepted < 1) guarantee(T == T8B || T == T16B, "incorrect arrangement"); \
starti; \
f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \
f((int)T >> 1, 23, 22), f(opc2, 21, 10); \
rf(Vn, 5), rf(Vd, 0); \
}
- INSN(absr, 0, 0b100000101110);
- INSN(negr, 1, 0b100000101110);
- INSN(notr, 1, 0b100000010110);
- INSN(addv, 0, 0b110001101110);
- INSN(cls, 0, 0b100000010010);
- INSN(clz, 1, 0b100000010010);
- INSN(cnt, 0, 0b100000010110);
+ INSN(absr, 0, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+ INSN(negr, 1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+ INSN(notr, 1, 0b100000010110, 0); // accepted arrangements: T8B, T16B
+ INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
+ INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
+ INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
#undef INSN
@@ -2376,13 +2413,18 @@ public:
f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
}
- void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {
- starti;
- f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21);
- f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10);
- rf(Vn, 5), rf(Rd, 0);
+#define INSN(NAME, op) \
+ void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { \
+ starti; \
+ f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); \
+ f(((idx<<1)|1)<<(int)T, 20, 16), f(op, 15, 10); \
+ rf(Vn, 5), rf(Rd, 0); \
}
+ INSN(umov, 0b001111);
+ INSN(smov, 0b001011);
+#undef INSN
+
#define INSN(NAME, opc, opc2, isSHR) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
starti; \
@@ -2582,13 +2624,299 @@ public:
#undef INSN
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
- {
+{
+ starti;
+ assert(T == T8B || T == T16B, "invalid arrangement");
+ assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
+ f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
+ rf(Vm, 16), f(0, 15), f(index, 14, 11);
+ f(0, 10), rf(Vn, 5), rf(Vd, 0);
+}
+
+// SVE integer arithmetic - unpredicated
+#define INSN(NAME, opcode) \
+ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
+ starti; \
+ assert(T != Q, "invalid register variant"); \
+ f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21), \
+ rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
+ }
+ INSN(sve_add, 0b000);
+ INSN(sve_sub, 0b001);
+#undef INSN
+
+// SVE floating-point arithmetic - unpredicated
+#define INSN(NAME, opcode) \
+ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
+ starti; \
+ assert(T == S || T == D, "invalid register variant"); \
+ f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21), \
+ rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
+ }
+
+ INSN(sve_fadd, 0b000);
+ INSN(sve_fmul, 0b010);
+ INSN(sve_fsub, 0b001);
+#undef INSN
+
+private:
+ void sve_predicate_reg_insn(unsigned op24, unsigned op13,
+ FloatRegister Zd_or_Vd, SIMD_RegVariant T,
+ PRegister Pg, FloatRegister Zn_or_Vn) {
+ starti;
+ f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13);
+ pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
+ }
+
+public:
+
+// SVE integer arithmetic - predicated
+#define INSN(NAME, op1, op2) \
+ void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \
+ assert(T != Q, "invalid register variant"); \
+ sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn); \
+ }
+
+ INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary
+ INSN(sve_add, 0b00000100, 0b000000000); // vector add
+ INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
+ INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right
+ INSN(sve_cnt, 0b00000100, 0b011010101); // count non-zero bits
+ INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element
+ INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
+ INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left
+ INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right
+ INSN(sve_mul, 0b00000100, 0b010000000); // vector mul
+ INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary
+ INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary
+ INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar
+ INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
+ INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
+ INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
+ INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
+ INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
+ INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
+#undef INSN
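+
+ // Note: in the binary predicated forms the first operand is both destination
+ // and first source (Zdn), e.g. sve_add(z9, B, p5, z23) assembles to
+ // "add z9.b, p5/m, z9.b, z23.b"; the unary forms take a separate source
+ // register, and the reductions (sve_andv, sve_smaxv, etc.) write a scalar V register.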
+
+// SVE floating-point arithmetic - predicated
+#define INSN(NAME, op1, op2) \
+ void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
+ assert(T == S || T == D, "invalid register variant"); \
+ sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \
+ }
+
+ INSN(sve_fabs, 0b00000100, 0b011100101);
+ INSN(sve_fadd, 0b01100101, 0b000000100);
+ INSN(sve_fadda, 0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
+ INSN(sve_fdiv, 0b01100101, 0b001101100);
+ INSN(sve_fmax, 0b01100101, 0b000110100); // floating-point maximum
+ INSN(sve_fmaxv, 0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
+ INSN(sve_fmin, 0b01100101, 0b000111100); // floating-point minimum
+ INSN(sve_fminv, 0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
+ INSN(sve_fmul, 0b01100101, 0b000010100);
+ INSN(sve_fneg, 0b00000100, 0b011101101);
+ INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
+ INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
+ INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
+ INSN(sve_fsqrt, 0b01100101, 0b001101101);
+ INSN(sve_fsub, 0b01100101, 0b000001100);
+#undef INSN
+
+ // SVE multiply-add/sub - predicated
+#define INSN(NAME, op0, op1, op2) \
+ void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
+ starti; \
+ assert(T != Q, "invalid size"); \
+ f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16); \
+ f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0); \
+ }
+
+ INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm
+ INSN(sve_fmls, 0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
+ INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
+ INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
+ INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
+ INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
+#undef INSN
+
+// SVE bitwise logical - unpredicated
+#define INSN(NAME, opc) \
+ void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) { \
+ starti; \
+ f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21), \
+ rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0); \
+ }
+ INSN(sve_and, 0b00);
+ INSN(sve_eor, 0b10);
+ INSN(sve_orr, 0b01);
+#undef INSN
+
+// SVE shift immediate - unpredicated
+#define INSN(NAME, opc, isSHR) \
+ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
+ starti; \
+ /* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \
+ * for shift right is calculated as: \
+ * 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) \
+ * 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) \
+ * 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) \
+ * 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) \
+ * for shift left is calculated as: \
+ * 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 \
+ * 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 \
+ * 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 \
+ * 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 \
+ */ \
+ assert(T != Q, "Invalid register variant"); \
+ if (isSHR) { \
+ assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \
+ } else { \
+ assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \
+ } \
+ int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); \
+ int encodedShift = isSHR ? cVal - shift : cVal + shift; \
+ int tszh = encodedShift >> 5; \
+ int tszl_imm = encodedShift & 0x1f; \
+ f(0b00000100, 31, 24); \
+ f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16); \
+ f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0); \
+ }
+
+ INSN(sve_asr, 0b100, /* isSHR = */ true);
+ INSN(sve_lsl, 0b111, /* isSHR = */ false);
+ INSN(sve_lsr, 0b101, /* isSHR = */ true);
+#undef INSN
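
For reference, a minimal standalone sketch (not HotSpot code) of the tszh:tszl:imm3 arithmetic above, assuming the usual element-size encoding B=0, H=1, S=2, D=3:

#include <cassert>
#include <cstdio>

// Returns the 7-bit tszh:tszl:imm3 value for an SVE shift-by-immediate instruction.
static unsigned encode_sve_shift_imm(unsigned T, unsigned shift, bool is_shr) {
  unsigned esize = 1u << (T + 3);              // element size in bits: 8, 16, 32, 64
  if (is_shr) assert(shift >= 1 && shift <= esize);
  else        assert(shift < esize);
  unsigned cval = esize << (is_shr ? 1 : 0);   // 2*esize for shift right, esize for shift left
  return is_shr ? cval - shift : cval + shift; // bits 6:5 are tszh, bits 4:0 are tszl:imm3
}

int main() {
  printf("%#x\n", encode_sve_shift_imm(3, 64, true));  // LSR .D by 64 -> 0x40
  printf("%#x\n", encode_sve_shift_imm(2, 3, false));  // LSL .S by 3  -> 0x23
  return 0;
}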
+
+private:
+
+ // Scalar base + immediate index
+ void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
+ SIMD_RegVariant T, int op1, int type, int op2) {
+ starti;
+ assert_cond(T >= type);
+ f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
+ f(0, 20), sf(imm, 19, 16), f(op2, 15, 13);
+ pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
+ }
+
+ // Scalar base + scalar index
+ void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
+ SIMD_RegVariant T, int op1, int type, int op2) {
+ starti;
+ assert_cond(T >= type);
+ f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
+ rf(Xm, 16), f(op2, 15, 13);
+ pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
+ }
+
+ void sve_ld_st1(FloatRegister Zt, PRegister Pg,
+ SIMD_RegVariant T, const Address &a,
+ int op1, int type, int imm_op2, int scalar_op2) {
+ switch (a.getMode()) {
+ case Address::base_plus_offset:
+ sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
+ break;
+ case Address::base_plus_offset_reg:
+ sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+public:
+
+// SVE load/store - predicated
+#define INSN(NAME, op1, type, imm_op2, scalar_op2) \
+ void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) { \
+ assert(T != Q, "invalid register variant"); \
+ sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2); \
+ }
+
+ INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
+ INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
+ INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
+ INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
+ INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
+ INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
+ INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
+ INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
+#undef INSN
+
+// SVE load/store - unpredicated
+#define INSN(NAME, op1) \
+ void NAME(FloatRegister Zt, const Address &a) { \
+ starti; \
+ assert(a.index() == noreg, "invalid address variant"); \
+ f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \
+ f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
+ }
+
+ INSN(sve_ldr, 0b100); // LDR (vector)
+ INSN(sve_str, 0b111); // STR (vector)
+#undef INSN
+
+#define INSN(NAME, op) \
+ void NAME(Register Xd, Register Xn, int imm6) { \
+ starti; \
+ f(0b000001000, 31, 23), f(op, 22, 21); \
+ srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
+ }
+
+ INSN(sve_addvl, 0b01);
+ INSN(sve_addpl, 0b11);
+#undef INSN
+
+// SVE inc/dec register by element count
+#define INSN(NAME, op) \
+ void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
+ starti; \
+ assert(T != Q, "invalid size"); \
+ f(0b00000100, 31, 24), f(T, 23, 22), f(0b11, 21, 20); \
+ f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0); \
+ }
+
+ INSN(sve_inc, 0);
+ INSN(sve_dec, 1);
+#undef INSN
+
+ // SVE predicate count
+ void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
+ starti;
+ assert(T != Q, "invalid size");
+ f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
+ prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
+ }
+
+ // SVE dup scalar
+ void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
+ starti;
+ assert(T != Q, "invalid size");
+ f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10);
+ srf(Rn, 5), rf(Zd, 0);
+ }
+
+ // SVE dup imm
+ void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
+ starti;
+ assert(T != Q, "invalid size");
+ int sh = 0;
+ if (imm8 <= 127 && imm8 >= -128) {
+ sh = 0;
+ } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
+ sh = 1;
+ imm8 = (imm8 >> 8);
+ } else {
+ guarantee(false, "invalid immediate");
+ }
+ f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14);
+ f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
+ }
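
The immediate split above can be illustrated with a small standalone sketch (not HotSpot code): values in [-128, 127] are encoded directly with sh=0, while byte-aligned multiples of 256 up to 32512 (disallowed for the B variant) use sh=1 with the value shifted right by 8.

#include <cassert>

// Returns the encoding packed as (sh << 8) | (imm8 & 0xff); asserts on unencodable values.
static int encode_sve_dup_imm(int imm, bool is_byte_variant) {
  if (imm >= -128 && imm <= 127) {
    return (0 << 8) | (imm & 0xff);                    // sh = 0
  }
  assert(!is_byte_variant && imm >= -32768 && imm <= 32512 && (imm & 0xff) == 0);
  return (1 << 8) | ((imm >> 8) & 0xff);               // sh = 1, imm8 = imm >> 8
}
// e.g. encode_sve_dup_imm(0x1200, false) yields sh = 1, imm8 = 0x12.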
+
+ void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
starti;
- assert(T == T8B || T == T16B, "invalid arrangement");
- assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
- f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
- rf(Vm, 16), f(0, 15), f(index, 14, 11);
- f(0, 10), rf(Vn, 5), rf(Vd, 0);
+ f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
+ f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
}
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
index 6ac54f257..a258528ea 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
@@ -456,8 +456,12 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z
ZSetupArguments setup_arguments(masm, stub);
__ mov(rscratch1, stub->slow_path());
__ blr(rscratch1);
+ if (UseSVE > 0) {
+ // Reinitialize the ptrue predicate register, in case the external runtime
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
+ __ reinitialize_ptrue();
+ }
}
-
// Stub exit
__ b(*stub->continuation());
}
diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
index 071845e5b..f26ea2a8b 100644
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
@@ -112,6 +112,9 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Avoid generating unaligned memory accesses") \
product(bool, UseLSE, false, \
"Use LSE instructions") \
+ product(uint, UseSVE, 0, \
+ "Highest supported SVE instruction set version") \
+ range(0, 2) \
product(bool, UseBlockZeroing, true, \
"Use DC ZVA for block zeroing") \
product(intx, BlockZeroingLowLimit, 256, \
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index 241197075..431c5f005 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -50,6 +50,9 @@
#include "runtime/jniHandles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
+#ifdef COMPILER2
+#include "opto/matcher.hpp"
+#endif
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
@@ -2098,8 +2098,17 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) {
}
// Push lots of registers in the bit set supplied. Don't push sp.
-// Return the number of words pushed
+// Return the number of dwords pushed
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
+ int words_pushed = 0;
+ bool use_sve = false;
+ int sve_vector_size_in_bytes = 0;
+
+#ifdef COMPILER2
+ use_sve = Matcher::supports_scalable_vector();
+ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+#endif
+
// Scan bitset to accumulate register pairs
unsigned char regs[32];
int count = 0;
@@ -2114,8 +2123,18 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
return 0;
}
+ // SVE
+ if (use_sve && sve_vector_size_in_bytes > 16) {
+ sub(stack, stack, sve_vector_size_in_bytes * count);
+ for (int i = 0; i < count; i++) {
+ sve_str(as_FloatRegister(regs[i]), Address(stack, i));
+ }
+ return count * sve_vector_size_in_bytes / 8;
+ }
+
add(stack, stack, -count * wordSize * 2);
+ // NEON
if (count & 1) {
strq(as_FloatRegister(regs[0]), Address(stack));
i += 1;
@@ -2128,7 +2147,16 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
return count;
}
+// Return the number of dwords popped
int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
+ int words_pushed = 0;
+ bool use_sve = false;
+ int sve_vector_size_in_bytes = 0;
+
+#ifdef COMPILER2
+ use_sve = Matcher::supports_scalable_vector();
+ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+#endif
// Scan bitset to accumulate register pairs
unsigned char regs[32];
int count = 0;
@@ -2143,6 +2171,16 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
return 0;
}
+ // SVE
+ if (use_sve && sve_vector_size_in_bytes > 16) {
+ for (int i = count - 1; i >= 0; i--) {
+ sve_ldr(as_FloatRegister(regs[i]), Address(stack, i));
+ }
+ add(stack, stack, sve_vector_size_in_bytes * count);
+ return count * sve_vector_size_in_bytes / 8;
+ }
+
+ // NEON
if (count & 1) {
ldrq(as_FloatRegister(regs[0]), Address(stack));
i += 1;
@@ -2616,23 +2654,39 @@ void MacroAssembler::pop_call_clobbered_registers() {
pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
}
-void MacroAssembler::push_CPU_state(bool save_vectors) {
- int step = (save_vectors ? 8 : 4) * wordSize;
+void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve,
+ int sve_vector_size_in_bytes) {
push(0x3fffffff, sp); // integer registers except lr & sp
- mov(rscratch1, -step);
- sub(sp, sp, step);
- for (int i = 28; i >= 4; i -= 4) {
- st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
- as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
+ if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) {
+ sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
+ sve_str(as_FloatRegister(i), Address(sp, i));
+ }
+ } else {
+ int step = (save_vectors ? 8 : 4) * wordSize;
+ mov(rscratch1, -step);
+ sub(sp, sp, step);
+ for (int i = 28; i >= 4; i -= 4) {
+ st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
+ as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
+ }
+ st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
}
- st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
}
-void MacroAssembler::pop_CPU_state(bool restore_vectors) {
- int step = (restore_vectors ? 8 : 4) * wordSize;
- for (int i = 0; i <= 28; i += 4)
- ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
- as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
+void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve,
+ int sve_vector_size_in_bytes) {
+ if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) {
+ for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) {
+ sve_ldr(as_FloatRegister(i), Address(sp, i));
+ }
+ add(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
+ } else {
+ int step = (restore_vectors ? 8 : 4) * wordSize;
+ for (int i = 0; i <= 28; i += 4)
+ ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
+ as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
+ }
pop(0x3fffffff, sp); // integer registers except lr & sp
}
@@ -2681,6 +2735,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp)
return Address(base, offset);
}
+Address MacroAssembler::sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp) {
+ assert(offset >= 0, "spill to negative address?");
+
+ Register base = sp;
+
+ // An immediate offset in the range 0 to 255 which is multiplied
+ // by the current vector or predicate register size in bytes.
+ if (offset % sve_reg_size_in_bytes == 0 && offset < ((1<<8)*sve_reg_size_in_bytes)) {
+ return Address(base, offset / sve_reg_size_in_bytes);
+ }
+
+ add(tmp, base, offset);
+ return Address(tmp);
+}
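
The reachability rule applied above reduces to a small standalone predicate (a sketch, not HotSpot code): the offset must be an exact multiple of the SVE register size, and the scaled index must fall in the 0..255 immediate range used here, since spill offsets are never negative.

#include <cassert>

static bool sve_offset_is_directly_reachable(int offset, int reg_size_in_bytes) {
  assert(offset >= 0);
  return (offset % reg_size_in_bytes) == 0 && offset < 256 * reg_size_in_bytes;
}
// e.g. with a 32-byte vector register, offset 96 encodes as index 3,
// while offset 100 is unaligned and falls back to the scratch-register path.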
+
// Checks whether offset is aligned.
// Returns true if it is, else false.
bool MacroAssembler::merge_alignment_check(Register base,
@@ -5843,3 +5912,24 @@ void MacroAssembler::get_thread(Register dst) {
pop(saved_regs, sp);
}
+
+void MacroAssembler::verify_sve_vector_length() {
+ Label verify_ok;
+ assert(UseSVE > 0, "should only be used for SVE");
+ movw(rscratch1, zr);
+ sve_inc(rscratch1, B);
+ subsw(zr, rscratch1, VM_Version::get_initial_sve_vector_length());
+ br(EQ, verify_ok);
+ stop("Error: SVE vector length has changed since jvm startup");
+ bind(verify_ok);
+}
+
+void MacroAssembler::verify_ptrue() {
+ Label verify_ok;
+ assert(UseSVE > 0, "should only be used for SVE");
+ sve_cntp(rscratch1, B, ptrue, ptrue); // get true elements count.
+ sve_dec(rscratch1, B);
+ cbz(rscratch1, verify_ok);
+ stop("Error: the preserved predicate register (p7) elements are not all true");
+ bind(verify_ok);
+}
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index 014a4d3c6..9fb98c010 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -862,8 +862,10 @@ public:
DEBUG_ONLY(void verify_heapbase(const char* msg);)
- void push_CPU_state(bool save_vectors = false);
- void pop_CPU_state(bool restore_vectors = false) ;
+ void push_CPU_state(bool save_vectors = false, bool use_sve = false,
+ int sve_vector_size_in_bytes = 0);
+ void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
+ int sve_vector_size_in_bytes = 0);
// Round up to a power of two
void round_to(Register reg, int modulus);
@@ -938,6 +940,11 @@ public:
Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+ void verify_sve_vector_length();
+ void reinitialize_ptrue() {
+ sve_ptrue(ptrue, B);
+ }
+ void verify_ptrue();
// Debugging
@@ -1307,6 +1314,7 @@ private:
// Returns an address on the stack which is reachable with a ldr/str of size
// Uses rscratch2 if the address is not directly reachable
Address spill_address(int size, int offset, Register tmp=rscratch2);
+ Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);
bool merge_alignment_check(Register base, size_t size, long cur_offset, long prev_offset) const;
@@ -1330,6 +1338,9 @@ public:
void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
str(Vx, T, spill_address(1 << (int)T, offset));
}
+ void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
+ sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
+ }
void unspill(Register Rx, bool is64, int offset) {
if (is64) {
ldr(Rx, spill_address(8, offset));
@@ -1340,6 +1351,9 @@ public:
void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
ldr(Vx, T, spill_address(1 << (int)T, offset));
}
+ void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
+ sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
+ }
void spill_copy128(int src_offset, int dst_offset,
Register tmp1=rscratch1, Register tmp2=rscratch2) {
if (src_offset < 512 && (src_offset & 7) == 0 &&
@@ -1353,6 +1367,15 @@ public:
spill(tmp1, true, dst_offset+8);
}
}
+ void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
+ int sve_vec_reg_size_in_bytes) {
+ assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
+ for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
+ spill_copy128(src_offset, dst_offset);
+ src_offset += 16;
+ dst_offset += 16;
+ }
+ }
};
#ifdef ASSERT
diff --git a/src/hotspot/cpu/aarch64/register_aarch64.cpp b/src/hotspot/cpu/aarch64/register_aarch64.cpp
index 30924e8a5..3db8e8337 100644
--- a/src/hotspot/cpu/aarch64/register_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/register_aarch64.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,10 +26,15 @@
#include "precompiled.hpp"
#include "register_aarch64.hpp"
-const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1;
+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers *
+ RegisterImpl::max_slots_per_register;
const int ConcreteRegisterImpl::max_fpr
- = ConcreteRegisterImpl::max_gpr + (FloatRegisterImpl::number_of_registers << 1);
+ = ConcreteRegisterImpl::max_gpr +
+ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register;
+
+const int ConcreteRegisterImpl::max_pr
+ = ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers;
const char* RegisterImpl::name() const {
const char* names[number_of_registers] = {
@@ -52,3 +57,10 @@ const char* FloatRegisterImpl::name() const {
};
return is_valid() ? names[encoding()] : "noreg";
}
+
+const char* PRegisterImpl::name() const {
+ const char* names[number_of_registers] = {
+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7"
+ };
+ return is_valid() ? names[encoding()] : "noreg";
+}
diff --git a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp
index 5f7662c89..c211b39ee 100644
--- a/src/hotspot/cpu/aarch64/register_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -44,7 +44,8 @@ class RegisterImpl: public AbstractRegisterImpl {
enum {
number_of_registers = 32,
number_of_byte_registers = 32,
- number_of_registers_for_jvmci = 34 // Including SP and ZR.
+ number_of_registers_for_jvmci = 34, // Including SP and ZR.
+ max_slots_per_register = 2
};
// derived registers, offsets, and addresses
@@ -127,7 +128,11 @@ inline FloatRegister as_FloatRegister(int encoding) {
class FloatRegisterImpl: public AbstractRegisterImpl {
public:
enum {
- number_of_registers = 32
+ number_of_registers = 32,
+ max_slots_per_register = 8,
+ save_slots_per_register = 2,
+ slots_per_neon_register = 4,
+ extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register
};
// construction
@@ -183,6 +188,80 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v29 , (29));
CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30));
CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31));
+// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0].
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z0 , ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z1 , ( 1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z2 , ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z3 , ( 3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z4 , ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z5 , ( 5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z6 , ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z7 , ( 7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z8 , ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z9 , ( 9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z10 , (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z11 , (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z12 , (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z13 , (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z14 , (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z15 , (15));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z16 , (16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z17 , (17));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z18 , (18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z19 , (19));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z20 , (20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z21 , (21));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z22 , (22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z23 , (23));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z24 , (24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z25 , (25));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z26 , (26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z27 , (27));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z28 , (28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z29 , (29));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z30 , (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z31 , (31));
+
+
+class PRegisterImpl;
+typedef PRegisterImpl* PRegister;
+inline PRegister as_PRegister(int encoding) {
+ return (PRegister)(intptr_t)encoding;
+}
+
+// The implementation of predicate registers for the architecture
+class PRegisterImpl: public AbstractRegisterImpl {
+ public:
+ enum {
+ number_of_registers = 8,
+ max_slots_per_register = 1
+ };
+
+ // construction
+ inline friend PRegister as_PRegister(int encoding);
+
+ VMReg as_VMReg();
+
+ // derived registers, offsets, and addresses
+ PRegister successor() const { return as_PRegister(encoding() + 1); }
+
+ // accessors
+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+ int encoding_nocheck() const { return (intptr_t)this; }
+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+ const char* name() const;
+};
+
+// The predicate registers of SVE.
+CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0));
+CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1));
+CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2));
+CONSTANT_REGISTER_DECLARATION(PRegister, p3, ( 3));
+CONSTANT_REGISTER_DECLARATION(PRegister, p4, ( 4));
+CONSTANT_REGISTER_DECLARATION(PRegister, p5, ( 5));
+CONSTANT_REGISTER_DECLARATION(PRegister, p6, ( 6));
+CONSTANT_REGISTER_DECLARATION(PRegister, p7, ( 7));
+
// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
@@ -193,14 +272,16 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
// There is no requirement that any ordering here matches any ordering c2 gives
// it's optoregs.
- number_of_registers = (2 * RegisterImpl::number_of_registers +
- 4 * FloatRegisterImpl::number_of_registers +
+ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers +
+ PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers +
1) // flags
};
// added to make it compile
static const int max_gpr;
static const int max_fpr;
+ static const int max_pr;
};
// A set of registers
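
With the slot counts introduced above, the resulting VMReg layout can be checked with a few lines of standalone arithmetic (not JVM code):

#include <cstdio>

int main() {
  const int max_gpr = 32 * 2;             // 32 general registers, 2 slots each -> 64
  const int max_fpr = max_gpr + 32 * 8;   // 32 vector registers, 8 slots each  -> 320
  const int max_pr  = max_fpr + 8 * 1;    // 8 predicate registers, 1 slot each -> 328
  printf("%d %d %d %d\n", max_gpr, max_fpr, max_pr, max_pr + 1);  // +1 flags slot -> 329
  return 0;
}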
diff --git a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
index c18109087..e337f582a 100644
--- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -154,3 +154,47 @@ REGISTER_DEFINITION(Register, rthread);
REGISTER_DEFINITION(Register, rheapbase);
REGISTER_DEFINITION(Register, r31_sp);
+
+REGISTER_DEFINITION(FloatRegister, z0);
+REGISTER_DEFINITION(FloatRegister, z1);
+REGISTER_DEFINITION(FloatRegister, z2);
+REGISTER_DEFINITION(FloatRegister, z3);
+REGISTER_DEFINITION(FloatRegister, z4);
+REGISTER_DEFINITION(FloatRegister, z5);
+REGISTER_DEFINITION(FloatRegister, z6);
+REGISTER_DEFINITION(FloatRegister, z7);
+REGISTER_DEFINITION(FloatRegister, z8);
+REGISTER_DEFINITION(FloatRegister, z9);
+REGISTER_DEFINITION(FloatRegister, z10);
+REGISTER_DEFINITION(FloatRegister, z11);
+REGISTER_DEFINITION(FloatRegister, z12);
+REGISTER_DEFINITION(FloatRegister, z13);
+REGISTER_DEFINITION(FloatRegister, z14);
+REGISTER_DEFINITION(FloatRegister, z15);
+REGISTER_DEFINITION(FloatRegister, z16);
+REGISTER_DEFINITION(FloatRegister, z17);
+REGISTER_DEFINITION(FloatRegister, z18);
+REGISTER_DEFINITION(FloatRegister, z19);
+REGISTER_DEFINITION(FloatRegister, z20);
+REGISTER_DEFINITION(FloatRegister, z21);
+REGISTER_DEFINITION(FloatRegister, z22);
+REGISTER_DEFINITION(FloatRegister, z23);
+REGISTER_DEFINITION(FloatRegister, z24);
+REGISTER_DEFINITION(FloatRegister, z25);
+REGISTER_DEFINITION(FloatRegister, z26);
+REGISTER_DEFINITION(FloatRegister, z27);
+REGISTER_DEFINITION(FloatRegister, z28);
+REGISTER_DEFINITION(FloatRegister, z29);
+REGISTER_DEFINITION(FloatRegister, z30);
+REGISTER_DEFINITION(FloatRegister, z31);
+
+REGISTER_DEFINITION(PRegister, p0);
+REGISTER_DEFINITION(PRegister, p1);
+REGISTER_DEFINITION(PRegister, p2);
+REGISTER_DEFINITION(PRegister, p3);
+REGISTER_DEFINITION(PRegister, p4);
+REGISTER_DEFINITION(PRegister, p5);
+REGISTER_DEFINITION(PRegister, p6);
+REGISTER_DEFINITION(PRegister, p7);
+
+REGISTER_DEFINITION(PRegister, ptrue);
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index da2bc6b05..05cc32e7e 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -98,42 +98,60 @@ class RegisterSaver {
// Capture info about frame layout
enum layout {
fpu_state_off = 0,
- fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
+ fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1,
// The frame sender code expects that rfp will be in
// the "natural" place and will override any oopMap
// setting for it. We must therefore force the layout
// so that it agrees with the frame sender code.
- r0_off = fpu_state_off+FPUStateSizeInWords,
- rfp_off = r0_off + 30 * 2,
- return_off = rfp_off + 2, // slot for return address
- reg_save_size = return_off + 2};
+ r0_off = fpu_state_off + FPUStateSizeInWords,
+ rfp_off = r0_off + (RegisterImpl::number_of_registers - 2) * RegisterImpl::max_slots_per_register,
+ return_off = rfp_off + RegisterImpl::max_slots_per_register, // slot for return address
+ reg_save_size = return_off + RegisterImpl::max_slots_per_register};
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+ bool use_sve = false;
+ int sve_vector_size_in_bytes = 0;
+ int sve_vector_size_in_slots = 0;
+
+#ifdef COMPILER2
+ use_sve = Matcher::supports_scalable_vector();
+ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+ sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
+#endif
+
#if COMPILER2_OR_JVMCI
if (save_vectors) {
+ int vect_words = 0;
+ int extra_save_slots_per_register = 0;
// Save upper half of vector registers
- int vect_words = 32 * 8 / wordSize;
+ if (use_sve) {
+ extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register;
+ } else {
+ extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
+ }
+ vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
+ VMRegImpl::slots_per_word;
additional_frame_words += vect_words;
}
#else
assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
#endif
- int frame_size_in_bytes = align_up(additional_frame_words*wordSize +
- reg_save_size*BytesPerInt, 16);
+ int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
+ reg_save_size * BytesPerInt, 16);
// OopMap frame size is in compiler stack slots (jint's) not bytes or words
int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
// The caller will allocate additional_frame_words
- int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
+ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
// CodeBlob frame size is in words.
int frame_size_in_words = frame_size_in_bytes / wordSize;
*total_frame_words = frame_size_in_words;
// Save Integer and Float registers.
__ enter();
- __ push_CPU_state(save_vectors);
+ __ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);
// Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This
@@ -146,10 +164,10 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
Register r = as_Register(i);
if (r < rheapbase && r != rscratch1 && r != rscratch2) {
- int sp_offset = 2 * (i + 32); // SP offsets are in 4-byte words,
- // register slots are 8 bytes
- // wide, 32 floating-point
- // registers
+ // SP offsets are in 4-byte words.
+ // Register slots are 8 bytes wide; there are 32 floating-point registers.
+ int sp_offset = RegisterImpl::max_slots_per_register * i +
+ FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers;
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
r->as_VMReg());
}
@@ -157,7 +175,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
FloatRegister r = as_FloatRegister(i);
- int sp_offset = save_vectors ? (4 * i) : (2 * i);
+ int sp_offset = 0;
+ if (save_vectors) {
+ sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
+ (FloatRegisterImpl::slots_per_neon_register * i);
+ } else {
+ sp_offset = FloatRegisterImpl::save_slots_per_register * i;
+ }
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
r->as_VMReg());
}
@@ -166,10 +190,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
-#ifndef COMPILER2
+#ifdef COMPILER2
+ __ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
+ Matcher::scalable_vector_reg_size(T_BYTE));
+#else
+#if !INCLUDE_JVMCI
assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
#endif
__ pop_CPU_state(restore_vectors);
+#endif
__ leave();
}
@@ -1855,6 +1884,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
}
+ if (UseSVE > 0) {
+ // Make sure that jni code does not change SVE vector length.
+ __ verify_sve_vector_length();
+ }
+
// check for safepoint operation in progress and/or pending suspend requests
Label safepoint_in_progress, safepoint_in_progress_done;
{
@@ -2785,6 +2819,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
__ maybe_isb();
__ membar(Assembler::LoadLoad | Assembler::LoadStore);
+ if (UseSVE > 0 && save_vectors) {
+ // Reinitialize the ptrue predicate register, in case the external runtime
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
+ __ reinitialize_ptrue();
+ }
+
__ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
__ cbz(rscratch1, noException);
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index 0310463ac..979ff51f8 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -486,6 +486,11 @@ class StubGenerator: public StubCodeGenerator {
__ call_VM_leaf(CAST_FROM_FN_PTR(address,
SharedRuntime::exception_handler_for_return_address),
rthread, c_rarg1);
+ if (UseSVE > 0) {
+ // Reinitialize the ptrue predicate register, in case the external runtime
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
+ __ reinitialize_ptrue();
+ }
// we should not really care that lr is no longer the callee
// address. we saved the value the handler needs in r19 so we can
// just copy it to r3. however, the C2 handler will push its own
@@ -4804,6 +4809,12 @@ class StubGenerator: public StubCodeGenerator {
__ reset_last_Java_frame(true);
__ maybe_isb();
+ if (UseSVE > 0) {
+ // Reinitialize the ptrue predicate register, in case the external runtime
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
+ __ reinitialize_ptrue();
+ }
+
__ leave();
// check for pending exceptions
diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
index 6e4eb1a7a..1bb12d24f 100644
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
@@ -1377,6 +1377,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ push(dtos);
__ push(ltos);
+ if (UseSVE > 0) {
+ // Make sure that jni code does not change SVE vector length.
+ __ verify_sve_vector_length();
+ }
+
// change thread state
__ mov(rscratch1, _thread_in_native_trans);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
index 04ae1167d..8f2c95e8b 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
@@ -29,13 +29,15 @@
#include "memory/resourceArea.hpp"
#include "runtime/java.hpp"
#include "runtime/stubCodeGenerator.hpp"
+#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
#include "vm_version_aarch64.hpp"
#include OS_HEADER_INLINE(os)
-#include <sys/auxv.h>
#include <asm/hwcap.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
#ifndef HWCAP_AES
#define HWCAP_AES (1<<3)
@@ -61,12 +63,27 @@
#define HWCAP_ATOMICS (1<<8)
#endif
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22)
+#endif
+
+#ifndef HWCAP2_SVE2
+#define HWCAP2_SVE2 (1 << 1)
+#endif
+
+#ifndef PR_SVE_GET_VL
+// For old toolchains which do not have SVE related macros defined.
+#define PR_SVE_SET_VL 50
+#define PR_SVE_GET_VL 51
+#endif
+
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_model2;
int VM_Version::_variant;
int VM_Version::_revision;
int VM_Version::_stepping;
+int VM_Version::_initial_sve_vector_length;
VM_Version::PsrInfo VM_Version::_psr_info = { 0, };
static BufferBlob* stub_blob;
@@ -160,6 +177,7 @@ void VM_Version::get_processor_features() {
}
unsigned long auxv = getauxval(AT_HWCAP);
+ unsigned long auxv2 = getauxval(AT_HWCAP2);
char buf[512];
@@ -250,6 +268,8 @@ void VM_Version::get_processor_features() {
if (auxv & HWCAP_SHA1) strcat(buf, ", sha1");
if (auxv & HWCAP_SHA2) strcat(buf, ", sha256");
if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
+ if (auxv & HWCAP_SVE) strcat(buf, ", sve");
+ if (auxv2 & HWCAP2_SVE2) strcat(buf, ", sve2");
_features_string = os::strdup(buf);
@@ -379,6 +399,18 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseBlockZeroing, false);
}
+ if (auxv & HWCAP_SVE) {
+ if (FLAG_IS_DEFAULT(UseSVE)) {
+ FLAG_SET_DEFAULT(UseSVE, (auxv2 & HWCAP2_SVE2) ? 2 : 1);
+ }
+ if (UseSVE > 0) {
+ _initial_sve_vector_length = prctl(PR_SVE_GET_VL);
+ }
+ } else if (UseSVE > 0) {
+ warning("UseSVE specified, but not supported on current CPU. Disabling SVE.");
+ FLAG_SET_DEFAULT(UseSVE, 0);
+ }
+
// This machine allows unaligned memory accesses
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
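
The detection path above amounts to a short standalone Linux sketch (not JVM code), assuming glibc's getauxval() and the fallback constants defined in this hunk; SVE2 detection via AT_HWCAP2 works the same way.

#include <sys/auxv.h>
#include <sys/prctl.h>
#include <stdio.h>

#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif
#ifndef PR_SVE_GET_VL
#define PR_SVE_GET_VL 51
#endif

int main() {
  unsigned long hwcap = getauxval(AT_HWCAP);
  if (hwcap & HWCAP_SVE) {
    // The low bits of the prctl() result hold the current vector length in bytes.
    int vl = (int) prctl(PR_SVE_GET_VL);
    printf("SVE supported, current vector length: %d bytes\n", vl & 0xffff);
  } else {
    printf("SVE not supported\n");
  }
  return 0;
}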
@@ -411,6 +443,50 @@ void VM_Version::get_processor_features() {
UseMontgomerySquareIntrinsic = true;
}
+ if (UseSVE > 0) {
+ if (FLAG_IS_DEFAULT(MaxVectorSize)) {
+ MaxVectorSize = _initial_sve_vector_length;
+ } else if (MaxVectorSize < 16) {
+ warning("SVE does not support vector length less than 16 bytes. Disabling SVE.");
+ UseSVE = 0;
+ } else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) {
+ int new_vl = prctl(PR_SVE_SET_VL, MaxVectorSize);
+ _initial_sve_vector_length = new_vl;
+ // If MaxVectorSize is larger than the largest SVE vector length the system supports,
+ // the prctl() call above sets the task vector length to that largest supported value,
+ // so we also update MaxVectorSize to match.
+ if (new_vl < 0) {
+ vm_exit_during_initialization(
+ err_msg("Current system does not support SVE vector length for MaxVectorSize: %d",
+ (int)MaxVectorSize));
+ } else if (new_vl != MaxVectorSize) {
+ warning("Current system only supports max SVE vector length %d. Set MaxVectorSize to %d",
+ new_vl, new_vl);
+ }
+ MaxVectorSize = new_vl;
+ } else {
+ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
+ }
+ }
+
+ if (UseSVE == 0) { // NEON
+ int min_vector_size = 8;
+ int max_vector_size = 16;
+ if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
+ if (!is_power_of_2(MaxVectorSize)) {
+ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
+ } else if (MaxVectorSize < min_vector_size) {
+ warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
+ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
+ } else if (MaxVectorSize > max_vector_size) {
+ warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
+ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
+ }
+ } else {
+ FLAG_SET_DEFAULT(MaxVectorSize, 16);
+ }
+ }
+
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(OptoScheduling)) {
OptoScheduling = true;
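
A hedged sketch of the length negotiation above (not JVM code): the kernel clamps an over-large PR_SVE_SET_VL request to the largest supported length and fails for an invalid one, which is why MaxVectorSize is re-read from the returned value.

#include <sys/prctl.h>
#include <stdio.h>

#ifndef PR_SVE_SET_VL
#define PR_SVE_SET_VL 50
#endif

int main() {
  int requested = 512;  // bytes; the JVM path only reaches here for power-of-two multiples of 16
  long granted = prctl(PR_SVE_SET_VL, requested);
  if (granted < 0) {
    printf("request rejected\n");
  } else if ((granted & 0xffff) != requested) {        // the low 16 bits hold the length
    printf("clamped to %ld bytes\n", granted & 0xffff);
  } else {
    printf("got %d bytes\n", requested);
  }
  return 0;
}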
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
index 0a17f3e73..23c3c1338 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
@@ -40,6 +40,7 @@ protected:
static int _variant;
static int _revision;
static int _stepping;
+ static int _initial_sve_vector_length;
struct PsrInfo {
uint32_t dczid_el0;
@@ -101,6 +102,7 @@ public:
static int cpu_model2() { return _model2; }
static int cpu_variant() { return _variant; }
static int cpu_revision() { return _revision; }
+ static int get_initial_sve_vector_length() { return _initial_sve_vector_length; };
static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); }
static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); }
static bool is_zva_enabled() {
diff --git a/src/hotspot/cpu/aarch64/vmreg_aarch64.cpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.cpp
index 9fd20be0f..35d0adf5b 100644
--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.cpp
@@ -33,15 +33,17 @@ void VMRegImpl::set_regName() {
Register reg = ::as_Register(0);
int i;
for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) {
- regName[i++] = reg->name();
- regName[i++] = reg->name();
+ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) {
+ regName[i++] = reg->name();
+ }
reg = reg->successor();
}
FloatRegister freg = ::as_FloatRegister(0);
for ( ; i < ConcreteRegisterImpl::max_fpr ; ) {
- regName[i++] = freg->name();
- regName[i++] = freg->name();
+ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) {
+ regName[i++] = freg->name();
+ }
freg = freg->successor();
}
diff --git a/src/hotspot/cpu/aarch64/vmreg_aarch64.hpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.hpp
index 0b1d000bb..c249c26a8 100644
--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.hpp
@@ -38,13 +38,14 @@ inline Register as_Register() {
assert( is_Register(), "must be");
// Yuk
- return ::as_Register(value() >> 1);
+ return ::as_Register(value() / RegisterImpl::max_slots_per_register);
}
inline FloatRegister as_FloatRegister() {
assert( is_FloatRegister() && is_even(value()), "must be" );
// Yuk
- return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1);
+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) /
+ FloatRegisterImpl::max_slots_per_register);
}
inline bool is_concrete() {
diff --git a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp
index 145f9797f..dde7a7a91 100644
--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp
+++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,11 +28,16 @@
inline VMReg RegisterImpl::as_VMReg() {
if( this==noreg ) return VMRegImpl::Bad();
- return VMRegImpl::as_VMReg(encoding() << 1 );
+ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register);
}
inline VMReg FloatRegisterImpl::as_VMReg() {
- return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr);
+ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) +
+ ConcreteRegisterImpl::max_gpr);
+}
+
+inline VMReg PRegisterImpl::as_VMReg() {
+ return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr);
}
#endif // CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index 18e81bdc5..87e5f331b 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// TODO
// identify extra cases that we might want to provide match rules for
@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) {
return MaxVectorSize;
}
+const bool Matcher::supports_scalable_vector() {
+ return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return -1;
+}
+
// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize >= size, "");
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index 07bda6d71..4cbe2cf5c 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -2242,7 +2242,7 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// TODO
// identify extra cases that we might want to provide match rules for
@@ -2310,6 +2310,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
return max_vector_size(bt); // Same as max.
}
+const bool Matcher::supports_scalable_vector() {
+ return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return -1;
+}
+
// PPC implementation uses VSX load/store instructions (if
// SuperwordUseVSX) which support 4 byte but not arbitrary alignment
const bool Matcher::misaligned_vectors_ok() {
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index 96c231b0a..782c1c7c4 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -1522,7 +1522,7 @@ const bool Matcher::match_rule_supported(int opcode) {
// BUT: make sure match rule is not disabled by a false predicate!
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// TODO
// Identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen.
@@ -1573,6 +1573,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
return max_vector_size(bt); // Same as max.
}
+const bool Matcher::supports_scalable_vector() {
+ return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return -1;
+}
+
const uint Matcher::vector_shift_count_ideal_reg(int size) {
fatal("vector shift is not supported");
return Node::NotAMachineReg;
diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad
index a09c795c9..3b1b1046e 100644
--- a/src/hotspot/cpu/sparc/sparc.ad
+++ b/src/hotspot/cpu/sparc/sparc.ad
@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// TODO
// identify extra cases that we might want to provide match rules for
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 8fb9a3e34..dc5f1ecf9 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -1341,7 +1341,7 @@ const bool Matcher::match_rule_supported(int opcode) {
return ret_value; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
bool ret_value = match_rule_supported(opcode);
@@ -1468,6 +1468,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
return MIN2(size,max_size);
}
+const bool Matcher::supports_scalable_vector() {
+ return false;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return -1;
+}
+
// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize >= size, "");
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index c2d1aca0c..0db8e6a14 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -2887,7 +2887,7 @@ frame
RAX_H_num // Op_RegL
};
// Excluded flags and vector registers.
- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
%}
%}
diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp
index ba61aa4c0..9e41b2dc6 100644
--- a/src/hotspot/share/adlc/archDesc.cpp
+++ b/src/hotspot/share/adlc/archDesc.cpp
@@ -1,5 +1,5 @@
//
-// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
// Match Vector types.
if (strncmp(idealOp, "Vec",3)==0) {
switch(last_char) {
+ case 'A': return "TypeVect::VECTA";
case 'S': return "TypeVect::VECTS";
case 'D': return "TypeVect::VECTD";
case 'X': return "TypeVect::VECTX";
@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
}
}
+ if (strncmp(idealOp, "RegVMask", 8) == 0) {
+ return "Type::BOTTOM";
+ }
+
// !!!!!
switch(last_char) {
case 'I': return "TypeInt::INT";
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
index 5ba1fdc57..45826d3b2 100644
--- a/src/hotspot/share/adlc/formssel.cpp
+++ b/src/hotspot/share/adlc/formssel.cpp
@@ -3946,6 +3946,8 @@ bool MatchRule::is_base_register(FormDict &globals) const {
strcmp(opType,"RegL")==0 ||
strcmp(opType,"RegF")==0 ||
strcmp(opType,"RegD")==0 ||
+ strcmp(opType,"RegVMask")==0 ||
+ strcmp(opType,"VecA")==0 ||
strcmp(opType,"VecS")==0 ||
strcmp(opType,"VecD")==0 ||
strcmp(opType,"VecX")==0 ||
diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp
index 914dc43f6..710af9de8 100644
--- a/src/hotspot/share/opto/chaitin.cpp
+++ b/src/hotspot/share/opto/chaitin.cpp
@@ -77,6 +77,7 @@ void LRG::dump() const {
if( _is_oop ) tty->print("Oop ");
if( _is_float ) tty->print("Float ");
if( _is_vector ) tty->print("Vector ");
+ if( _is_scalable ) tty->print("Scalable ");
if( _was_spilled1 ) tty->print("Spilled ");
if( _was_spilled2 ) tty->print("Spilled2 ");
if( _direct_conflict ) tty->print("Direct_conflict ");
@@ -646,7 +647,15 @@ void PhaseChaitin::Register_Allocate() {
// Live ranges record the highest register in their mask.
// We want the low register for the AD file writer's convenience.
OptoReg::Name hi = lrg.reg(); // Get hi register
- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
+ int num_regs = lrg.num_regs();
+ if (lrg.is_scalable() && OptoReg::is_stack(hi)) {
+ // For a scalable vector register allocated to a physical register, num_regs
+ // is RegMask::SlotsPerVecA, the reg mask size of a scalable vector. If it is
+ // allocated on the stack, we need the actual num_regs, which reflects the
+ // physical length of the scalable register.
+ num_regs = lrg.scalable_reg_slots();
+ }
+ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
// We have to use pair [lo,lo+1] even for wide vectors because
// the rest of code generation works only with pairs. It is safe
// since for registers encoding only 'lo' is used.
@@ -801,8 +810,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// Check for vector live range (only if vector register is used).
// On SPARC vector uses RegD which could be misaligned so it is not
// processes as vector in RA.
- if (RegMask::is_vector(ireg))
+ if (RegMask::is_vector(ireg)) {
lrg._is_vector = 1;
+ if (ireg == Op_VecA) {
+ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported");
+ lrg._is_scalable = 1;
+ // For a scalable vector allocated to a physical register, num_regs is
+ // RegMask::SlotsPerVecA in the reg mask, which may not be the actual
+ // physical register size. If it is allocated on the stack, we need the
+ // actual physical length of the scalable vector register.
+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
+ }
+ }
assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
"vector must be in vector registers");
@@ -912,6 +932,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
lrg.set_reg_pressure(1);
#endif
break;
+ case Op_VecA:
+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
+ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecA);
+ lrg.set_reg_pressure(1);
+ break;
case Op_VecS:
assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
@@ -1358,6 +1385,46 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
return false;
}
+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) {
+ int num_regs = lrg.num_regs();
+ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs);
+
+ if (lrg.is_scalable()) {
+ // a physical register is found
+ if (chunk == 0 && OptoReg::is_reg(assigned)) {
+ return assigned;
+ }
+
+ // find available stack slots for scalable register
+ if (lrg._is_vector) {
+ num_regs = lrg.scalable_reg_slots();
+ // if actual scalable vector register is exactly SlotsPerVecA * 32 bits
+ if (num_regs == RegMask::SlotsPerVecA) {
+ return assigned;
+ }
+
+ // The mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color,
+ // but that alignment does not work for the scalable size. We have to find adjacent
+ // scalable_reg_slots() bits instead of SlotsPerVecA bits.
+ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg
+ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) {
+ // Verify the found reg has scalable_reg_slots() bits set.
+ if (mask.is_valid_reg(assigned, num_regs)) {
+ return assigned;
+ } else {
+ // Remove more for each iteration
+ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg
+ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits
+ assigned = mask.find_first_set(lrg, num_regs);
+ }
+ }
+ return OptoReg::Bad; // will cause chunk change, and retry next chunk
+ }
+ }
+
+ return assigned;
+}
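
The stack-slot search above can be illustrated with a simplified standalone sketch (not HotSpot code): scan aligned windows from high to low and accept the first one whose scalable_reg_slots() bits are all free, assuming a toy 64-slot mask.

#include <stdint.h>
#include <stdio.h>

// Returns the highest slot of the first aligned window whose num_slots bits are all
// set in free_mask, scanning downward in steps of set_size; -1 if none is available.
static int find_scalable_slots(uint64_t free_mask, int num_slots, int set_size) {
  uint64_t want = (1ULL << num_slots) - 1;   // num_slots < 64 in this toy example
  for (int lo = 64 - set_size; lo >= 0; lo -= set_size) {
    if (((free_mask >> lo) & want) == want) {
      return lo + num_slots - 1;             // report the high slot, as the allocator does
    }
  }
  return -1;
}

int main() {
  uint64_t free_mask = 0x00ffff00ffff0000ULL;           // free 32-bit stack slots
  printf("%d\n", find_scalable_slots(free_mask, 8, 8));  // prints 55
  return 0;
}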
+
// Choose a color using the biasing heuristic
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
@@ -1391,7 +1458,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
RegMask tempmask = lrg.mask();
tempmask.AND(lrgs(copy_lrg).mask());
tempmask.clear_to_sets(lrg.num_regs());
- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
+ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk);
if (OptoReg::is_valid(reg))
return reg;
}
@@ -1400,7 +1467,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
// If no bias info exists, just go with the register selection ordering
if (lrg._is_vector || lrg.num_regs() == 2) {
// Find an aligned set
- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
+ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
}
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
@@ -1455,7 +1522,6 @@ uint PhaseChaitin::Select( ) {
LRG *lrg = &lrgs(lidx);
_simplified = lrg->_next;
-
#ifndef PRODUCT
if (trace_spilling()) {
ttyLocker ttyl;
@@ -1539,7 +1605,6 @@ uint PhaseChaitin::Select( ) {
// Bump register mask up to next stack chunk
chunk += RegMask::CHUNK_SIZE;
lrg->Set_All();
-
goto retry_next_chunk;
}
@@ -1564,12 +1629,21 @@ uint PhaseChaitin::Select( ) {
int n_regs = lrg->num_regs();
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
if (n_regs == 1 || !lrg->_fat_proj) {
- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
+ if (Matcher::supports_scalable_vector()) {
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity");
+ } else {
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
+ }
lrg->Clear(); // Clear the mask
lrg->Insert(reg); // Set regmask to match selected reg
// For vectors and pairs, also insert the low bit of the pair
- for (int i = 1; i < n_regs; i++)
+ // We always choose the high bit, then mask the low bits by register size
+ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack
+ n_regs = lrg->scalable_reg_slots();
+ }
+ for (int i = 1; i < n_regs; i++) {
lrg->Insert(OptoReg::add(reg,-i));
+ }
lrg->set_mask_size(n_regs);
} else { // Else fatproj
// mask must be equal to fatproj bits, by definition
diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp
index 776e3cf63..674791c64 100644
--- a/src/hotspot/share/opto/chaitin.hpp
+++ b/src/hotspot/share/opto/chaitin.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -115,7 +115,9 @@ public:
_msize_valid=1;
if (_is_vector) {
assert(!_fat_proj, "sanity");
- _mask.verify_sets(_num_regs);
+ if (!(_is_scalable && OptoReg::is_stack(_reg))) {
+ _mask.verify_sets(_num_regs);
+ }
} else if (_num_regs == 2 && !_fat_proj) {
_mask.verify_pairs();
}
@@ -139,14 +141,37 @@ public:
void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) }
void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
- // Number of registers this live range uses when it colors
private:
+ // Number of registers this live range uses when it colors
uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else
// except _num_regs is kill count for fat_proj
+
+ // For a scalable register, num_regs may not match the actual physical register
+ // size. We need the actual physical length, in slots, when a scalable register
+ // is spilled. Each slot is 32 bits wide.
+ uint _scalable_reg_slots; // Actual length of the scalable register, in slots.
+ // Meaningful only when _is_scalable is true.
public:
int num_regs() const { return _num_regs; }
void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
+ uint scalable_reg_slots() { return _scalable_reg_slots; }
+ void set_scalable_reg_slots(uint slots) {
+ assert(_is_scalable, "scalable register");
+ assert(slots > 0, "slots of scalable register is not valid");
+ _scalable_reg_slots = slots;
+ }
+
+ bool is_scalable() {
+#ifdef ASSERT
+ if (_is_scalable) {
+ // Should only be a vector for now, but it could also be a RegVMask in the future.
+ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
+ }
+#endif
+ return _is_scalable;
+ }
+
private:
// Number of physical registers this live range uses when it colors
// Architecture and register-set dependent
@@ -172,6 +197,8 @@ public:
uint _is_oop:1, // Live-range holds an oop
_is_float:1, // True if in float registers
_is_vector:1, // True if in vector registers
+ _is_scalable:1, // True if register size is scalable
+ // e.g. Arm SVE vector/predicate registers.
_was_spilled1:1, // True if prior spilling on def
_was_spilled2:1, // True if twice prior spilling on def
_is_bound:1, // live range starts life with no
diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp
index 05fdab21e..14e5425b8 100644
--- a/src/hotspot/share/opto/matcher.cpp
+++ b/src/hotspot/share/opto/matcher.cpp
@@ -84,6 +84,7 @@ Matcher::Matcher()
idealreg2spillmask [Op_RegF] = NULL;
idealreg2spillmask [Op_RegD] = NULL;
idealreg2spillmask [Op_RegP] = NULL;
+ idealreg2spillmask [Op_VecA] = NULL;
idealreg2spillmask [Op_VecS] = NULL;
idealreg2spillmask [Op_VecD] = NULL;
idealreg2spillmask [Op_VecX] = NULL;
@@ -97,6 +98,7 @@ Matcher::Matcher()
idealreg2debugmask [Op_RegF] = NULL;
idealreg2debugmask [Op_RegD] = NULL;
idealreg2debugmask [Op_RegP] = NULL;
+ idealreg2debugmask [Op_VecA] = NULL;
idealreg2debugmask [Op_VecS] = NULL;
idealreg2debugmask [Op_VecD] = NULL;
idealreg2debugmask [Op_VecX] = NULL;
@@ -110,6 +112,7 @@ Matcher::Matcher()
idealreg2mhdebugmask[Op_RegF] = NULL;
idealreg2mhdebugmask[Op_RegD] = NULL;
idealreg2mhdebugmask[Op_RegP] = NULL;
+ idealreg2mhdebugmask[Op_VecA] = NULL;
idealreg2mhdebugmask[Op_VecS] = NULL;
idealreg2mhdebugmask[Op_VecD] = NULL;
idealreg2mhdebugmask[Op_VecX] = NULL;
@@ -417,6 +420,8 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
return rms;
}
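+// Number of stack masks allocated below: three tables (spill, debug, mhdebug)
+// of six scalar ideal registers each, plus six vector spill masks (VecA..VecZ).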
+#define NOF_STACK_MASKS (3*6+6)
+
//---------------------------init_first_stack_mask-----------------------------
// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the
@@ -424,7 +429,12 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
void Matcher::init_first_stack_mask() {
// Allocate storage for spill masks as masks for the appropriate load type.
- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5));
+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * NOF_STACK_MASKS);
+
+ // Initialize empty placeholder masks into the newly allocated arena
+ for (int i = 0; i < NOF_STACK_MASKS; i++) {
+ new (rms + i) RegMask();
+ }
idealreg2spillmask [Op_RegN] = &rms[0];
idealreg2spillmask [Op_RegI] = &rms[1];
@@ -447,11 +457,12 @@ void Matcher::init_first_stack_mask() {
idealreg2mhdebugmask[Op_RegD] = &rms[16];
idealreg2mhdebugmask[Op_RegP] = &rms[17];
- idealreg2spillmask [Op_VecS] = &rms[18];
- idealreg2spillmask [Op_VecD] = &rms[19];
- idealreg2spillmask [Op_VecX] = &rms[20];
- idealreg2spillmask [Op_VecY] = &rms[21];
- idealreg2spillmask [Op_VecZ] = &rms[22];
+ idealreg2spillmask [Op_VecA] = &rms[18];
+ idealreg2spillmask [Op_VecS] = &rms[19];
+ idealreg2spillmask [Op_VecD] = &rms[20];
+ idealreg2spillmask [Op_VecX] = &rms[21];
+ idealreg2spillmask [Op_VecY] = &rms[22];
+ idealreg2spillmask [Op_VecZ] = &rms[23];
OptoReg::Name i;
@@ -478,6 +489,7 @@ void Matcher::init_first_stack_mask() {
// Keep spill masks aligned.
aligned_stack_mask.clear_to_pairs();
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+ RegMask scalable_stack_mask = aligned_stack_mask;
*idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
#ifdef _LP64
@@ -548,28 +560,48 @@ void Matcher::init_first_stack_mask() {
*idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ];
idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask);
}
- if (UseFPUForSpilling) {
- // This mask logic assumes that the spill operations are
- // symmetric and that the registers involved are the same size.
- // On sparc for instance we may have to use 64 bit moves will
- // kill 2 registers when used with F0-F31.
- idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
- idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
+
+ if (Matcher::supports_scalable_vector()) {
+ int k = 1;
+ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
+ // Exclude the last input arg stack slots to avoid spilling vector registers there,
+ // otherwise vector spills could stomp on stack slots in the caller frame.
+ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
+ scalable_stack_mask.Remove(in);
+ in = OptoReg::add(in, -1);
+ }
+
+ // For VecA
+ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA);
+ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack");
+ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA];
+ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask);
+ } else {
+ *idealreg2spillmask[Op_VecA] = RegMask::Empty;
+ }
+
+ if (UseFPUForSpilling) {
+ // This mask logic assumes that the spill operations are
+ // symmetric and that the registers involved are the same size.
+ // On sparc for instance we may have to use 64 bit moves will
+ // kill 2 registers when used with F0-F31.
+ idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
+ idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
#ifdef _LP64
- idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
- idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
- idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
- idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
+ idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
+ idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+ idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
#else
- idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
+ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
#ifdef ARM
- // ARM has support for moving 64bit values between a pair of
- // integer registers and a double register
- idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
- idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+ // ARM has support for moving 64bit values between a pair of
+ // integer registers and a double register
+ idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+ idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
#endif
#endif
- }
+ }
// Make up debug masks. Any spill slot plus callee-save registers.
// Caller-save registers are assumed to be trashable by the various
@@ -872,6 +904,10 @@ void Matcher::init_spill_mask( Node *ret ) {
idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
// Vector regmasks.
+ if (Matcher::supports_scalable_vector()) {
+ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA));
+ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask();
+ }
if (Matcher::vector_size_supported(T_BYTE,4)) {
TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
@@ -1573,7 +1609,6 @@ Node* Matcher::Label_Root(const Node* n, State* svec, Node* control, Node*& mem)
}
}
-
// Call DFA to match this node, and return
svec->DFA( n->Opcode(), n );
diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp
index 244e3d1f8..9a8307102 100644
--- a/src/hotspot/share/opto/matcher.hpp
+++ b/src/hotspot/share/opto/matcher.hpp
@@ -310,7 +310,7 @@ public:
// identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
- static const bool match_rule_supported_vector(int opcode, int vlen);
+ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt);
// Some microarchitectures have mask registers used on vectors
static const bool has_predicated_vectors(void);
@@ -333,6 +333,10 @@ public:
Matcher::min_vector_size(bt) <= size);
}
+ static const bool supports_scalable_vector();
+ // Actual max scalable vector register length.
+ static const int scalable_vector_reg_size(const BasicType bt);
+
// Vector ideal reg
static const uint vector_ideal_reg(int len);
static const uint vector_shift_count_ideal_reg(int len);
diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp
index e31e8d847..1a826d8ba 100644
--- a/src/hotspot/share/opto/opcodes.cpp
+++ b/src/hotspot/share/opto/opcodes.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -38,12 +38,14 @@ const char *NodeClassNames[] = {
"RegF",
"RegD",
"RegL",
- "RegFlags",
+ "VecA",
"VecS",
"VecD",
"VecX",
"VecY",
"VecZ",
+ "RegVMask",
+ "RegFlags",
"_last_machine_leaf",
#include "classes.hpp"
"_last_class_name",
diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp
index ae3d61ce0..ec96ba055 100644
--- a/src/hotspot/share/opto/opcodes.hpp
+++ b/src/hotspot/share/opto/opcodes.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -37,11 +37,13 @@ enum Opcodes {
macro(RegF) // Machine float register
macro(RegD) // Machine double register
macro(RegL) // Machine long register
+ macro(VecA) // Machine vectora register
macro(VecS) // Machine vectors register
macro(VecD) // Machine vectord register
macro(VecX) // Machine vectorx register
macro(VecY) // Machine vectory register
macro(VecZ) // Machine vectorz register
+ macro(RegVMask) // Vector mask/predicate register
macro(RegFlags) // Machine flags register
_last_machine_leaf, // Split between regular opcodes and machine
#include "classes.hpp"
diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp
index d572ac9fe..3514b37bc 100644
--- a/src/hotspot/share/opto/postaloc.cpp
+++ b/src/hotspot/share/opto/postaloc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -266,9 +266,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
Node *val = skip_copies(n->in(k));
if (val == x) return blk_adjust; // No progress?
- int n_regs = RegMask::num_registers(val->ideal_reg());
uint val_idx = _lrg_map.live_range_id(val);
OptoReg::Name val_reg = lrgs(val_idx).reg();
+ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx));
// See if it happens to already be in the correct register!
// (either Phi's direct register, or the common case of the name
@@ -305,8 +305,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
}
Node *vv = value[reg];
+ // For a scalable register, the number of slots may differ between
+ // "val_reg" and "reg", e.g. when "val" resides in a register
+ // but "reg" refers to a stack location.
+ if (lrgs(val_idx).is_scalable()) {
+ assert(val->ideal_reg() == Op_VecA, "scalable vector register");
+ if (OptoReg::is_stack(reg)) {
+ n_regs = lrgs(val_idx).scalable_reg_slots();
+ } else {
+ n_regs = RegMask::SlotsPerVecA;
+ }
+ }
if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
- uint last = (n_regs-1); // Looking for the last part of a set
+ uint last;
+ if (lrgs(val_idx).is_scalable()) {
+ assert(val->ideal_reg() == Op_VecA, "scalable vector register");
+ // For a scalable vector register, the regmask is always aligned to SlotsPerVecA bits
+ last = RegMask::SlotsPerVecA - 1;
+ } else {
+ last = (n_regs-1); // Looking for the last part of a set
+ }
if ((reg&last) != last) continue; // Wrong part of a set
if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
}
@@ -591,7 +609,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
uint k;
Node *phi = block->get_node(j);
uint pidx = _lrg_map.live_range_id(phi);
- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
+ OptoReg::Name preg = lrgs(pidx).reg();
// Remove copies remaining on edges. Check for junk phi.
Node *u = NULL;
@@ -619,7 +637,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
if( pidx ) {
value.map(preg,phi);
regnd.map(preg,phi);
- int n_regs = RegMask::num_registers(phi->ideal_reg());
+ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx));
for (int l = 1; l < n_regs; l++) {
OptoReg::Name preg_lo = OptoReg::add(preg,-l);
value.map(preg_lo,phi);
@@ -663,7 +681,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
regnd.map(ureg, def);
// Record other half of doubles
uint def_ideal_reg = def->ideal_reg();
- int n_regs = RegMask::num_registers(def_ideal_reg);
+ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def)));
for (int l = 1; l < n_regs; l++) {
OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
if (!value[ureg_lo] &&
@@ -707,7 +725,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
}
uint n_ideal_reg = n->ideal_reg();
- int n_regs = RegMask::num_registers(n_ideal_reg);
+ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx));
if (n_regs == 1) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node
diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp
index 2e04c42eb..dd9b5476b 100644
--- a/src/hotspot/share/opto/regmask.cpp
+++ b/src/hotspot/share/opto/regmask.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "opto/ad.hpp"
+#include "opto/chaitin.hpp"
#include "opto/compile.hpp"
#include "opto/matcher.hpp"
#include "opto/node.hpp"
@@ -116,30 +117,47 @@ const RegMask RegMask::Empty(
//=============================================================================
bool RegMask::is_vector(uint ireg) {
- return (ireg == Op_VecS || ireg == Op_VecD ||
+ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD ||
ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ );
}
int RegMask::num_registers(uint ireg) {
switch(ireg) {
case Op_VecZ:
- return 16;
+ return SlotsPerVecZ;
case Op_VecY:
- return 8;
+ return SlotsPerVecY;
case Op_VecX:
- return 4;
+ return SlotsPerVecX;
case Op_VecD:
+ return SlotsPerVecD;
case Op_RegD:
case Op_RegL:
#ifdef _LP64
case Op_RegP:
#endif
return 2;
+ case Op_VecA:
+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
+ return SlotsPerVecA;
}
// Op_VecS and the rest ideal registers.
return 1;
}
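+
+// Variant used once a live range has a register assigned: a scalable live range
+// spilled to the stack occupies its actual number of 32-bit slots rather than
+// the fixed SlotsPerVecA.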
+int RegMask::num_registers(uint ireg, LRG &lrg) {
+ int n_regs = num_registers(ireg);
+
+ // "assigned" is the OptoReg selected by the register allocator
+ OptoReg::Name assigned = lrg.reg();
+ assert(OptoReg::is_valid(assigned), "should be valid opto register");
+
+ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) {
+ n_regs = lrg.scalable_reg_slots();
+ }
+ return n_regs;
+}
+
//------------------------------find_first_pair--------------------------------
// Find the lowest-numbered register pair in the mask. Return the
// HIGHEST register number in the pair, or BAD if no pairs.
@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const {
return true;
}
+// Check whether the given reg number, together with its size, is valid
+// for the current regmask, where reg is the highest register of the set.
+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const {
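+ // Every slot from reg down to (reg - size + 1) must be present in the mask.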
+ for (int i = 0; i < size; i++) {
+ if (!Member(reg - i)) {
+ return false;
+ }
+ }
+ return true;
+}
+
// only indicies of power 2 are accessed, so index 3 is only filled in for storage.
static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 };
//------------------------------find_first_set---------------------------------
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Works also for size 1.
-OptoReg::Name RegMask::find_first_set(const int size) const {
- verify_sets(size);
+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const {
+ if (lrg.is_scalable()) {
+ // For a scalable vector register, the regmask is aligned to SlotsPerVecA bits.
+ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets");
+ } else {
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
+ }
for (int i = 0; i < RM_SIZE; i++) {
if (_A[i]) { // Found some bits
int bit = _A[i] & -_A[i]; // Extract low bit
@@ -325,12 +359,16 @@ bool RegMask::is_aligned_sets(const int size) const {
while (bits) { // Check bits for pairing
int bit = bits & -bits; // Extract low bit
// Low bit is not odd means its mis-aligned.
- if ((bit & low_bits_mask) == 0) return false;
+ if ((bit & low_bits_mask) == 0) {
+ return false;
+ }
// Do extra work since (bit << size) may overflow.
int hi_bit = bit << (size-1); // high bit
int set = hi_bit + ((hi_bit-1) & ~(bit-1));
// Check for aligned adjacent bits in this set
- if ((bits & set) != set) return false;
+ if ((bits & set) != set) {
+ return false;
+ }
bits -= set; // Remove this set
}
}
diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp
index c64d08795..b733b87ad 100644
--- a/src/hotspot/share/opto/regmask.hpp
+++ b/src/hotspot/share/opto/regmask.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,8 @@
#include "code/vmreg.hpp"
#include "opto/optoreg.hpp"
+class LRG;
+
// Some fun naming (textual) substitutions:
//
// RegMask::get_low_elem() ==> RegMask::find_first_elem()
@@ -95,11 +97,13 @@ public:
// requirement is internal to the allocator, and independent of any
// particular platform.
enum { SlotsPerLong = 2,
+ SlotsPerVecA = 8,
SlotsPerVecS = 1,
SlotsPerVecD = 2,
SlotsPerVecX = 4,
SlotsPerVecY = 8,
- SlotsPerVecZ = 16 };
+ SlotsPerVecZ = 16,
+ };
// A constructor only used by the ADLC output. All mask fields are filled
// in directly. Calls to this look something like RM(1,2,3,4);
@@ -204,10 +208,14 @@ public:
return false;
}
+ // Check whether the given reg number, together with its size, is valid
+ // for the current regmask, where reg is the highest register of the set.
+ bool is_valid_reg(OptoReg::Name reg, const int size) const;
+
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Assert that the mask contains only bit sets.
- OptoReg::Name find_first_set(const int size) const;
+ OptoReg::Name find_first_set(LRG &lrg, const int size) const;
// Clear out partial bits; leave only aligned adjacent bit sets of size.
void clear_to_sets(const int size);
@@ -226,6 +234,7 @@ public:
static bool is_vector(uint ireg);
static int num_registers(uint ireg);
+ static int num_registers(uint ireg, LRG &lrg);
// Fast overlap test. Non-zero if any registers in common.
int overlap( const RegMask &rm ) const {
diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index e7714ba3e..a6a62ea4a 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -93,8 +93,11 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
//------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
assert(UseSuperWord, "should be");
- // Do vectors exist on this architecture?
- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
+ // SuperWord only works with power of two vector sizes.
+ int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
+ if (vector_width < 2 || !is_power_of_2(vector_width)) {
+ return;
+ }
assert(lpt->_head->is_CountedLoop(), "must be");
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp
index 8898a3f00..37ec81995 100644
--- a/src/hotspot/share/opto/type.cpp
+++ b/src/hotspot/share/opto/type.cpp
@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
#else // all other
+ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
@@ -649,6 +650,10 @@ void Type::Initialize_shared(Compile* current) {
// get_zero_type() should not happen for T_CONFLICT
_zero_type[T_CONFLICT]= NULL;
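+ // The length-agnostic vector type is sized by the actual scalable vector
+ // register length reported by the matcher.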
+ if (Matcher::supports_scalable_vector()) {
+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
+ }
+
// Vector predefined types, it needs initialized _const_basic_type[].
if (Matcher::vector_size_supported(T_BYTE,4)) {
TypeVect::VECTS = TypeVect::make(T_BYTE,4);
@@ -665,6 +670,8 @@ void Type::Initialize_shared(Compile* current) {
if (Matcher::vector_size_supported(T_FLOAT,16)) {
TypeVect::VECTZ = TypeVect::make(T_FLOAT,16);
}
+
+ mreg2type[Op_VecA] = TypeVect::VECTA;
mreg2type[Op_VecS] = TypeVect::VECTS;
mreg2type[Op_VecD] = TypeVect::VECTD;
mreg2type[Op_VecX] = TypeVect::VECTX;
@@ -984,6 +991,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = {
Bad, // Tuple - handled in v-call
Bad, // Array - handled in v-call
+ Bad, // VectorA - handled in v-call
Bad, // VectorS - handled in v-call
Bad, // VectorD - handled in v-call
Bad, // VectorX - handled in v-call
@@ -1880,7 +1888,6 @@ const TypeTuple *TypeTuple::LONG_PAIR;
const TypeTuple *TypeTuple::INT_CC_PAIR;
const TypeTuple *TypeTuple::LONG_CC_PAIR;
-
//------------------------------make-------------------------------------------
// Make a TypeTuple from the range of a method signature
const TypeTuple *TypeTuple::make_range(ciSignature* sig) {
@@ -2252,6 +2259,7 @@ bool TypeAry::ary_must_be_exact() const {
//==============================TypeVect=======================================
// Convenience common pre-built types.
+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic
const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors
const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
@@ -2262,10 +2270,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors
const TypeVect* TypeVect::make(const Type *elem, uint length) {
BasicType elem_bt = elem->array_element_basic_type();
assert(is_java_primitive(elem_bt), "only primitive types in vector");
- assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
int size = length * type2aelembytes(elem_bt);
switch (Matcher::vector_ideal_reg(size)) {
+ case Op_VecA:
+ return (TypeVect*)(new TypeVectA(elem, length))->hashcons();
case Op_VecS:
return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
case Op_RegL:
@@ -2297,7 +2306,7 @@ const Type *TypeVect::xmeet( const Type *t ) const {
default: // All else is a mistake
typerr(t);
-
+ case VectorA:
case VectorS:
case VectorD:
case VectorX:
@@ -2352,6 +2361,8 @@ bool TypeVect::empty(void) const {
#ifndef PRODUCT
void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
switch (base()) {
+ case VectorA:
+ st->print("vectora["); break;
case VectorS:
st->print("vectors["); break;
case VectorD:
diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp
index 6c8194670..ca92fe3ab 100644
--- a/src/hotspot/share/opto/type.hpp
+++ b/src/hotspot/share/opto/type.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -53,6 +53,7 @@ class TypeNarrowKlass;
class TypeAry;
class TypeTuple;
class TypeVect;
+class TypeVectA;
class TypeVectS;
class TypeVectD;
class TypeVectX;
@@ -87,6 +88,7 @@ public:
Tuple, // Method signature or object layout
Array, // Array types
+ VectorA, // Length-agnostic (scalable) vector types
VectorS, // 32bit Vector types
VectorD, // 64bit Vector types
VectorX, // 128bit Vector types
@@ -754,6 +756,7 @@ public:
virtual const Type *xmeet( const Type *t) const;
virtual const Type *xdual() const; // Compute dual right now.
+ static const TypeVect *VECTA;
static const TypeVect *VECTS;
static const TypeVect *VECTD;
static const TypeVect *VECTX;
@@ -765,6 +768,11 @@ public:
#endif
};
+class TypeVectA : public TypeVect {
+ friend class TypeVect;
+ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {}
+};
+
class TypeVectS : public TypeVect {
friend class TypeVect;
TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
@@ -1611,12 +1619,12 @@ inline const TypeAry *Type::is_ary() const {
}
inline const TypeVect *Type::is_vect() const {
- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" );
+ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" );
return (TypeVect*)this;
}
inline const TypeVect *Type::isa_vect() const {
- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL;
+ return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL;
}
inline const TypePtr *Type::is_ptr() const {
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
index fae147fa8..3a0a42513 100644
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -221,7 +221,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
int vopc = VectorNode::opcode(opc, bt);
- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen);
+ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt);
}
return false;
}
@@ -608,7 +608,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
int vopc = ReductionNode::opcode(opc, bt);
- return vopc != opc && Matcher::match_rule_supported(vopc);
+ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt);
}
return false;
}
diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java
new file mode 100644
index 000000000..dc15ca800
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java
@@ -0,0 +1,128 @@
+/*
+* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+* Copyright (c) 2020, Arm Limited. All rights reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+/**
+ * @test
+ *
+ * @requires os.arch == "aarch64" & vm.compiler2.enabled
+ * @summary Verify VM SVE checking behavior
+ * @library /test/lib
+ * @run main/othervm/native compiler.c2.aarch64.TestSVEWithJNI
+ *
+ */
+
+package compiler.c2.aarch64;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import jdk.test.lib.process.ProcessTools;
+import jdk.test.lib.process.OutputAnalyzer;
+
+public class TestSVEWithJNI {
+ static {
+ System.loadLibrary("TestSVEWithJNI");
+ }
+
+ static final int EXIT_CODE = 99;
+ // Returns a nonnegative value on success, or a negative value on error.
+ public static native int setVectorLength(int arg);
+ // Returns a nonnegative value on success, or a negative value on error.
+ public static native int getVectorLength();
+
+ public static final String MSG = "Current Vector Size: ";
+ public static void testNormal() {
+ int vlen = getVectorLength();
+ System.out.println(MSG + vlen);
+ // Setting the same vector length again should succeed.
+ if (setVectorLength(vlen) < 0) {
+ throw new Error("Error in setting vector length.");
+ }
+ }
+
+ public static void testAbort() {
+ int vlen = getVectorLength();
+ if (vlen <= 16) {
+ throw new Error("Error: unsupported vector length.");
+ }
+ if (setVectorLength(16) < 0) {
+ throw new Error("Error: setting vector length failed.");
+ }
+ }
+
+ public static ProcessBuilder createProcessBuilder(String [] args, String mode) {
+ List<String> vmopts = new ArrayList<>();
+ String testjdkPath = System.getProperty("test.jdk");
+ Collections.addAll(vmopts, "-Dtest.jdk=" + testjdkPath);
+ Collections.addAll(vmopts, args);
+ Collections.addAll(vmopts, TestSVEWithJNI.class.getName(), mode);
+ return ProcessTools.createJavaProcessBuilder(vmopts.toArray(new String[vmopts.size()]));
+ }
+
+ public static void main(String [] args) throws Exception {
+ if (args.length == 0) {
+ int vlen = getVectorLength();
+ if (vlen < 0) {
+ return;
+ }
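+ // Run the normal and abort scenarios in child VMs, both interpreted (-Xint) and compiled (-Xcomp).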
+ String [][] testOpts = {
+ {"-Xint", "-XX:UseSVE=1"},
+ {"-Xcomp", "-XX:UseSVE=1"},
+ };
+ ProcessBuilder pb;
+ OutputAnalyzer output;
+ for (String [] opts : testOpts) {
+ pb = createProcessBuilder(opts, "normal");
+ output = new OutputAnalyzer(pb.start());
+ output.shouldHaveExitValue(EXIT_CODE);
+
+ pb = createProcessBuilder(opts, "abort");
+ output = new OutputAnalyzer(pb.start());
+ output.shouldNotHaveExitValue(EXIT_CODE);
+ output.shouldMatch("(error|Error|ERROR)");
+ }
+
+ // Verify MaxVectorSize
+
+ // Any SVE architecture should support 128-bit vector size.
+ pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=16"}, "normal");
+ output = new OutputAnalyzer(pb.start());
+ output.shouldHaveExitValue(EXIT_CODE);
+ output.shouldContain(MSG + 16);
+
+ // An unsupported large vector size value.
+ pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=512"}, "normal");
+ output = new OutputAnalyzer(pb.start());
+ output.shouldHaveExitValue(EXIT_CODE);
+ output.shouldContain("warning");
+ } else if (args[0].equals("normal")) {
+ testNormal();
+ System.exit(EXIT_CODE);
+ } else if (args[0].equals("abort")) {
+ testAbort();
+ System.exit(EXIT_CODE);
+ }
+ }
+}
diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c b/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c
new file mode 100644
index 000000000..0cb3ab0b5
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c
@@ -0,0 +1,68 @@
+/*
+* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+* Copyright (c) 2020, Arm Limited. All rights reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#ifdef __aarch64__
+
+#include <jni.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#ifndef PR_SVE_GET_VL
+// For old toolchains which do not have SVE related macros defined.
+#define PR_SVE_SET_VL 50
+#define PR_SVE_GET_VL 51
+#endif
+
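+// Thin wrappers around the Linux prctl() SVE controls; both return a negative
+// value on failure.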
+int get_current_thread_vl() {
+ return prctl(PR_SVE_GET_VL);
+}
+
+int set_current_thread_vl(unsigned long arg) {
+ return prctl(PR_SVE_SET_VL, arg);
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_setVectorLength
+(JNIEnv * env, jclass clz, jint length) {
+ return set_current_thread_vl(length);
+}
+
+JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_getVectorLength
+(JNIEnv *env, jclass clz) {
+ return get_current_thread_vl();
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--
2.19.1