2020-12-24 15:35:16 +08:00
|
|
|
diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
|
2021-08-13 14:54:30 +08:00
|
|
|
index a39640526..2479853fa 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
|
|
|
|
|
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -146,6 +146,12 @@ ifeq ($(call check-jvm-feature, compiler2), true)
|
|
|
|
|
)))
|
|
|
|
|
endif
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
|
|
|
|
|
+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
|
|
|
|
|
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
|
|
|
|
|
+ )))
|
|
|
|
|
+ endif
|
|
|
|
|
+
|
2021-08-13 14:54:30 +08:00
|
|
|
ifeq ($(call check-jvm-feature, shenandoahgc), true)
|
2020-12-24 15:35:16 +08:00
|
|
|
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
|
2021-08-13 14:54:30 +08:00
|
|
|
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \
|
2020-12-24 15:35:16 +08:00
|
|
|
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
|
2021-08-13 14:54:30 +08:00
|
|
|
index b64919a62..fa434df7d 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/aarch64.ad
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
|
|
|
|
|
@@ -69,7 +69,7 @@ register %{
|
|
|
|
|
//
|
|
|
|
|
// r0-r7,r10-r26 volatile (caller save)
|
|
|
|
|
// r27-r32 system (no save, no allocate)
|
|
|
|
|
-// r8-r9 invisible to the allocator (so we can use them as scratch regs)
|
|
|
|
|
+// r8-r9 non-allocatable (so we can use them as scratch regs)
|
|
|
|
|
//
|
|
|
|
|
// as regards Java usage. we don't use any callee save registers
|
|
|
|
|
// because this makes it difficult to de-optimise a frame (see comment
|
|
|
|
|
@@ -94,6 +94,10 @@ reg_def R6 ( SOC, SOC, Op_RegI, 6, r6->as_VMReg() );
|
|
|
|
|
reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() );
|
|
|
|
|
reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() );
|
|
|
|
|
reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() );
|
|
|
|
|
+reg_def R8 ( NS, SOC, Op_RegI, 8, r8->as_VMReg() ); // rscratch1, non-allocatable
|
|
|
|
|
+reg_def R8_H ( NS, SOC, Op_RegI, 8, r8->as_VMReg()->next() );
|
|
|
|
|
+reg_def R9 ( NS, SOC, Op_RegI, 9, r9->as_VMReg() ); // rscratch2, non-allocatable
|
|
|
|
|
+reg_def R9_H ( NS, SOC, Op_RegI, 9, r9->as_VMReg()->next() );
|
|
|
|
|
reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() );
|
|
|
|
|
reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
|
|
|
|
|
reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() );
|
|
|
|
|
@@ -140,7 +144,7 @@ reg_def R31 ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg() ); // sp
|
|
|
|
|
reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
|
|
|
|
|
|
|
|
|
|
// ----------------------------
|
|
|
|
|
-// Float/Double Registers
|
|
|
|
|
+// Float/Double/Vector Registers
|
|
|
|
|
// ----------------------------
|
|
|
|
|
|
|
|
|
|
// Double Registers
|
|
|
|
|
@@ -161,165 +165,316 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
|
|
|
|
|
// the platform ABI treats v8-v15 as callee save). float registers
|
|
|
|
|
// v16-v31 are SOC as per the platform spec
|
|
|
|
|
|
|
|
|
|
- reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() );
|
|
|
|
|
- reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() );
|
|
|
|
|
- reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() );
|
|
|
|
|
- reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() );
|
|
|
|
|
- reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() );
|
|
|
|
|
- reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() );
|
|
|
|
|
- reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() );
|
|
|
|
|
- reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() );
|
|
|
|
|
- reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() );
|
|
|
|
|
- reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() );
|
|
|
|
|
- reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() );
|
|
|
|
|
- reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() );
|
|
|
|
|
- reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() );
|
|
|
|
|
- reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() );
|
|
|
|
|
- reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() );
|
|
|
|
|
- reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() );
|
|
|
|
|
- reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() );
|
|
|
|
|
- reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() );
|
|
|
|
|
- reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() );
|
|
|
|
|
- reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() );
|
|
|
|
|
- reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
|
|
|
|
|
- reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );
|
|
|
|
|
-
|
|
|
|
|
- reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() );
|
|
|
|
|
- reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
|
|
|
|
|
- reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() );
|
|
|
|
|
- reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
|
|
|
|
|
- reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() );
|
|
|
|
|
- reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
|
|
|
|
|
- reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() );
|
|
|
|
|
- reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
|
|
|
|
|
- reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() );
|
|
|
|
|
- reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
|
|
|
|
|
- reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() );
|
|
|
|
|
- reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
|
|
|
|
|
- reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() );
|
|
|
|
|
- reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
|
|
|
|
|
- reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() );
|
|
|
|
|
- reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
|
|
|
|
|
- reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() );
|
|
|
|
|
- reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
|
|
|
|
|
- reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() );
|
|
|
|
|
- reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
|
|
|
|
|
- reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() );
|
|
|
|
|
- reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
|
|
|
|
|
- reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() );
|
|
|
|
|
- reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
|
|
|
|
|
- reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() );
|
|
|
|
|
- reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
|
|
|
|
|
- reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() );
|
|
|
|
|
- reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
|
|
|
|
|
- reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() );
|
|
|
|
|
- reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
|
|
|
|
|
- reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() );
|
|
|
|
|
- reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
|
|
|
|
|
- reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() );
|
|
|
|
|
- reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
|
|
|
|
|
- reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
|
|
|
|
|
- reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
|
|
|
|
|
- reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
|
|
|
|
|
- reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
|
|
|
|
|
- reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
|
|
|
|
|
- reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
|
|
|
|
|
- reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
|
|
|
|
|
- reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
|
|
|
|
|
- reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
|
|
|
|
|
-
|
|
|
|
|
- reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
|
|
|
|
|
- reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
|
|
|
|
|
- reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
|
|
|
|
|
- reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
|
|
|
|
|
+// For SVE vector registers, we simply extend vector register size to 8
|
|
|
|
|
+// 'logical' slots. This is nominally 256 bits but it actually covers
|
|
|
|
|
+// all possible 'physical' SVE vector register lengths from 128 ~ 2048
|
|
|
|
|
+// bits. The 'physical' SVE vector register length is detected during
|
|
|
|
|
+// startup, so the register allocator is able to identify the correct
|
|
|
|
|
+// number of bytes needed for an SVE spill/unspill.
|
|
|
|
|
+// Note that a vector register with 4 slots denotes a 128-bit NEON
|
|
|
|
|
+// register allowing it to be distinguished from the corresponding SVE
|
|
|
|
|
+// vector register when the SVE vector length is 128 bits.
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() );
|
|
|
|
|
+ reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V0_L ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V0_M ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V0_N ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V0_O ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() );
|
|
|
|
|
+ reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V1_L ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V1_M ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V1_N ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V1_O ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() );
|
|
|
|
|
+ reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V2_L ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V2_M ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V2_N ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V2_O ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() );
|
|
|
|
|
+ reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V3_L ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V3_M ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V3_N ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V3_O ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() );
|
|
|
|
|
+ reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V4_L ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V4_M ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V4_N ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V4_O ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() );
|
|
|
|
|
+ reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V5_L ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V5_M ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V5_N ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V5_O ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() );
|
|
|
|
|
+ reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V6_L ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V6_M ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V6_N ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V6_O ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() );
|
|
|
|
|
+ reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V7_L ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V7_M ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V7_N ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V7_O ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() );
|
|
|
|
|
+ reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V8_L ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V8_M ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V8_N ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V8_O ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() );
|
|
|
|
|
+ reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V9_L ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V9_M ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V9_N ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V9_O ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() );
|
|
|
|
|
+ reg_def V10_H ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V10_J ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V10_K ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V10_L ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V10_M ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V10_N ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V10_O ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() );
|
|
|
|
|
+ reg_def V11_H ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V11_J ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V11_K ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V11_L ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V11_M ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V11_N ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V11_O ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() );
|
|
|
|
|
+ reg_def V12_H ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V12_J ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V12_K ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V12_L ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V12_M ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V12_N ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V12_O ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() );
|
|
|
|
|
+ reg_def V13_H ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V13_J ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V13_K ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V13_L ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V13_M ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V13_N ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V13_O ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() );
|
|
|
|
|
+ reg_def V14_H ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V14_J ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V14_K ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V14_L ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V14_M ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V14_N ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V14_O ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() );
|
|
|
|
|
+ reg_def V15_H ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V15_J ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V15_K ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V15_L ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V15_M ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V15_N ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V15_O ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() );
|
|
|
|
|
+ reg_def V16_H ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V16_J ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V16_K ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V16_L ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V16_M ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V16_N ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V16_O ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() );
|
|
|
|
|
+ reg_def V17_H ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V17_J ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V17_K ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V17_L ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V17_M ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V17_N ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V17_O ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() );
|
|
|
|
|
+ reg_def V18_H ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V18_J ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V18_K ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V18_L ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V18_M ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V18_N ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V18_O ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() );
|
|
|
|
|
+ reg_def V19_H ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V19_J ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V19_K ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V19_L ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V19_M ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V19_N ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V19_O ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() );
|
|
|
|
|
+ reg_def V20_H ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V20_J ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V20_K ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V20_L ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V20_M ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V20_N ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V20_O ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() );
|
|
|
|
|
+ reg_def V21_H ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V21_J ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V21_K ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V21_L ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V21_M ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V21_N ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V21_O ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() );
|
|
|
|
|
+ reg_def V22_H ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V22_J ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V22_K ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V22_L ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V22_M ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V22_N ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V22_O ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() );
|
|
|
|
|
+ reg_def V23_H ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V23_J ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V23_K ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V23_L ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V23_M ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V23_N ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V23_O ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() );
|
|
|
|
|
+ reg_def V24_H ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V24_J ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V24_K ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V24_L ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V24_M ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V24_N ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V24_O ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() );
|
|
|
|
|
+ reg_def V25_H ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V25_J ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V25_K ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V25_L ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V25_M ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V25_N ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V25_O ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() );
|
|
|
|
|
+ reg_def V26_H ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V26_J ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V26_K ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V26_L ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V26_M ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V26_N ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V26_O ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
|
|
|
|
|
+ reg_def V27_H ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V27_J ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V27_K ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V27_L ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V27_M ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V27_N ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V27_O ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
|
|
|
|
|
+ reg_def V28_H ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V28_J ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V28_K ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V28_L ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V28_M ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V28_N ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V28_O ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
|
|
|
|
|
+ reg_def V29_H ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V29_J ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V29_K ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V29_L ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V29_M ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V29_N ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V29_O ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
|
|
|
|
|
+ reg_def V30_H ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V30_J ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V30_K ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V30_L ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V30_M ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V30_N ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V30_O ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+ reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
|
|
|
|
|
+ reg_def V31_H ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
|
|
|
|
|
+ reg_def V31_J ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2) );
|
|
|
|
|
+ reg_def V31_K ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3) );
|
|
|
|
|
+ reg_def V31_L ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(4) );
|
|
|
|
|
+ reg_def V31_M ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(5) );
|
|
|
|
|
+ reg_def V31_N ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(6) );
|
|
|
|
|
+ reg_def V31_O ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(7) );
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+// ----------------------------
|
|
|
|
|
+// SVE Predicate Registers
|
|
|
|
|
+// ----------------------------
|
|
|
|
|
+ reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg());
|
|
|
|
|
+ reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg());
|
|
|
|
|
+ reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg());
|
|
|
|
|
+ reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg());
|
|
|
|
|
+ reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg());
|
|
|
|
|
+ reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg());
|
|
|
|
|
+ reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg());
|
|
|
|
|
+ reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg());
|
|
|
|
|
|
|
|
|
|
// ----------------------------
|
|
|
|
|
// Special Registers
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -381,50 +536,64 @@ alloc_class chunk0(
|
2020-12-24 15:35:16 +08:00
|
|
|
R29, R29_H, // fp
|
|
|
|
|
R30, R30_H, // lr
|
|
|
|
|
R31, R31_H, // sp
|
|
|
|
|
+ R8, R8_H, // rscratch1
|
|
|
|
|
+ R9, R9_H, // rscratch2
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
alloc_class chunk1(
|
|
|
|
|
|
|
|
|
|
// no save
|
|
|
|
|
- V16, V16_H, V16_J, V16_K,
|
|
|
|
|
- V17, V17_H, V17_J, V17_K,
|
|
|
|
|
- V18, V18_H, V18_J, V18_K,
|
|
|
|
|
- V19, V19_H, V19_J, V19_K,
|
|
|
|
|
- V20, V20_H, V20_J, V20_K,
|
|
|
|
|
- V21, V21_H, V21_J, V21_K,
|
|
|
|
|
- V22, V22_H, V22_J, V22_K,
|
|
|
|
|
- V23, V23_H, V23_J, V23_K,
|
|
|
|
|
- V24, V24_H, V24_J, V24_K,
|
|
|
|
|
- V25, V25_H, V25_J, V25_K,
|
|
|
|
|
- V26, V26_H, V26_J, V26_K,
|
|
|
|
|
- V27, V27_H, V27_J, V27_K,
|
|
|
|
|
- V28, V28_H, V28_J, V28_K,
|
|
|
|
|
- V29, V29_H, V29_J, V29_K,
|
|
|
|
|
- V30, V30_H, V30_J, V30_K,
|
|
|
|
|
- V31, V31_H, V31_J, V31_K,
|
|
|
|
|
+ V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O,
|
|
|
|
|
+ V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O,
|
|
|
|
|
+ V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O,
|
|
|
|
|
+ V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O,
|
|
|
|
|
+ V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O,
|
|
|
|
|
+ V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O,
|
|
|
|
|
+ V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O,
|
|
|
|
|
+ V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O,
|
|
|
|
|
+ V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O,
|
|
|
|
|
+ V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O,
|
|
|
|
|
+ V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O,
|
|
|
|
|
+ V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O,
|
|
|
|
|
+ V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O,
|
|
|
|
|
+ V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O,
|
|
|
|
|
+ V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O,
|
|
|
|
|
+ V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O,
|
|
|
|
|
|
|
|
|
|
// arg registers
|
|
|
|
|
- V0, V0_H, V0_J, V0_K,
|
|
|
|
|
- V1, V1_H, V1_J, V1_K,
|
|
|
|
|
- V2, V2_H, V2_J, V2_K,
|
|
|
|
|
- V3, V3_H, V3_J, V3_K,
|
|
|
|
|
- V4, V4_H, V4_J, V4_K,
|
|
|
|
|
- V5, V5_H, V5_J, V5_K,
|
|
|
|
|
- V6, V6_H, V6_J, V6_K,
|
|
|
|
|
- V7, V7_H, V7_J, V7_K,
|
|
|
|
|
+ V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O,
|
|
|
|
|
+ V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O,
|
|
|
|
|
+ V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O,
|
|
|
|
|
+ V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O,
|
|
|
|
|
+ V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O,
|
|
|
|
|
+ V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O,
|
|
|
|
|
+ V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O,
|
|
|
|
|
+ V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O,
|
|
|
|
|
|
|
|
|
|
// non-volatiles
|
|
|
|
|
- V8, V8_H, V8_J, V8_K,
|
|
|
|
|
- V9, V9_H, V9_J, V9_K,
|
|
|
|
|
- V10, V10_H, V10_J, V10_K,
|
|
|
|
|
- V11, V11_H, V11_J, V11_K,
|
|
|
|
|
- V12, V12_H, V12_J, V12_K,
|
|
|
|
|
- V13, V13_H, V13_J, V13_K,
|
|
|
|
|
- V14, V14_H, V14_J, V14_K,
|
|
|
|
|
- V15, V15_H, V15_J, V15_K,
|
|
|
|
|
+ V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O,
|
|
|
|
|
+ V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O,
|
|
|
|
|
+ V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O,
|
|
|
|
|
+ V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O,
|
|
|
|
|
+ V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O,
|
|
|
|
|
+ V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O,
|
|
|
|
|
+ V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O,
|
|
|
|
|
+ V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O,
|
2021-08-13 14:54:30 +08:00
|
|
|
);
|
|
|
|
|
|
|
|
|
|
-alloc_class chunk2(RFLAGS);
|
2020-12-24 15:35:16 +08:00
|
|
|
+alloc_class chunk2 (
|
|
|
|
|
+ P0,
|
|
|
|
|
+ P1,
|
|
|
|
|
+ P2,
|
|
|
|
|
+ P3,
|
|
|
|
|
+ P4,
|
|
|
|
|
+ P5,
|
|
|
|
|
+ P6,
|
|
|
|
|
+ P7,
|
|
|
|
|
+ // Only use P0~P7 here for performance
|
2021-08-13 14:54:30 +08:00
|
|
|
+);
|
|
|
|
|
+
|
2020-12-24 15:35:16 +08:00
|
|
|
+alloc_class chunk3(RFLAGS);
|
|
|
|
|
|
|
|
|
|
//----------Architecture Description Register Classes--------------------------
|
|
|
|
|
// Several register classes are automatically defined based upon information in
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -865,6 +1034,42 @@ reg_class double_reg(
|
2020-12-24 15:35:16 +08:00
|
|
|
V31, V31_H
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
+// Class for all SVE vector registers.
|
|
|
|
|
+reg_class vectora_reg (
|
|
|
|
|
+ V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O,
|
|
|
|
|
+ V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O,
|
|
|
|
|
+ V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O,
|
|
|
|
|
+ V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O,
|
|
|
|
|
+ V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O,
|
|
|
|
|
+ V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O,
|
|
|
|
|
+ V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O,
|
|
|
|
|
+ V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O,
|
|
|
|
|
+ V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O,
|
|
|
|
|
+ V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O,
|
|
|
|
|
+ V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O,
|
|
|
|
|
+ V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O,
|
|
|
|
|
+ V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O,
|
|
|
|
|
+ V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O,
|
|
|
|
|
+ V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O,
|
|
|
|
|
+ V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O,
|
|
|
|
|
+ V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O,
|
|
|
|
|
+ V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O,
|
|
|
|
|
+ V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O,
|
|
|
|
|
+ V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O,
|
|
|
|
|
+ V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O,
|
|
|
|
|
+ V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O,
|
|
|
|
|
+ V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O,
|
|
|
|
|
+ V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O,
|
|
|
|
|
+ V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O,
|
|
|
|
|
+ V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O,
|
|
|
|
|
+ V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O,
|
|
|
|
|
+ V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O,
|
|
|
|
|
+ V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O,
|
|
|
|
|
+ V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O,
|
|
|
|
|
+ V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O,
|
|
|
|
|
+ V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O,
|
|
|
|
|
+);
|
|
|
|
|
+
|
|
|
|
|
// Class for all 64bit vector registers
|
|
|
|
|
reg_class vectord_reg(
|
|
|
|
|
V0, V0_H,
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1097,6 +1302,31 @@ reg_class v31_reg(
|
2020-12-24 15:35:16 +08:00
|
|
|
V31, V31_H
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
+// Class for all SVE predicate registers.
|
|
|
|
|
+reg_class pr_reg (
|
|
|
|
|
+ P0,
|
|
|
|
|
+ P1,
|
|
|
|
|
+ P2,
|
|
|
|
|
+ P3,
|
|
|
|
|
+ P4,
|
|
|
|
|
+ P5,
|
|
|
|
|
+ P6,
|
|
|
|
|
+ // P7, non-allocatable, preserved with all elements preset to TRUE.
|
|
|
|
|
+);
|
|
|
|
|
+
|
|
|
|
|
+// Class for SVE governing predicate registers, which are used
|
|
|
|
|
+// to determine the active elements of a predicated instruction.
|
|
|
|
|
+reg_class gov_pr (
|
|
|
|
|
+ P0,
|
|
|
|
|
+ P1,
|
|
|
|
|
+ P2,
|
|
|
|
|
+ P3,
|
|
|
|
|
+ P4,
|
|
|
|
|
+ P5,
|
|
|
|
|
+ P6,
|
|
|
|
|
+ // P7, non-allocatable, preserved with all elements preset to TRUE.
|
|
|
|
|
+);
|
|
|
|
|
+
|
|
|
|
|
// Singleton class for condition codes
|
|
|
|
|
reg_class int_flags(RFLAGS);
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1761,6 +1991,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
2020-12-24 15:35:16 +08:00
|
|
|
// branch if we need to invalidate the method later
|
|
|
|
|
__ nop();
|
|
|
|
|
|
|
|
|
|
+ if (UseSVE > 0 && C->max_vector_size() >= 16) {
|
|
|
|
|
+ __ reinitialize_ptrue();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
int bangsize = C->bang_size_in_bytes();
|
|
|
|
|
if (C->need_stack_bang(bangsize) && UseStackBanging)
|
|
|
|
|
__ generate_stack_overflow_check(bangsize);
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1862,7 +2096,7 @@ int MachEpilogNode::safepoint_offset() const {
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// Figure out which register class each belongs in: rc_int, rc_float or
|
|
|
|
|
// rc_stack.
|
|
|
|
|
-enum RC { rc_bad, rc_int, rc_float, rc_stack };
|
|
|
|
|
+enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
|
|
|
|
|
|
|
|
|
|
static enum RC rc_class(OptoReg::Name reg) {
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1870,20 +2104,25 @@ static enum RC rc_class(OptoReg::Name reg) {
|
2020-12-24 15:35:16 +08:00
|
|
|
return rc_bad;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
- // we have 30 int registers * 2 halves
|
|
|
|
|
- // (rscratch1 and rscratch2 are omitted)
|
2021-08-13 14:54:30 +08:00
|
|
|
- int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2);
|
2020-12-24 15:35:16 +08:00
|
|
|
+ // we have 32 int registers * 2 halves
|
|
|
|
|
+ int slots_of_int_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers;
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
if (reg < slots_of_int_registers) {
|
2020-12-24 15:35:16 +08:00
|
|
|
return rc_int;
|
|
|
|
|
}
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
- // we have 32 float register * 4 halves
|
|
|
|
|
- if (reg < slots_of_int_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) {
|
2020-12-24 15:35:16 +08:00
|
|
|
+ // we have 32 float registers * 8 slots
|
|
|
|
|
+ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
|
|
|
|
|
+ if (reg < slots_of_int_registers + slots_of_float_registers) {
|
|
|
|
|
return rc_float;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
- // Between float regs & stack is the flags regs.
|
|
|
|
|
+ int slots_of_predicate_registers = PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers;
|
|
|
|
|
+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_predicate_registers) {
|
|
|
|
|
+ return rc_predicate;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Between the predicate regs & the stack are the flags.
|
|
|
|
|
assert(OptoReg::is_stack(reg), "blow up if spilling flags");
|
|
|
|
|
|
|
|
|
|
return rc_stack;
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1922,8 +2161,28 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
if (bottom_type()->isa_vect() != NULL) {
|
|
|
|
|
uint ireg = ideal_reg();
|
|
|
|
|
- assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
|
|
|
|
|
- if (cbuf) {
|
|
|
|
|
+ if (ireg == Op_VecA && cbuf) {
|
|
|
|
|
+ MacroAssembler _masm(cbuf);
|
|
|
|
|
+ int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
|
|
|
|
|
+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
|
|
|
|
|
+ // stack->stack
|
|
|
|
|
+ __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset,
|
|
|
|
|
+ sve_vector_reg_size_in_bytes);
|
|
|
|
|
+ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
|
|
|
|
|
+ __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
|
|
|
|
|
+ sve_vector_reg_size_in_bytes);
|
|
|
|
|
+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
|
|
|
|
|
+ __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
|
|
|
|
|
+ sve_vector_reg_size_in_bytes);
|
|
|
|
|
+ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
|
|
|
|
|
+ __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
|
|
|
|
|
+ as_FloatRegister(Matcher::_regEncode[src_lo]),
|
|
|
|
|
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ ShouldNotReachHere();
|
|
|
|
|
+ }
|
|
|
|
|
+ } else if (cbuf) {
|
|
|
|
|
+ assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
|
|
|
|
|
MacroAssembler _masm(cbuf);
|
|
|
|
|
assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
|
|
|
|
|
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1941,12 +2200,12 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
2020-12-24 15:35:16 +08:00
|
|
|
as_FloatRegister(Matcher::_regEncode[src_lo]));
|
|
|
|
|
} else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
|
|
|
|
|
__ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
|
|
|
|
|
- ireg == Op_VecD ? __ D : __ Q,
|
|
|
|
|
- ra_->reg2offset(dst_lo));
|
|
|
|
|
+ ireg == Op_VecD ? __ D : __ Q,
|
|
|
|
|
+ ra_->reg2offset(dst_lo));
|
|
|
|
|
} else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
|
|
|
|
|
__ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
|
|
|
|
|
- ireg == Op_VecD ? __ D : __ Q,
|
|
|
|
|
- ra_->reg2offset(src_lo));
|
|
|
|
|
+ ireg == Op_VecD ? __ D : __ Q,
|
2021-08-13 14:54:30 +08:00
|
|
|
+ ra_->reg2offset(src_lo));
|
2020-12-24 15:35:16 +08:00
|
|
|
} else {
|
|
|
|
|
ShouldNotReachHere();
|
|
|
|
|
}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2031,9 +2290,24 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
2020-12-24 15:35:16 +08:00
|
|
|
st->print("%s", Matcher::regName[dst_lo]);
|
|
|
|
|
}
|
|
|
|
|
if (bottom_type()->isa_vect() != NULL) {
|
|
|
|
|
- st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
|
|
|
|
|
+ int vsize = 0;
|
|
|
|
|
+ switch (ideal_reg()) {
|
|
|
|
|
+ case Op_VecD:
|
|
|
|
|
+ vsize = 64;
|
|
|
|
|
+ break;
|
|
|
|
|
+ case Op_VecX:
|
|
|
|
|
+ vsize = 128;
|
|
|
|
|
+ break;
|
|
|
|
|
+ case Op_VecA:
|
|
|
|
|
+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
|
|
|
|
|
+ break;
|
|
|
|
|
+ default:
|
|
|
|
|
+ assert(false, "bad register type for spill");
|
|
|
|
|
+ ShouldNotReachHere();
|
|
|
|
|
+ }
|
|
|
|
|
+ st->print("\t# vector spill size = %d", vsize);
|
|
|
|
|
} else {
|
|
|
|
|
- st->print("\t# spill size = %d", is64 ? 64:32);
|
|
|
|
|
+ st->print("\t# spill size = %d", is64 ? 64 : 32);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2192,19 +2466,32 @@ const bool Matcher::match_rule_supported(int opcode) {
|
2020-12-24 15:35:16 +08:00
|
|
|
return true; // Per default match rules are supported.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
|
|
|
-
|
|
|
|
|
- // TODO
|
|
|
|
|
- // identify extra cases that we might want to provide match rules for
|
|
|
|
|
- // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
|
|
|
|
- bool ret_value = match_rule_supported(opcode);
|
|
|
|
|
- // Add rules here.
|
|
|
|
|
-
|
|
|
|
|
- return ret_value; // Per default match rules are supported.
|
|
|
|
|
+ // Identify extra cases for which we might want to provide match rules, e.g. vector nodes
|
|
|
|
|
+ // and other intrinsics, guarded by vector length (vlen) and element type (bt).
|
|
|
|
|
+ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
|
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ int bit_size = vlen * type2aelembytes(bt) * 8;
|
|
|
|
|
+ if (UseSVE == 0 && bit_size > 128) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (UseSVE > 0) {
|
|
|
|
|
+ return op_sve_supported(opcode);
|
|
|
|
|
+ } else { // NEON
|
|
|
|
|
+ // Special cases
|
|
|
|
|
+ switch (opcode) {
|
|
|
|
|
+ case Op_MulVL:
|
|
|
|
|
+ return false;
|
|
|
|
|
+ default:
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return true; // Per default match rules are supported.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const bool Matcher::has_predicated_vectors(void) {
|
|
|
|
|
- return false;
|
|
|
|
|
+ return UseSVE > 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const int Matcher::float_pressure(int default_pressure_threshold) {
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2240,7 +2527,8 @@ const bool Matcher::convL2FSupported(void) {
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// Vector width in bytes.
|
|
|
|
|
const int Matcher::vector_width_in_bytes(BasicType bt) {
|
|
|
|
|
- int size = MIN2(16,(int)MaxVectorSize);
|
|
|
|
|
+ // The MaxVectorSize should have been set by detecting SVE max vector register size.
|
|
|
|
|
+ int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize);
|
|
|
|
|
// Minimum 2 values in vector
|
|
|
|
|
if (size < 2*type2aelembytes(bt)) size = 0;
|
|
|
|
|
// But never < 4
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2253,14 +2541,32 @@ const int Matcher::max_vector_size(const BasicType bt) {
|
2020-12-24 15:35:16 +08:00
|
|
|
return vector_width_in_bytes(bt)/type2aelembytes(bt);
|
|
|
|
|
}
|
|
|
|
|
const int Matcher::min_vector_size(const BasicType bt) {
|
|
|
|
|
-// For the moment limit the vector size to 8 bytes
|
|
|
|
|
+ int max_size = max_vector_size(bt);
|
|
|
|
|
+ if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
|
|
|
|
|
+ // Currently, a vector length less than the SVE vector register size is not supported.
|
|
|
|
|
+ return max_size;
|
|
|
|
|
+ } else {
|
2021-08-13 14:54:30 +08:00
|
|
|
+ // For the moment limit the vector size to 8 bytes with NEON.
|
2020-12-24 15:35:16 +08:00
|
|
|
int size = 8 / type2aelembytes(bt);
|
|
|
|
|
if (size < 2) size = 2;
|
|
|
|
|
return size;
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
|
|
|
+ return UseSVE > 0;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+// Actual max scalable vector register length.
|
|
|
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
|
|
|
+ return Matcher::max_vector_size(bt);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Vector ideal reg.
|
|
|
|
|
const uint Matcher::vector_ideal_reg(int len) {
|
|
|
|
|
+ if (UseSVE > 0 && 16 <= len && len <= 256) {
|
|
|
|
|
+ return Op_VecA;
|
|
|
|
|
+ }
|
|
|
|
|
switch(len) {
|
|
|
|
|
case 8: return Op_VecD;
|
|
|
|
|
case 16: return Op_VecX;
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2270,6 +2576,9 @@ const uint Matcher::vector_ideal_reg(int len) {
|
2020-12-24 15:35:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const uint Matcher::vector_shift_count_ideal_reg(int size) {
|
|
|
|
|
+ if (UseSVE > 0 && 16 <= size && size <= 256) {
|
|
|
|
|
+ return Op_VecA;
|
|
|
|
|
+ }
|
|
|
|
|
switch(size) {
|
|
|
|
|
case 8: return Op_VecD;
|
|
|
|
|
case 16: return Op_VecX;
|
|
|
|
|
@@ -3419,6 +3728,11 @@ encode %{
|
2021-08-13 14:54:30 +08:00
|
|
|
if (call == NULL) {
|
|
|
|
|
ciEnv::current()->record_failure("CodeCache is full");
|
|
|
|
|
return;
|
2020-12-24 15:35:16 +08:00
|
|
|
+ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
|
|
|
|
|
+ // Only calls that are not uncommon traps need to reinitialize ptrue.
|
|
|
|
|
+ if (uncommon_trap_request() == 0) {
|
|
|
|
|
+ __ reinitialize_ptrue();
|
|
|
|
|
+ }
|
2021-08-13 14:54:30 +08:00
|
|
|
}
|
|
|
|
|
// Emit stub for static call
|
|
|
|
|
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
|
|
|
|
|
@@ -3436,6 +3750,8 @@ encode %{
|
2020-12-24 15:35:16 +08:00
|
|
|
if (call == NULL) {
|
|
|
|
|
ciEnv::current()->record_failure("CodeCache is full");
|
|
|
|
|
return;
|
|
|
|
|
+ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
|
|
|
|
|
+ __ reinitialize_ptrue();
|
|
|
|
|
}
|
|
|
|
|
%}
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -3472,6 +3788,9 @@ encode %{
|
2020-12-24 15:35:16 +08:00
|
|
|
__ bind(retaddr);
|
|
|
|
|
__ add(sp, sp, 2 * wordSize);
|
|
|
|
|
}
|
|
|
|
|
+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
|
|
|
|
|
+ __ reinitialize_ptrue();
|
|
|
|
|
+ }
|
|
|
|
|
%}
|
|
|
|
|
|
|
|
|
|
enc_class aarch64_enc_rethrow() %{
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -3481,6 +3800,11 @@ encode %{
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
enc_class aarch64_enc_ret() %{
|
|
|
|
|
MacroAssembler _masm(&cbuf);
|
|
|
|
|
+#ifdef ASSERT
|
|
|
|
|
+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
|
|
|
|
|
+ __ verify_ptrue();
|
|
|
|
|
+ }
|
|
|
|
|
+#endif
|
|
|
|
|
__ ret(lr);
|
|
|
|
|
%}
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -4222,6 +4546,41 @@ operand immLoffset16()
|
2020-12-24 15:35:16 +08:00
|
|
|
interface(CONST_INTER);
|
|
|
|
|
%}
|
|
|
|
|
|
|
|
|
|
+// 8 bit signed value.
|
|
|
|
|
+operand immI8()
|
|
|
|
|
+%{
|
|
|
|
|
+ predicate(n->get_int() <= 127 && n->get_int() >= -128);
|
|
|
|
|
+ match(ConI);
|
|
|
|
|
+
|
|
|
|
|
+ op_cost(0);
|
|
|
|
|
+ format %{ %}
|
|
|
|
|
+ interface(CONST_INTER);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// 8 bit signed value (simm8), or #simm8 LSL 8.
|
|
|
|
|
+operand immI8_shift8()
|
|
|
|
|
+%{
|
|
|
|
|
+ predicate((n->get_int() <= 127 && n->get_int() >= -128) ||
|
|
|
|
|
+ (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0));
|
|
|
|
|
+ match(ConI);
|
|
|
|
|
+
|
|
|
|
|
+ op_cost(0);
|
|
|
|
|
+ format %{ %}
|
|
|
|
|
+ interface(CONST_INTER);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// 8 bit signed value (simm8), or #simm8 LSL 8.
|
|
|
|
|
+operand immL8_shift8()
|
|
|
|
|
+%{
|
|
|
|
|
+ predicate((n->get_long() <= 127 && n->get_long() >= -128) ||
|
|
|
|
|
+ (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0));
|
|
|
|
|
+ match(ConL);
|
|
|
|
|
+
|
|
|
|
|
+ op_cost(0);
|
|
|
|
|
+ format %{ %}
|
|
|
|
|
+ interface(CONST_INTER);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
// 32 bit integer valid for add sub immediate
|
|
|
|
|
operand immIAddSub()
|
|
|
|
|
%{
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -4851,6 +5210,18 @@ operand vRegD()
|
2020-12-24 15:35:16 +08:00
|
|
|
interface(REG_INTER);
|
|
|
|
|
%}
|
|
|
|
|
|
|
|
|
|
+// Generic vector class. This will be used for
|
|
|
|
|
+// all vector operands, including NEON and SVE,
|
|
|
|
|
+ // but is currently only used for SVE VecA.
|
|
|
|
|
+operand vReg()
|
|
|
|
|
+%{
|
|
|
|
|
+ constraint(ALLOC_IN_RC(vectora_reg));
|
|
|
|
|
+ match(VecA);
|
|
|
|
|
+ op_cost(0);
|
|
|
|
|
+ format %{ %}
|
|
|
|
|
+ interface(REG_INTER);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
operand vecD()
|
|
|
|
|
%{
|
|
|
|
|
constraint(ALLOC_IN_RC(vectord_reg));
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -5159,6 +5530,15 @@ operand vRegD_V31()
|
2020-12-24 15:35:16 +08:00
|
|
|
interface(REG_INTER);
|
|
|
|
|
%}
|
|
|
|
|
|
|
|
|
|
+operand pRegGov()
|
|
|
|
|
+%{
|
|
|
|
|
+ constraint(ALLOC_IN_RC(gov_pr));
|
|
|
|
|
+ match(RegVMask);
|
|
|
|
|
+ op_cost(0);
|
|
|
|
|
+ format %{ %}
|
|
|
|
|
+ interface(REG_INTER);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
// Flags register, used as output of signed compare instructions
|
|
|
|
|
|
|
|
|
|
// note that on AArch64 we also use this register as the output for
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15745,7 +16125,7 @@ instruct loadV8(vecD dst, vmem8 mem)
|
2020-12-24 15:35:16 +08:00
|
|
|
// Load Vector (128 bits)
|
|
|
|
|
instruct loadV16(vecX dst, vmem16 mem)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_LoadVector()->memory_size() == 16);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16);
|
|
|
|
|
match(Set dst (LoadVector mem));
|
|
|
|
|
ins_cost(4 * INSN_COST);
|
|
|
|
|
format %{ "ldrq $dst,$mem\t# vector (128 bits)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15801,7 +16181,7 @@ instruct replicate8B(vecD dst, iRegIorL2I src)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate16B(vecX dst, iRegIorL2I src)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 16);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
|
|
|
|
|
match(Set dst (ReplicateB src));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "dup $dst, $src\t# vector (16B)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15826,7 +16206,7 @@ instruct replicate8B_imm(vecD dst, immI con)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate16B_imm(vecX dst, immI con)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 16);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
|
|
|
|
|
match(Set dst (ReplicateB con));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "movi $dst, $con\t# vector(16B)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15851,7 +16231,7 @@ instruct replicate4S(vecD dst, iRegIorL2I src)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate8S(vecX dst, iRegIorL2I src)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 8);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
|
|
|
|
|
match(Set dst (ReplicateS src));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "dup $dst, $src\t# vector (8S)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15876,7 +16256,7 @@ instruct replicate4S_imm(vecD dst, immI con)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate8S_imm(vecX dst, immI con)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 8);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
|
|
|
|
|
match(Set dst (ReplicateS con));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "movi $dst, $con\t# vector(8H)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15900,7 +16280,7 @@ instruct replicate2I(vecD dst, iRegIorL2I src)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate4I(vecX dst, iRegIorL2I src)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 4);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
|
|
|
|
|
match(Set dst (ReplicateI src));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "dup $dst, $src\t# vector (4I)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15924,7 +16304,7 @@ instruct replicate2I_imm(vecD dst, immI con)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate4I_imm(vecX dst, immI con)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 4);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
|
|
|
|
|
match(Set dst (ReplicateI con));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "movi $dst, $con\t# vector(4I)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15936,7 +16316,7 @@ instruct replicate4I_imm(vecX dst, immI con)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate2L(vecX dst, iRegL src)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 2);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
|
|
|
|
|
match(Set dst (ReplicateL src));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "dup $dst, $src\t# vector (2L)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15948,7 +16328,7 @@ instruct replicate2L(vecX dst, iRegL src)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate2L_zero(vecX dst, immI0 zero)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 2);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
|
|
|
|
|
match(Set dst (ReplicateI zero));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "movi $dst, $zero\t# vector(4I)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15975,7 +16355,7 @@ instruct replicate2F(vecD dst, vRegF src)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate4F(vecX dst, vRegF src)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 4);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
|
|
|
|
|
match(Set dst (ReplicateF src));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "dup $dst, $src\t# vector (4F)" %}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -15988,7 +16368,7 @@ instruct replicate4F(vecX dst, vRegF src)
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
instruct replicate2D(vecX dst, vRegD src)
|
|
|
|
|
%{
|
|
|
|
|
- predicate(n->as_Vector()->length() == 2);
|
|
|
|
|
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
|
|
|
|
|
match(Set dst (ReplicateD src));
|
|
|
|
|
ins_cost(INSN_COST);
|
|
|
|
|
format %{ "dup $dst, $src\t# vector (2D)" %}
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad
|
|
|
|
|
new file mode 100644
|
|
|
|
|
index 000000000..8d80cb37a
|
|
|
|
|
--- /dev/null
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad
|
|
|
|
|
@@ -0,0 +1,1366 @@
|
|
|
|
|
+//
|
|
|
|
|
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+// Copyright (c) 2020, Arm Limited. All rights reserved.
|
|
|
|
|
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
+//
|
|
|
|
|
+// This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
+// under the terms of the GNU General Public License version 2 only, as
|
|
|
|
|
+// published by the Free Software Foundation.
|
|
|
|
|
+//
|
|
|
|
|
+// This code is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
|
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
|
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
|
+// version 2 for more details (a copy is included in the LICENSE file that
|
|
|
|
|
+// accompanied this code).
|
|
|
|
|
+//
|
|
|
|
|
+// You should have received a copy of the GNU General Public License version
|
|
|
|
|
+// 2 along with this work; if not, write to the Free Software Foundation,
|
|
|
|
|
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
|
+//
|
|
|
|
|
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
|
|
|
+// or visit www.oracle.com if you need additional information or have any
|
|
|
|
|
+// questions.
|
|
|
|
|
+//
|
|
|
|
|
+//
|
|
|
|
|
+
|
|
|
|
|
+// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
|
|
|
|
|
+
|
|
|
|
|
+// AArch64 SVE Architecture Description File
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+// 4 bit signed offset -- for predicated load/store
|
|
|
|
|
+
|
|
|
|
|
+operand vmemA_immIOffset4()
|
|
|
|
|
+%{
|
|
|
|
|
+ predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4,
|
|
|
|
|
+ Matcher::scalable_vector_reg_size(T_BYTE)));
|
|
|
|
|
+ match(ConI);
|
|
|
|
|
+
|
|
|
|
|
+ op_cost(0);
|
|
|
|
|
+ format %{ %}
|
|
|
|
|
+ interface(CONST_INTER);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+operand vmemA_immLOffset4()
|
|
|
|
|
+%{
|
|
|
|
|
+ predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4,
|
|
|
|
|
+ Matcher::scalable_vector_reg_size(T_BYTE)));
|
|
|
|
|
+ match(ConL);
|
|
|
|
|
+
|
|
|
|
|
+ op_cost(0);
|
|
|
|
|
+ format %{ %}
|
|
|
|
|
+ interface(CONST_INTER);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off)
|
|
|
|
|
+%{
|
|
|
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
|
|
|
+ match(AddP reg off);
|
|
|
|
|
+ op_cost(0);
|
|
|
|
|
+ format %{ "[$reg, $off, MUL VL]" %}
|
|
|
|
|
+ interface(MEMORY_INTER) %{
|
|
|
|
|
+ base($reg);
|
|
|
|
|
+ index(0xffffffff);
|
|
|
|
|
+ scale(0x0);
|
|
|
|
|
+ disp($off);
|
|
|
|
|
+ %}
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off)
|
|
|
|
|
+%{
|
|
|
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
|
|
|
+ match(AddP reg off);
|
|
|
|
|
+ op_cost(0);
|
|
|
|
|
+ format %{ "[$reg, $off, MUL VL]" %}
|
|
|
|
|
+ interface(MEMORY_INTER) %{
|
|
|
|
|
+ base($reg);
|
|
|
|
|
+ index(0xffffffff);
|
|
|
|
|
+ scale(0x0);
|
|
|
|
|
+ disp($off);
|
|
|
|
|
+ %}
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
|
|
|
|
|
+
|
|
|
|
|
+source_hpp %{
|
|
|
|
|
+ bool op_sve_supported(int opcode);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+source %{
|
|
|
|
|
+
|
|
|
|
|
+ static inline BasicType vector_element_basic_type(const MachNode* n) {
|
|
|
|
|
+ const TypeVect* vt = n->bottom_type()->is_vect();
|
|
|
|
|
+ return vt->element_basic_type();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
|
|
|
|
|
+ int def_idx = use->operand_index(opnd);
|
|
|
|
|
+ Node* def = use->in(def_idx);
|
|
|
|
|
+ const TypeVect* vt = def->bottom_type()->is_vect();
|
|
|
|
|
+ return vt->element_basic_type();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
|
|
|
|
|
+ PRegister Pg, const Address &adr);
|
|
|
|
|
+
|
|
|
|
|
+  // Predicated load/store governed by the caller-supplied predicate register pg; only base + immediate addressing (index == -1) is implemented.
|
|
|
|
|
+ static void loadStoreA_predicate(MacroAssembler masm, bool is_store,
|
|
|
|
|
+ FloatRegister reg, PRegister pg, BasicType bt,
|
|
|
|
|
+ int opcode, Register base, int index, int size, int disp) {
|
|
|
|
|
+ sve_mem_insn_predicate insn = NULL;
|
|
|
|
|
+ Assembler::SIMD_RegVariant type = Assembler::B;
|
|
|
|
|
+ int esize = type2aelembytes(bt);
|
|
|
|
|
+ if (index == -1) {
|
|
|
|
|
+ assert(size == 0, "unsupported address mode: scale size = %d", size);
|
|
|
|
|
+ switch(esize) {
|
|
|
|
|
+ case 1:
|
|
|
|
|
+ insn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b;
|
|
|
|
|
+ type = Assembler::B;
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 2:
|
|
|
|
|
+ insn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h;
|
|
|
|
|
+ type = Assembler::H;
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 4:
|
|
|
|
|
+ insn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w;
|
|
|
|
|
+ type = Assembler::S;
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 8:
|
|
|
|
|
+ insn = is_store ? &MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d;
|
|
|
|
|
+ type = Assembler::D;
|
|
|
|
|
+ break;
|
|
|
|
|
+ default:
|
|
|
|
|
+ assert(false, "unsupported");
|
|
|
|
|
+ ShouldNotReachHere();
|
|
|
|
|
+ }
|
|
|
|
|
+ (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ assert(false, "unimplemented");
|
|
|
|
|
+ ShouldNotReachHere();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool op_sve_supported(int opcode) {
|
|
|
|
|
+ switch (opcode) {
|
|
|
|
|
+ // No multiply reduction instructions
|
|
|
|
|
+ case Op_MulReductionVD:
|
|
|
|
|
+ case Op_MulReductionVF:
|
|
|
|
|
+ case Op_MulReductionVI:
|
|
|
|
|
+ case Op_MulReductionVL:
|
|
|
|
|
+ // Others
|
|
|
|
|
+ case Op_Extract:
|
|
|
|
|
+ case Op_ExtractB:
|
|
|
|
|
+ case Op_ExtractC:
|
|
|
|
|
+ case Op_ExtractD:
|
|
|
|
|
+ case Op_ExtractF:
|
|
|
|
|
+ case Op_ExtractI:
|
|
|
|
|
+ case Op_ExtractL:
|
|
|
|
|
+ case Op_ExtractS:
|
|
|
|
|
+ case Op_ExtractUB:
|
|
|
|
|
+ return false;
|
|
|
|
|
+ default:
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+definitions %{
|
|
|
|
|
+ int_def SVE_COST (200, 200);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+// All SVE instructions
|
|
|
|
|
+
|
|
|
|
|
+// vector load/store
|
|
|
|
|
+
|
|
|
|
|
+// Use predicated vector load/store
|
|
|
|
|
+instruct loadV(vReg dst, vmemA mem) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
|
|
|
|
|
+ match(Set dst (LoadVector mem));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ FloatRegister dst_reg = as_FloatRegister($dst$$reg);
|
|
|
|
|
+ loadStoreA_predicate(MacroAssembler(&cbuf), false, dst_reg, ptrue,
|
|
|
|
|
+ vector_element_basic_type(this), $mem->opcode(),
|
|
|
|
|
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct storeV(vReg src, vmemA mem) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
|
|
|
|
|
+ match(Set mem (StoreVector mem src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_str $mem, $src\t # vector (sve)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ FloatRegister src_reg = as_FloatRegister($src$$reg);
|
|
|
|
|
+ loadStoreA_predicate(MacroAssembler(&cbuf), true, src_reg, ptrue,
|
|
|
|
|
+ vector_element_basic_type(this, $src), $mem->opcode(),
|
|
|
|
|
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector add
|
|
|
|
|
+
|
|
|
|
|
+instruct vaddB(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
|
|
|
|
|
+ match(Set dst (AddVB src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (B)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_add(as_FloatRegister($dst$$reg), __ B,
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vaddS(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
|
|
|
|
|
+ match(Set dst (AddVS src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (H)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_add(as_FloatRegister($dst$$reg), __ H,
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vaddI(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst (AddVI src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_add(as_FloatRegister($dst$$reg), __ S,
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vaddL(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+ match(Set dst (AddVL src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_add(as_FloatRegister($dst$$reg), __ D,
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vaddF(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst (AddVF src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fadd(as_FloatRegister($dst$$reg), __ S,
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vaddD(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+ match(Set dst (AddVD src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fadd(as_FloatRegister($dst$$reg), __ D,
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector and
|
|
|
|
|
+
|
|
|
|
|
+instruct vand(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
|
|
|
|
|
+ match(Set dst (AndV src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_and $dst, $src1, $src2\t# vector (sve)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_and(as_FloatRegister($dst$$reg),
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector or
|
|
|
|
|
+
|
|
|
|
|
+instruct vor(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
|
|
|
|
|
+ match(Set dst (OrV src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_orr $dst, $src1, $src2\t# vector (sve)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_orr(as_FloatRegister($dst$$reg),
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector xor
|
|
|
|
|
+
|
|
|
|
|
+instruct vxor(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
|
|
|
|
|
+ match(Set dst (XorV src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_eor $dst, $src1, $src2\t# vector (sve)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_eor(as_FloatRegister($dst$$reg),
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector float div
|
|
|
|
|
+
|
|
|
|
|
+instruct vdivF(vReg dst_src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst_src1 (DivVF dst_src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vdivD(vReg dst_src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+ match(Set dst_src1 (DivVD dst_src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector fmla
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * src3
|
|
|
|
|
+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * src3
|
|
|
|
|
+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector fmls
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 + -src2 * src3
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * -src3
|
|
|
|
|
+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
|
|
|
|
|
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 + -src2 * src3
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * -src3
|
|
|
|
|
+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
|
|
|
|
|
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector fnmla
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = -dst_src1 + -src2 * src3
|
|
|
|
|
+// dst_src1 = -dst_src1 + src2 * -src3
|
|
|
|
|
+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
|
|
|
|
|
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = -dst_src1 + -src2 * src3
|
|
|
|
|
+// dst_src1 = -dst_src1 + src2 * -src3
|
|
|
|
|
+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
|
|
|
|
|
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector fnmls
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = -dst_src1 + src2 * src3
|
|
|
|
|
+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = -dst_src1 + src2 * src3
|
|
|
|
|
+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector mla
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * src3 (AddVS of MulVS, H lanes, governed by ptrue)
|
|
|
|
|
+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3)
|
|
|
|
|
+%{
|
|
|
|
|
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
|
|
|
|
|
+  match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
|
|
|
|
|
+  ins_cost(SVE_COST);
|
|
|
|
|
+  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) (H)" %}
|
|
|
|
|
+  ins_encode %{
|
|
|
|
|
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H,
|
|
|
|
|
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+  %}
|
|
|
|
|
+  ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * src3 (AddVI of MulVI, S lanes, governed by ptrue)
|
|
|
|
|
+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3)
|
|
|
|
|
+%{
|
|
|
|
|
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+  match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
|
|
|
|
|
+  ins_cost(SVE_COST);
|
|
|
|
|
+  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
|
|
|
|
|
+  ins_encode %{
|
|
|
|
|
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S,
|
|
|
|
|
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+  %}
|
|
|
|
|
+  ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * src3 (AddVL of MulVL, D lanes, governed by ptrue)
|
|
|
|
|
+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3)
|
|
|
|
|
+%{
|
|
|
|
|
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+  match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
|
|
|
|
|
+  ins_cost(SVE_COST);
|
|
|
|
|
+  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
|
|
|
|
|
+  ins_encode %{
|
|
|
|
|
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D,
|
|
|
|
|
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+  %}
|
|
|
|
|
+  ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector mls
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 - src2 * src3 (SubVS of MulVS, H lanes, governed by ptrue)
|
|
|
|
|
+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3)
|
|
|
|
|
+%{
|
|
|
|
|
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
|
|
|
|
|
+  match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
|
|
|
|
|
+  ins_cost(SVE_COST);
|
|
|
|
|
+  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) (H)" %}
|
|
|
|
|
+  ins_encode %{
|
|
|
|
|
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H,
|
|
|
|
|
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+  %}
|
|
|
|
|
+  ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 - src2 * src3 (SubVI of MulVI, S lanes, governed by ptrue)
|
|
|
|
|
+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3)
|
|
|
|
|
+%{
|
|
|
|
|
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+  match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
|
|
|
|
|
+  ins_cost(SVE_COST);
|
|
|
|
|
+  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
|
|
|
|
|
+  ins_encode %{
|
|
|
|
|
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S,
|
|
|
|
|
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+  %}
|
|
|
|
|
+  ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// dst_src1 = dst_src1 - src2 * src3 (SubVL of MulVL, D lanes, governed by ptrue)
|
|
|
|
|
+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3)
|
|
|
|
|
+%{
|
|
|
|
|
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+  match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
|
|
|
|
|
+  ins_cost(SVE_COST);
|
|
|
|
|
+  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
|
|
|
|
|
+  ins_encode %{
|
|
|
|
|
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D,
|
|
|
|
|
+      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+  %}
|
|
|
|
|
+  ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+// vector mul
|
|
|
|
|
+
|
|
|
|
|
+instruct vmulS(vReg dst_src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
|
|
|
|
|
+ match(Set dst_src1 (MulVS dst_src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vmulI(vReg dst_src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst_src1 (MulVI dst_src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vmulL(vReg dst_src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+ match(Set dst_src1 (MulVL dst_src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vmulF(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst (MulVF src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fmul(as_FloatRegister($dst$$reg), __ S,
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vmulD(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
|
|
|
|
|
+ match(Set dst (MulVD src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fmul(as_FloatRegister($dst$$reg), __ D,
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector fneg
|
|
|
|
|
+
|
|
|
|
|
+instruct vnegF(vReg dst, vReg src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
|
|
|
|
|
+ match(Set dst (NegVF src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fneg(as_FloatRegister($dst$$reg), __ S,
|
|
|
|
|
+ ptrue, as_FloatRegister($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct vnegD(vReg dst, vReg src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
|
|
|
|
|
+ match(Set dst (NegVD src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fneg(as_FloatRegister($dst$$reg), __ D,
|
|
|
|
|
+ ptrue, as_FloatRegister($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// popcount vector: PopCountVI emitted as sve_cnt on S lanes, governed by ptrue
|
|
|
|
|
+
|
|
|
|
|
+instruct vpopcountI(vReg dst, vReg src) %{
|
|
|
|
|
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+  match(Set dst (PopCountVI src));
|
|
|
|
|
+  format %{ "sve_cnt $dst, $src\t# vector (sve) (S)" %}
|
|
|
|
|
+  ins_encode %{
|
|
|
|
|
+     __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
|
|
|
|
|
+  %}
|
|
|
|
|
+  ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector add reduction
|
|
|
|
|
+
|
|
|
|
|
+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
|
|
|
|
|
+ (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
|
|
|
|
|
+ match(Set dst (AddReductionVI src1 src2));
|
|
|
|
|
+ effect(TEMP_DEF dst, TEMP tmp);
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (S)\n\t"
|
|
|
|
|
+ "umov $dst, $tmp, S, 0\n\t"
|
|
|
|
|
+ "addw $dst, $dst, $src1\t # add reduction S" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ S,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
|
|
|
|
|
+ __ addw($dst$$Register, $dst$$Register, $src1$$Register);
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
|
|
|
|
|
+ (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG));
|
|
|
|
|
+ match(Set dst (AddReductionVL src1 src2));
|
|
|
|
|
+ effect(TEMP_DEF dst, TEMP tmp);
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (D)\n\t"
|
|
|
|
|
+ "umov $dst, $tmp, D, 0\n\t"
|
|
|
|
|
+ "add $dst, $dst, $src1\t # add reduction D" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ D,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
|
|
|
|
|
+ __ add($dst$$Register, $dst$$Register, $src1$$Register);
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct reduce_addF(vRegF src1_dst, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
|
|
|
|
|
+ match(Set src1_dst (AddReductionVF src1_dst src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct reduce_addD(vRegD src1_dst, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
|
|
|
|
|
+ match(Set src1_dst (AddReductionVD src1_dst src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector replicate
|
|
|
|
|
+
|
|
|
|
|
+instruct replicateB(vReg dst, iRegIorL2I src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
|
|
|
|
|
+ match(Set dst (ReplicateB src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_dup $dst, $src\t# vector (sve) (B)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct replicateS(vReg dst, iRegIorL2I src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
|
|
|
|
|
+ match(Set dst (ReplicateS src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_dup $dst, $src\t# vector (sve) (H)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct replicateI(vReg dst, iRegIorL2I src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst (ReplicateI src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_dup $dst, $src\t# vector (sve) (S)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches ReplicateL of a 64-bit general-purpose register; emits sve_dup with D lanes.
+instruct replicateL(vReg dst, iRegL src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (ReplicateL src));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    Register      src_reg = as_Register($src$$reg);
+    __ sve_dup(dst_reg, __ D, src_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+// Matches ReplicateB of an immI8 constant; emits the immediate form of sve_dup (B lanes).
+instruct replicateB_imm8(vReg dst, immI8 con) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (ReplicateB con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup $dst, $con\t# vector (sve) (B)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    __ sve_dup(dst_reg, __ B, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches ReplicateS of an immI8_shift8 constant; emits the immediate form of sve_dup (H lanes).
+instruct replicateS_imm8(vReg dst, immI8_shift8 con) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (ReplicateS con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup $dst, $con\t# vector (sve) (H)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    __ sve_dup(dst_reg, __ H, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches ReplicateI of an immI8_shift8 constant; emits the immediate form of sve_dup (S lanes).
+instruct replicateI_imm8(vReg dst, immI8_shift8 con) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (ReplicateI con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup $dst, $con\t# vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    __ sve_dup(dst_reg, __ S, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches ReplicateL of an immL8_shift8 constant; emits the immediate form of sve_dup (D lanes).
+instruct replicateL_imm8(vReg dst, immL8_shift8 con) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (ReplicateL con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup $dst, $con\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    __ sve_dup(dst_reg, __ D, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+// Matches ReplicateF of a float register; emits sve_cpy (S lanes) under the ptrue predicate.
+instruct replicateF(vReg dst, vRegF src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (ReplicateF src));
+  ins_cost(SVE_COST);
+  format %{ "sve_cpy $dst, $src\t# vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    __ sve_cpy(dst_reg, __ S, ptrue, src_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches ReplicateD of a double register; emits sve_cpy (D lanes) under the ptrue predicate.
+instruct replicateD(vReg dst, vRegD src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (ReplicateD src));
+  ins_cost(SVE_COST);
+  format %{ "sve_cpy $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    __ sve_cpy(dst_reg, __ D, ptrue, src_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector shift
|
|
|
|
|
+
|
|
|
|
|
+// Matches RShiftVB with a vector shift count; emits predicated sve_asr (B lanes), dst updated in place.
+instruct vasrB(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (RShiftVB dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_asr(dst_reg, __ B, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches RShiftVS with a vector shift count; emits predicated sve_asr (H lanes), dst updated in place.
+instruct vasrS(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (RShiftVS dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_asr(dst_reg, __ H, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches RShiftVI with a vector shift count; emits predicated sve_asr (S lanes), dst updated in place.
+instruct vasrI(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (RShiftVI dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_asr(dst_reg, __ S, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches RShiftVL with a vector shift count; emits predicated sve_asr (D lanes), dst updated in place.
+instruct vasrL(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (RShiftVL dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_asr(dst_reg, __ D, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftVB with a vector shift count; emits predicated sve_lsl (B lanes), dst updated in place.
+instruct vlslB(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (LShiftVB dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_lsl(dst_reg, __ B, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftVS with a vector shift count; emits predicated sve_lsl (H lanes), dst updated in place.
+instruct vlslS(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (LShiftVS dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_lsl(dst_reg, __ H, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftVI with a vector shift count; emits predicated sve_lsl (S lanes), dst updated in place.
+instruct vlslI(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (LShiftVI dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_lsl(dst_reg, __ S, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftVL with a vector shift count; emits predicated sve_lsl (D lanes), dst updated in place.
+instruct vlslL(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (LShiftVL dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_lsl(dst_reg, __ D, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches URShiftVB with a vector shift count; emits predicated sve_lsr (B lanes), dst updated in place.
+instruct vlsrB(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (URShiftVB dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_lsr(dst_reg, __ B, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches URShiftVS with a vector shift count; emits predicated sve_lsr (H lanes), dst updated in place.
+instruct vlsrS(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (URShiftVS dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_lsr(dst_reg, __ H, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches URShiftVI with a vector shift count; emits predicated sve_lsr (S lanes), dst updated in place.
+instruct vlsrI(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (URShiftVI dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_lsr(dst_reg, __ S, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches URShiftVL with a vector shift count; emits predicated sve_lsr (D lanes), dst updated in place.
+instruct vlsrL(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (URShiftVL dst shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
+    FloatRegister shift_reg = as_FloatRegister($shift$$reg);
+    __ sve_lsr(dst_reg, __ D, ptrue, shift_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches RShiftVB with a constant count; emits the immediate form of sve_asr (B lanes).
+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (RShiftVB src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    if (count == 0) {
+      // Shift by zero: OR src with itself to copy it into dst.
+      __ sve_orr(dst_reg, src_reg, src_reg);
+    } else {
+      // Counts of 8 or more are emitted as a shift by 7.
+      __ sve_asr(dst_reg, __ B, src_reg, count >= 8 ? 7 : count);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches RShiftVS with a constant count; emits the immediate form of sve_asr (H lanes).
+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (RShiftVS src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    if (count == 0) {
+      // Shift by zero: OR src with itself to copy it into dst.
+      __ sve_orr(dst_reg, src_reg, src_reg);
+    } else {
+      // Counts of 16 or more are emitted as a shift by 15.
+      __ sve_asr(dst_reg, __ H, src_reg, count >= 16 ? 15 : count);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches RShiftVI with a constant count; emits the immediate form of sve_asr (S lanes).
+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (RShiftVI src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    if (count == 0) {
+      // Shift by zero: OR src with itself to copy it into dst.
+      __ sve_orr(dst_reg, src_reg, src_reg);
+    } else {
+      __ sve_asr(dst_reg, __ S, src_reg, count);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches RShiftVL with a constant count; emits the immediate form of sve_asr (D lanes).
+instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (RShiftVL src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    if (count == 0) {
+      // Shift by zero: OR src with itself to copy it into dst.
+      __ sve_orr(dst_reg, src_reg, src_reg);
+    } else {
+      __ sve_asr(dst_reg, __ D, src_reg, count);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches URShiftVB with a constant count; emits the immediate form of sve_lsr (B lanes).
+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (URShiftVB src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    if (count == 0) {
+      // Shift by zero: OR src with itself to copy it into dst.
+      __ sve_orr(dst_reg, src_reg, src_reg);
+    } else if (count >= 8) {
+      // Count covers the whole 8-bit lane: XOR src with itself to zero dst.
+      __ sve_eor(dst_reg, src_reg, src_reg);
+    } else {
+      __ sve_lsr(dst_reg, __ B, src_reg, count);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Logical right shift of 16-bit (H) lanes by a constant count.
+//   count == 0  : src is copied to dst (sve_orr of src with itself).
+//   count >= 16 : every bit of a 16-bit lane is shifted out, so dst is
+//                 zeroed (sve_eor of src with itself).
+//   otherwise   : immediate form of sve_lsr.
+// The zeroing threshold has to be the lane width (16). Using 8 here would
+// wrongly zero the result for counts 8..15, which still leave significant
+// bits in a halfword lane.
+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (URShiftVS src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    if (con >= 16) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches URShiftVI with a constant count; emits the immediate form of sve_lsr (S lanes).
+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (URShiftVI src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    if (count == 0) {
+      // Shift by zero: OR src with itself to copy it into dst.
+      __ sve_orr(dst_reg, src_reg, src_reg);
+    } else {
+      __ sve_lsr(dst_reg, __ S, src_reg, count);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches URShiftVL with a constant count; emits the immediate form of sve_lsr (D lanes).
+instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (URShiftVL src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    if (count == 0) {
+      // Shift by zero: OR src with itself to copy it into dst.
+      __ sve_orr(dst_reg, src_reg, src_reg);
+    } else {
+      __ sve_lsr(dst_reg, __ D, src_reg, count);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftVB with a constant count; emits the immediate form of sve_lsl (B lanes).
+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (LShiftVB src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    if (count >= 8) {
+      // Count covers the whole 8-bit lane: XOR src with itself to zero dst.
+      __ sve_eor(dst_reg, src_reg, src_reg);
+    } else {
+      __ sve_lsl(dst_reg, __ B, src_reg, count);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Logical left shift of 16-bit (H) lanes by a constant count.
+//   count >= 16 : every bit of a 16-bit lane is shifted out, so dst is
+//                 zeroed (sve_eor of src with itself).
+//   otherwise   : immediate form of sve_lsl.
+// The zeroing threshold has to be the lane width (16). Using 8 here would
+// wrongly zero the result for counts 8..15, which still move the low byte
+// of a halfword lane into its high byte.
+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (LShiftVS src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con >= 16) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+           as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
+         as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftVI with a constant count; emits the immediate form of sve_lsl (S lanes).
+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (LShiftVI src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    __ sve_lsl(dst_reg, __ S, src_reg, count);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftVL with a constant count; emits the immediate form of sve_lsl (D lanes).
+instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (LShiftVL src shift));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    const int count = (int)$shift$$constant;
+    __ sve_lsl(dst_reg, __ D, src_reg, count);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftCntV/RShiftCntV for T_BYTE vectors; emits sve_dup of the count register (B lanes).
+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    Register      cnt_reg = as_Register($cnt$$reg);
+    __ sve_dup(dst_reg, __ B, cnt_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftCntV/RShiftCntV for T_SHORT or T_CHAR vectors; emits sve_dup of the count register (H lanes).
+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+            (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR)));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    Register      cnt_reg = as_Register($cnt$$reg);
+    __ sve_dup(dst_reg, __ H, cnt_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftCntV/RShiftCntV for T_INT vectors; emits sve_dup of the count register (S lanes).
+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    Register      cnt_reg = as_Register($cnt$$reg);
+    __ sve_dup(dst_reg, __ S, cnt_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches LShiftCntV/RShiftCntV for T_LONG vectors; emits sve_dup of the count register (D lanes).
+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    Register      cnt_reg = as_Register($cnt$$reg);
+    __ sve_dup(dst_reg, __ D, cnt_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector sqrt
|
|
|
|
|
+
|
|
|
|
|
+// Matches SqrtVF on vectors of at least 16 bytes; emits sve_fsqrt (S lanes) under ptrue.
+instruct vsqrtF(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  match(Set dst (SqrtVF src));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    __ sve_fsqrt(dst_reg, __ S, ptrue, src_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches SqrtVD on vectors of at least 16 bytes; emits sve_fsqrt (D lanes) under ptrue.
+instruct vsqrtD(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  match(Set dst (SqrtVD src));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    __ sve_fsqrt(dst_reg, __ D, ptrue, src_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// vector sub
|
|
|
|
|
+
|
|
|
|
|
+// Matches SubVB; emits unpredicated sve_sub on B lanes (dst = src1 - src2 operand order).
+instruct vsubB(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+  match(Set dst (SubVB src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
+    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
+    __ sve_sub(dst_reg, __ B, lhs_reg, rhs_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches SubVS; emits unpredicated sve_sub on H lanes (dst = src1 - src2 operand order).
+instruct vsubS(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+  match(Set dst (SubVS src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
+    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
+    __ sve_sub(dst_reg, __ H, lhs_reg, rhs_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches SubVI; emits unpredicated sve_sub on S lanes (dst = src1 - src2 operand order).
+instruct vsubI(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (SubVI src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
+    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
+    __ sve_sub(dst_reg, __ S, lhs_reg, rhs_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches SubVL; emits unpredicated sve_sub on D lanes (dst = src1 - src2 operand order).
+instruct vsubL(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (SubVL src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
+    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
+    __ sve_sub(dst_reg, __ D, lhs_reg, rhs_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches SubVF; emits unpredicated sve_fsub on S lanes (dst = src1 - src2 operand order).
+instruct vsubF(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  match(Set dst (SubVF src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
+    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
+    __ sve_fsub(dst_reg, __ S, lhs_reg, rhs_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+// Matches SubVD; emits unpredicated sve_fsub on D lanes (dst = src1 - src2 operand order).
+instruct vsubD(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+  match(Set dst (SubVD src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
+    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
+    __ sve_fsub(dst_reg, __ D, lhs_reg, rhs_reg);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
|
|
|
|
|
new file mode 100644
|
|
|
|
|
index 000000000..0323f2f8c
|
|
|
|
|
--- /dev/null
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
|
|
|
|
|
@@ -0,0 +1,727 @@
|
|
|
|
|
+//
|
|
|
|
|
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+// Copyright (c) 2020, Arm Limited. All rights reserved.
|
|
|
|
|
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
+//
|
|
|
|
|
+// This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
+// under the terms of the GNU General Public License version 2 only, as
|
|
|
|
|
+// published by the Free Software Foundation.
|
|
|
|
|
+//
|
|
|
|
|
+// This code is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
|
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
|
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
|
+// version 2 for more details (a copy is included in the LICENSE file that
|
|
|
|
|
+// accompanied this code).
|
|
|
|
|
+//
|
|
|
|
|
+// You should have received a copy of the GNU General Public License version
|
|
|
|
|
+// 2 along with this work; if not, write to the Free Software Foundation,
|
|
|
|
|
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
|
+//
|
|
|
|
|
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
|
|
|
+// or visit www.oracle.com if you need additional information or have any
|
|
|
|
|
+// questions.
|
|
|
|
|
+//
|
|
|
|
|
+//
|
|
|
|
|
+
|
|
|
|
|
+dnl Generate the warning
|
|
|
|
|
+// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
|
|
|
|
|
+dnl
|
|
|
|
|
+
|
|
|
|
|
+// AArch64 SVE Architecture Description File
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1,            $2,       $3     )
+dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len)
+dnl Generates a constant operand named vmemA_imm<abbr>Offset<len>. It matches
+dnl Con<abbr> nodes whose value (read with n->get_<imm_type>) is accepted by
+dnl Address::offset_ok_for_sve_immed for imm_len bits and for the value of
+dnl Matcher::scalable_vector_reg_size(T_BYTE).
+define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', `
+operand vmemA_imm$1Offset$3()
+%{
+  predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3,
+            Matcher::scalable_vector_reg_size(T_BYTE)));
+  match(Con$1);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}')
+dnl
+// 4 bit signed offset -- for predicated load/store
+OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4)
+OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4)
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1,            $2     )
+dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len)
+dnl Generates a memory operand named vmemA_indOff<abbr><len> that matches
+dnl AddP of a pointer register and a vmemA_imm<abbr>Offset<len> constant.
+dnl The operand has no index register (index is 0xffffffff, scale is 0) and
+dnl uses the constant as its displacement. The word index is quoted in the
+dnl body so that m4 does not expand it as a builtin.
+define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', `
+operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(0);
+  format %{ "[$reg, $off, MUL VL]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    `index'(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}')
+dnl
+OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4)
+OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4)
+
+opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
|
|
|
|
|
+
|
|
|
|
|
+source_hpp %{
+  // Returns true unless opcode is one of the multiply-reduction or Extract
+  // opcodes enumerated in the definition in the source block of this file.
+  bool op_sve_supported(int opcode);
+%}
|
|
|
|
|
+
|
|
|
|
|
+source %{
|
|
|
|
|
+
|
|
|
|
|
+  // Element basic type of the vector type reported by node n itself.
+  static inline BasicType vector_element_basic_type(const MachNode* n) {
+    return n->bottom_type()->is_vect()->element_basic_type();
+  }
|
|
|
|
|
+
|
|
|
|
|
+  // Element basic type of the vector produced by the input node that feeds
+  // operand opnd of node use.
+  static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
+    Node* producer = use->in(use->operand_index(opnd));
+    return producer->bottom_type()->is_vect()->element_basic_type();
+  }
|
|
|
|
|
+
|
|
|
|
|
+  // Pointer to a MacroAssembler member that emits one predicated SVE load or
+  // store of register Rt with element variant T under predicate Pg.
+  typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
+                                                           PRegister Pg, const Address &adr);
+
+  // Emits a predicated SVE vector load (is_store is false) or store (is_store
+  // is true) of reg under predicate pg.
+  //   bt     element type; its byte size selects the b/h/w/d instruction
+  //   opcode not used by this function
+  //   base   base address register
+  //   index  must be -1 (no index register); any other value is unimplemented
+  //   size   scale; must be 0
+  //   disp   displacement; it is divided by the value of
+  //          Matcher::scalable_vector_reg_size(T_BYTE) before being placed in
+  //          the Address (presumably a byte offset turned into a multiple of
+  //          the vector length -- TODO confirm)
+  static void loadStoreA_predicate(MacroAssembler masm, bool is_store,
+                                   FloatRegister reg, PRegister pg, BasicType bt,
+                                   int opcode, Register base, int index, int size, int disp) {
+    sve_mem_insn_predicate emit_fn;
+    Assembler::SIMD_RegVariant variant;
+    const int elem_bytes = type2aelembytes(bt);
+    if (index != -1) {
+      assert(false, "unimplemented");
+      ShouldNotReachHere();
+      return;
+    }
+    assert(size == 0, "unsupported address mode: scale size = %d", size);
+    switch(elem_bytes) {
+    case 1:
+      variant = Assembler::B;
+      emit_fn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b;
+      break;
+    case 2:
+      variant = Assembler::H;
+      emit_fn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h;
+      break;
+    case 4:
+      variant = Assembler::S;
+      emit_fn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w;
+      break;
+    case 8:
+      variant = Assembler::D;
+      emit_fn = is_store ? &MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d;
+      break;
+    default:
+      assert(false, "unsupported");
+      ShouldNotReachHere();
+    }
+    const Address target(base, disp / Matcher::scalable_vector_reg_size(T_BYTE));
+    (masm.*emit_fn)(reg, variant, pg, target);
+  }
|
|
|
|
|
+
|
|
|
|
|
+  // Returns false for the opcodes listed below (multiply reductions and the
+  // Extract family) and true for every other opcode.
+  bool op_sve_supported(int opcode) {
+    bool supported = true;
+    switch (opcode) {
+      // No multiply reduction instructions
+      case Op_MulReductionVD:
+      case Op_MulReductionVF:
+      case Op_MulReductionVI:
+      case Op_MulReductionVL:
+      // Others
+      case Op_Extract:
+      case Op_ExtractB:
+      case Op_ExtractC:
+      case Op_ExtractD:
+      case Op_ExtractF:
+      case Op_ExtractI:
+      case Op_ExtractL:
+      case Op_ExtractS:
+      case Op_ExtractUB:
+        supported = false;
+        break;
+      default:
+        break;
+    }
+    return supported;
+  }
|
|
|
|
|
+
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+definitions %{
+  // Cost constant passed to ins_cost by the SVE instruct rules in this file.
+  int_def SVE_COST             (200, 200);
+%}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl ELEMENT_SHORT_CHAR($1,    $2)
+dnl ELEMENT_SHORT_CHAR(etype, node)
+dnl Expands to an element-type test on the vector type of node. When etype is
+dnl T_SHORT the test accepts both T_SHORT and T_CHAR; for any other etype it
+dnl tests for exactly that type.
+define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
+  `($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+            ($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
+  `($2->bottom_type()->is_vect()->element_basic_type() == $1)')')
|
|
|
|
|
+dnl
|
|
|
|
|
+
|
|
|
|
|
+// All SVE instructions
|
|
|
|
|
+
|
|
|
|
|
+// vector load/store
|
|
|
|
|
+
|
|
|
|
|
+// Use predicated vector load/store
+// Matches LoadVector of at least 16 bytes and emits the load through
+// loadStoreA_predicate with the ptrue predicate and the element type of this node.
+instruct loadV(vReg dst, vmemA mem) %{
+  predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
+  match(Set dst (LoadVector mem));
+  ins_cost(SVE_COST);
+  format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
+  ins_encode %{
+    BasicType elem_bt = vector_element_basic_type(this);
+    loadStoreA_predicate(MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg), ptrue,
+                         elem_bt, $mem->opcode(),
+                         as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+  ins_pipe(pipe_slow);
+%}
|
|
|
|
|
+
|
|
|
|
|
+instruct storeV(vReg src, vmemA mem) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
|
|
|
|
|
+ match(Set mem (StoreVector mem src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_str $mem, $src\t # vector (sve)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ FloatRegister src_reg = as_FloatRegister($src$$reg);
|
|
|
|
|
+ loadStoreA_predicate(MacroAssembler(&cbuf), true, src_reg, ptrue,
|
|
|
|
|
+ vector_element_basic_type(this, $src), $mem->opcode(),
|
|
|
|
|
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
|
|
|
|
|
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
|
|
|
|
|
+define(`UNARY_OP_TRUE_PREDICATE_ETYPE', `
|
|
|
|
|
+instruct $1(vReg dst, vReg src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
|
|
|
|
|
+ n->bottom_type()->is_vect()->element_basic_type() == $3);
|
|
|
|
|
+ match(Set dst ($2 src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "$6 $dst, $src\t# vector (sve) ($4)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ $6(as_FloatRegister($dst$$reg), __ $4,
|
|
|
|
|
+ ptrue, as_FloatRegister($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl BINARY_OP_UNPREDICATED($1, $2 $3, $4 $5 )
|
|
|
|
|
+dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn)
|
|
|
|
|
+define(`BINARY_OP_UNPREDICATED', `
|
|
|
|
|
+instruct $1(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
|
|
|
|
|
+ match(Set dst ($2 src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+
|
|
|
|
|
+// vector add
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8, sve_add)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4, sve_add)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2, sve_add)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4, sve_fadd)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2, sve_fadd)
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl BINARY_OP_UNSIZED($1, $2, $3, $4 )
|
|
|
|
|
+dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn)
|
|
|
|
|
+define(`BINARY_OP_UNSIZED', `
|
|
|
|
|
+instruct $1(vReg dst, vReg src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $3);
|
|
|
|
|
+ match(Set dst ($2 src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "$4 $dst, $src1, $src2\t# vector (sve)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ $4(as_FloatRegister($dst$$reg),
|
|
|
|
|
+ as_FloatRegister($src1$$reg),
|
|
|
|
|
+ as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+
|
|
|
|
|
+// vector and
|
|
|
|
|
+BINARY_OP_UNSIZED(vand, AndV, 16, sve_and)
|
|
|
|
|
+
|
|
|
|
|
+// vector or
|
|
|
|
|
+BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr)
|
|
|
|
|
+
|
|
|
|
|
+// vector xor
|
|
|
|
|
+BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor)
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VDIVF($1, $2 , $3 )
|
|
|
|
|
+dnl VDIVF(name_suffix, size, min_vec_len)
|
|
|
|
|
+define(`VDIVF', `
|
|
|
|
|
+instruct vdiv$1(vReg dst_src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
|
|
|
|
|
+ match(Set dst_src1 (DivV$1 dst_src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+
|
|
|
|
|
+// vector float div
|
|
|
|
|
+VDIVF(F, S, 4)
|
|
|
|
|
+VDIVF(D, D, 2)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl BINARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
|
|
|
|
|
+dnl BINARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
|
|
|
|
|
+define(`BINARY_OP_TRUE_PREDICATE_ETYPE', `
|
|
|
|
|
+instruct $1(vReg dst_src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
|
|
|
|
|
+ n->bottom_type()->is_vect()->element_basic_type() == $3);
|
|
|
|
|
+ match(Set dst_src1 ($2 dst_src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "$6 $dst_src1, $dst_src1, $src2\t # vector (sve) ($4)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ $6(as_FloatRegister($dst_src1$$reg), __ $4,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VFMLA($1 $2 $3 )
|
|
|
|
|
+dnl VFMLA(name_suffix, size, min_vec_len)
|
|
|
|
|
+define(`VFMLA', `
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * src3
|
|
|
|
|
+instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
|
|
|
|
|
+ match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+// vector fmla
|
|
|
|
|
+VFMLA(F, S, 4)
|
|
|
|
|
+VFMLA(D, D, 2)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VFMLS($1 $2 $3 )
|
|
|
|
|
+dnl VFMLS(name_suffix, size, min_vec_len)
|
|
|
|
|
+define(`VFMLS', `
|
|
|
|
|
+// dst_src1 = dst_src1 + -src2 * src3
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * -src3
|
|
|
|
|
+instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
|
|
|
|
|
+ match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
|
|
|
|
|
+ match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+// vector fmls
|
|
|
|
|
+VFMLS(F, S, 4)
|
|
|
|
|
+VFMLS(D, D, 2)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VFNMLA($1 $2 $3 )
|
|
|
|
|
+dnl VFNMLA(name_suffix, size, min_vec_len)
|
|
|
|
|
+define(`VFNMLA', `
|
|
|
|
|
+// dst_src1 = -dst_src1 + -src2 * src3
|
|
|
|
|
+// dst_src1 = -dst_src1 + src2 * -src3
|
|
|
|
|
+instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
|
|
|
|
|
+ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
|
|
|
|
|
+ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+// vector fnmla
|
|
|
|
|
+VFNMLA(F, S, 4)
|
|
|
|
|
+VFNMLA(D, D, 2)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VFNMLS($1 $2 $3 )
|
|
|
|
|
+dnl VFNMLS(name_suffix, size, min_vec_len)
|
|
|
|
|
+define(`VFNMLS', `
|
|
|
|
|
+// dst_src1 = -dst_src1 + src2 * src3
|
|
|
|
|
+instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
|
|
|
|
|
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
|
|
|
|
|
+ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+// vector fnmls
|
|
|
|
|
+VFNMLS(F, S, 4)
|
|
|
|
|
+VFNMLS(D, D, 2)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VMLA($1 $2 $3 )
|
|
|
|
|
+dnl VMLA(name_suffix, size, min_vec_len)
|
|
|
|
|
+define(`VMLA', `
|
|
|
|
|
+// dst_src1 = dst_src1 + src2 * src3
|
|
|
|
|
+instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
|
|
|
|
|
+%{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
|
|
|
|
|
+ match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+// vector mla
|
|
|
|
|
+VMLA(B, B, 16)
|
|
|
|
|
+VMLA(S, H, 8)
|
|
|
|
|
+VMLA(I, S, 4)
|
|
|
|
|
+VMLA(L, D, 2)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VMLS($1 $2 $3 )
|
|
|
|
|
+dnl VMLS(name_suffix, size, min_vec_len)
|
|
|
|
|
+define(`VMLS', `
|
|
|
|
|
+// dst_src1 = dst_src1 - src2 * src3
|
|
|
|
|
+instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
|
|
|
|
|
+%{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
|
|
|
|
|
+ match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+// vector mls
|
|
|
|
|
+VMLS(B, B, 16)
|
|
|
|
|
+VMLS(S, H, 8)
|
|
|
|
|
+VMLS(I, S, 4)
|
|
|
|
|
+VMLS(L, D, 2)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl BINARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
|
|
|
|
|
+dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
|
|
|
|
|
+define(`BINARY_OP_TRUE_PREDICATE', `
|
|
|
|
|
+instruct $1(vReg dst_src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
|
|
|
|
|
+ match(Set dst_src1 ($2 dst_src1 src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ $5(as_FloatRegister($dst_src1$$reg), __ $3,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+
|
|
|
|
|
+// vector mul
|
|
|
|
|
+BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8, sve_mul)
|
|
|
|
|
+BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4, sve_mul)
|
|
|
|
|
+BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2, sve_mul)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
|
|
|
|
|
+dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn)
|
|
|
|
|
+define(`UNARY_OP_TRUE_PREDICATE', `
|
|
|
|
|
+instruct $1(vReg dst, vReg src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $4);
|
|
|
|
|
+ match(Set dst ($2 src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "$5 $dst, $src\t# vector (sve) ($3)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
|
|
|
|
|
+ ptrue, as_FloatRegister($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+// vector fneg
|
|
|
|
|
+UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, sve_fneg)
|
|
|
|
|
+UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg)
|
|
|
|
|
+
|
|
|
|
|
+// popcount vector
|
|
|
|
|
+
|
|
|
|
|
+instruct vpopcountI(vReg dst, vReg src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
|
|
|
|
+ match(Set dst (PopCountVI src));
|
|
|
|
|
+ format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 )
|
|
|
|
|
+dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
|
|
|
|
|
+define(`REDUCE_ADD', `
|
|
|
|
|
+instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
|
|
|
|
|
+ ELEMENT_SHORT_CHAR($6, n->in(2)));
|
|
|
|
|
+ match(Set dst ($2 src1 src2));
|
|
|
|
|
+ effect(TEMP_DEF dst, TEMP tmp);
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
|
|
|
|
|
+ "umov $dst, $tmp, $5, 0\n\t"
|
|
|
|
|
+ "$7 $dst, $dst, $src1\t # add reduction $5" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
|
|
|
|
|
+ __ $7($dst$$Register, $dst$$Register, $src1$$Register);
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl REDUCE_ADDF($1, $2, $3, $4 )
|
|
|
|
|
+dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
|
|
|
|
|
+define(`REDUCE_ADDF', `
|
|
|
|
|
+instruct $1($3 src1_dst, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
|
|
|
|
|
+ match(Set src1_dst ($2 src1_dst src2));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+// vector add reduction
|
|
|
|
|
+REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw)
|
|
|
|
|
+REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add)
|
|
|
|
|
+REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S)
|
|
|
|
|
+REDUCE_ADDF(reduce_addD, AddReductionVD, vRegD, D)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 )
|
|
|
|
|
+dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
|
|
|
|
|
+define(`REDUCE_FMINMAX', `
|
|
|
|
|
+instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
|
|
|
|
|
+ n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
|
|
|
|
|
+ match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
|
|
|
|
|
+ ins_cost(INSN_COST);
|
|
|
|
|
+ effect(TEMP_DEF dst);
|
|
|
|
|
+ format %{ "sve_f$1v $dst, $src2 # vector (sve) ($4)\n\t"
|
|
|
|
|
+ "f`$1'translit($4, `SD', `sd') $dst, $dst, $src1\t # $1 reduction $2" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
|
|
|
|
|
+ ptrue, as_FloatRegister($src2$$reg));
|
|
|
|
|
+ __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+// vector max reduction
|
|
|
|
|
+REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF)
|
|
|
|
|
+REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
|
|
|
|
|
+
|
|
|
|
|
+// vector min reduction
|
|
|
|
|
+REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF)
|
|
|
|
|
+REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
|
|
|
|
|
+
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl REPLICATE($1, $2, $3, $4, $5 )
|
|
|
|
|
+dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
|
|
|
|
|
+define(`REPLICATE', `
|
|
|
|
|
+instruct $1(vReg dst, $3 src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
|
|
|
|
|
+ match(Set dst ($2 src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_dup $dst, $src\t# vector (sve) ($4)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl REPLICATE_IMM8($1, $2, $3, $4, $5 )
|
|
|
|
|
+dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
|
|
|
|
|
+define(`REPLICATE_IMM8', `
|
|
|
|
|
+instruct $1(vReg dst, $3 con) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
|
|
|
|
|
+ match(Set dst ($2 con));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_dup $dst, $con\t# vector (sve) ($4)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl FREPLICATE($1, $2, $3, $4, $5 )
|
|
|
|
|
+dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
|
|
|
|
|
+define(`FREPLICATE', `
|
|
|
|
|
+instruct $1(vReg dst, $3 src) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
|
|
|
|
|
+ match(Set dst ($2 src));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "sve_cpy $dst, $src\t# vector (sve) ($4)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
|
|
|
|
|
+ ptrue, as_FloatRegister($src$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+
|
|
|
|
|
+// vector replicate
|
|
|
|
|
+REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
|
|
|
|
|
+REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
|
|
|
|
|
+REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
|
|
|
|
|
+REPLICATE(replicateL, ReplicateL, iRegL, D, 2)
|
|
|
|
|
+
|
|
|
|
|
+REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16)
|
|
|
|
|
+REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
|
|
|
|
|
+REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
|
|
|
|
|
+REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
|
|
|
|
|
+
|
|
|
|
|
+FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
|
|
|
|
|
+FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VSHIFT_TRUE_PREDICATE($1, $2, $3, $4, $5 )
|
|
|
|
|
+dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
|
|
|
|
|
+define(`VSHIFT_TRUE_PREDICATE', `
|
|
|
|
|
+instruct $1(vReg dst, vReg shift) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
|
|
|
|
|
+ match(Set dst ($2 dst shift));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
|
|
|
|
|
+ ptrue, as_FloatRegister($shift$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 )
|
|
|
|
|
+dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
|
|
|
|
|
+define(`VSHIFT_IMM_UNPREDICATE', `
|
|
|
|
|
+instruct $1(vReg dst, vReg src, immI shift) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
|
|
|
|
|
+ match(Set dst ($2 src shift));
|
|
|
|
|
+ ins_cost(SVE_COST);
|
|
|
|
|
+ format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ int con = (int)$shift$$constant;dnl
|
|
|
|
|
+ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
|
|
|
|
|
+ if (con == 0) {
|
|
|
|
|
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
|
|
|
|
+ as_FloatRegister($src$$reg));
|
|
|
|
|
+ return;
|
|
|
|
|
+ }')dnl
|
|
|
|
|
+ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
|
|
|
|
|
+ if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, `
|
|
|
|
|
+ if (con >= 16) con = 15;')')dnl
|
|
|
|
|
+ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, `
|
|
|
|
|
+ if (con >= ifelse(`$3', `H', `16', `8')) { // a count of at least the lane width clears the result
|
|
|
|
|
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
|
|
|
|
+ as_FloatRegister($src$$reg));
|
|
|
|
|
+ return;
|
|
|
|
|
+ }')
|
|
|
|
|
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
|
|
|
|
|
+ as_FloatRegister($src$$reg), con);
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+dnl
|
|
|
|
|
+dnl VSHIFT_COUNT($1, $2, $3, $4 )
|
|
|
|
|
+dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
|
|
|
|
|
+define(`VSHIFT_COUNT', `
|
|
|
|
|
+instruct $1(vReg dst, iRegIorL2I cnt) %{
|
|
|
|
|
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3 &&
|
|
|
|
|
+ ELEMENT_SHORT_CHAR($4, n));
|
|
|
|
|
+ match(Set dst (LShiftCntV cnt));
|
|
|
|
|
+ match(Set dst (RShiftCntV cnt));
|
|
|
|
|
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %}
|
|
|
|
|
+ ins_encode %{
|
|
|
|
|
+ __ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg));
|
|
|
|
|
+ %}
|
|
|
|
|
+ ins_pipe(pipe_slow);
|
|
|
|
|
+%}')dnl
|
|
|
|
|
+
|
|
|
|
|
+// vector shift
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB, B, 16, sve_asr)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS, H, 8, sve_asr)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI, S, 4, sve_asr)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL, D, 2, sve_asr)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB, B, 16, sve_lsl)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS, H, 8, sve_lsl)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI, S, 4, sve_lsl)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL, D, 2, sve_lsl)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr)
|
|
|
|
|
+VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl)
|
|
|
|
|
+VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl)
|
|
|
|
|
+VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
|
|
|
|
|
+VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT)
|
|
|
|
|
+VSHIFT_COUNT(vshiftcntI, S, 4, T_INT)
|
|
|
|
|
+VSHIFT_COUNT(vshiftcntL, D, 2, T_LONG)
|
|
|
|
|
+
|
|
|
|
|
+// vector sqrt
|
|
|
|
|
+UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt)
|
|
|
|
|
+UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt)
|
|
|
|
|
+
|
|
|
|
|
+// vector sub
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub)
|
|
|
|
|
+BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub)
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 2a17d8e0f..943d2a615 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -96,662 +96,662 @@ void entry(CodeBuffer *cb) {
|
2020-12-24 15:35:16 +08:00
|
|
|
__ bind(back);
|
|
|
|
|
|
|
|
|
|
// ArithOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ add(r15, r12, r16, Assembler::LSR, 30); // add x15, x12, x16, LSR #30
|
|
|
|
|
- __ sub(r1, r15, r3, Assembler::LSR, 32); // sub x1, x15, x3, LSR #32
|
|
|
|
|
- __ adds(r13, r25, r5, Assembler::LSL, 13); // adds x13, x25, x5, LSL #13
|
|
|
|
|
- __ subs(r22, r28, r6, Assembler::ASR, 17); // subs x22, x28, x6, ASR #17
|
|
|
|
|
- __ addw(r0, r9, r22, Assembler::ASR, 6); // add w0, w9, w22, ASR #6
|
|
|
|
|
- __ subw(r19, r3, r25, Assembler::LSL, 21); // sub w19, w3, w25, LSL #21
|
|
|
|
|
- __ addsw(r4, r19, r11, Assembler::LSL, 20); // adds w4, w19, w11, LSL #20
|
|
|
|
|
- __ subsw(r24, r7, r19, Assembler::ASR, 0); // subs w24, w7, w19, ASR #0
|
|
|
|
|
- __ andr(r30, r7, r11, Assembler::LSL, 48); // and x30, x7, x11, LSL #48
|
|
|
|
|
- __ orr(r24, r8, r15, Assembler::LSL, 12); // orr x24, x8, x15, LSL #12
|
|
|
|
|
- __ eor(r17, r9, r23, Assembler::LSL, 1); // eor x17, x9, x23, LSL #1
|
|
|
|
|
- __ ands(r14, r11, r4, Assembler::LSR, 55); // ands x14, x11, x4, LSR #55
|
|
|
|
|
- __ andw(r19, r7, r12, Assembler::LSR, 17); // and w19, w7, w12, LSR #17
|
|
|
|
|
- __ orrw(r19, r27, r11, Assembler::ASR, 28); // orr w19, w27, w11, ASR #28
|
|
|
|
|
- __ eorw(r30, r3, r22, Assembler::LSR, 31); // eor w30, w3, w22, LSR #31
|
|
|
|
|
- __ andsw(r19, r26, r28, Assembler::ASR, 0); // ands w19, w26, w28, ASR #0
|
|
|
|
|
- __ bic(r29, r6, r26, Assembler::LSL, 51); // bic x29, x6, x26, LSL #51
|
|
|
|
|
- __ orn(r26, r27, r17, Assembler::LSL, 35); // orn x26, x27, x17, LSL #35
|
|
|
|
|
- __ eon(r21, r4, r14, Assembler::LSL, 5); // eon x21, x4, x14, LSL #5
|
|
|
|
|
- __ bics(r2, r15, r0, Assembler::ASR, 5); // bics x2, x15, x0, ASR #5
|
|
|
|
|
- __ bicw(r2, r7, r2, Assembler::LSL, 29); // bic w2, w7, w2, LSL #29
|
|
|
|
|
- __ ornw(r24, r12, r21, Assembler::LSR, 5); // orn w24, w12, w21, LSR #5
|
|
|
|
|
- __ eonw(r30, r15, r19, Assembler::LSL, 2); // eon w30, w15, w19, LSL #2
|
|
|
|
|
- __ bicsw(r30, r23, r17, Assembler::ASR, 28); // bics w30, w23, w17, ASR #28
|
|
|
|
|
+ __ add(r23, r1, r13, Assembler::LSR, 45); // add x23, x1, x13, LSR #45
|
|
|
|
|
+ __ sub(r8, r30, r12, Assembler::ASR, 56); // sub x8, x30, x12, ASR #56
|
|
|
|
|
+ __ adds(r27, r23, r14, Assembler::LSL, 54); // adds x27, x23, x14, LSL #54
|
|
|
|
|
+ __ subs(r21, r15, r20, Assembler::LSR, 38); // subs x21, x15, x20, LSR #38
|
|
|
|
|
+ __ addw(r25, r17, r4, Assembler::LSL, 3); // add w25, w17, w4, LSL #3
|
|
|
|
|
+ __ subw(r29, r1, r9, Assembler::ASR, 20); // sub w29, w1, w9, ASR #20
|
|
|
|
|
+ __ addsw(r10, r26, r9, Assembler::ASR, 9); // adds w10, w26, w9, ASR #9
|
|
|
|
|
+ __ subsw(r21, r30, r7, Assembler::ASR, 3); // subs w21, w30, w7, ASR #3
|
|
|
|
|
+ __ andr(r9, r8, r11, Assembler::LSR, 4); // and x9, x8, x11, LSR #4
|
|
|
|
|
+ __ orr(r3, r18, r1, Assembler::ASR, 1); // orr x3, x18, x1, ASR #1
|
|
|
|
|
+ __ eor(r10, r20, r2, Assembler::LSL, 27); // eor x10, x20, x2, LSL #27
|
|
|
|
|
+ __ ands(r12, r9, r11, Assembler::ASR, 31); // ands x12, x9, x11, ASR #31
|
|
|
|
|
+ __ andw(r20, r9, r30, Assembler::ASR, 26); // and w20, w9, w30, ASR #26
|
|
|
|
|
+ __ orrw(r21, r10, r26, Assembler::ASR, 17); // orr w21, w10, w26, ASR #17
|
|
|
|
|
+ __ eorw(r0, r8, r7, Assembler::ASR, 7); // eor w0, w8, w7, ASR #7
|
|
|
|
|
+ __ andsw(r19, r11, r8, Assembler::LSL, 18); // ands w19, w11, w8, LSL #18
|
|
|
|
|
+ __ bic(r23, r4, r3, Assembler::LSL, 53); // bic x23, x4, x3, LSL #53
|
|
|
|
|
+ __ orn(r9, r6, r19, Assembler::LSL, 24); // orn x9, x6, x19, LSL #24
|
|
|
|
|
+ __ eon(r12, r6, r26, Assembler::LSR, 54); // eon x12, x6, x26, LSR #54
|
|
|
|
|
+ __ bics(r22, r19, r12, Assembler::LSL, 14); // bics x22, x19, x12, LSL #14
|
|
|
|
|
+ __ bicw(r29, r13, r22, Assembler::LSL, 11); // bic w29, w13, w22, LSL #11
|
|
|
|
|
+ __ ornw(r17, r30, r20, Assembler::ASR, 5); // orn w17, w30, w20, ASR #5
|
|
|
|
|
+ __ eonw(r1, r29, r11, Assembler::LSL, 8); // eon w1, w29, w11, LSL #8
|
|
|
|
|
+ __ bicsw(r4, r20, r6, Assembler::LSR, 29); // bics w4, w20, w6, LSR #29
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// AddSubImmOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ addw(r4, r20, 660u); // add w4, w20, #660
|
|
|
|
|
- __ addsw(r2, r10, 710u); // adds w2, w10, #710
|
|
|
|
|
- __ subw(r19, r26, 244u); // sub w19, w26, #244
|
|
|
|
|
- __ subsw(r28, r13, 73u); // subs w28, w13, #73
|
|
|
|
|
- __ add(r2, r30, 862u); // add x2, x30, #862
|
|
|
|
|
- __ adds(r27, r16, 574u); // adds x27, x16, #574
|
|
|
|
|
- __ sub(r22, r9, 589u); // sub x22, x9, #589
|
|
|
|
|
- __ subs(r4, r1, 698u); // subs x4, x1, #698
|
|
|
|
|
+ __ addw(r30, r6, 504u); // add w30, w6, #504
|
|
|
|
|
+ __ addsw(r19, r8, 943u); // adds w19, w8, #943
|
|
|
|
|
+ __ subw(r29, r10, 365u); // sub w29, w10, #365
|
|
|
|
|
+ __ subsw(r4, r8, 284u); // subs w4, w8, #284
|
|
|
|
|
+ __ add(r3, r14, 958u); // add x3, x14, #958
|
|
|
|
|
+ __ adds(r22, r20, 167u); // adds x22, x20, #167
|
|
|
|
|
+ __ sub(r27, r15, 725u); // sub x27, x15, #725
|
|
|
|
|
+ __ subs(r24, r28, 947u); // subs x24, x28, #947
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LogicalImmOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ andw(r28, r19, 4294709247ul); // and w28, w19, #0xfffc0fff
|
|
|
|
|
- __ orrw(r27, r5, 536870910ul); // orr w27, w5, #0x1ffffffe
|
|
|
|
|
- __ eorw(r30, r20, 4294840319ul); // eor w30, w20, #0xfffe0fff
|
|
|
|
|
- __ andsw(r22, r26, 4294959615ul); // ands w22, w26, #0xffffe1ff
|
|
|
|
|
- __ andr(r5, r7, 4194300ul); // and x5, x7, #0x3ffffc
|
|
|
|
|
- __ orr(r13, r7, 18014398509481728ul); // orr x13, x7, #0x3fffffffffff00
|
|
|
|
|
- __ eor(r7, r9, 18442240474082197503ul); // eor x7, x9, #0xfff0000000003fff
|
|
|
|
|
- __ ands(r3, r0, 18374686479671656447ul); // ands x3, x0, #0xff00000000007fff
|
|
|
|
|
+ __ andw(r25, r25, 2139127680ul); // and w25, w25, #0x7f807f80
|
|
|
|
|
+ __ orrw(r13, r26, 2097120ul); // orr w13, w26, #0x1fffe0
|
|
|
|
|
+ __ eorw(r21, r13, 3758096384ul); // eor w21, w13, #0xe0000000
|
|
|
|
|
+ __ andsw(r2, r3, 1073733632ul); // ands w2, w3, #0x3fffe000
|
|
|
|
|
+ __ andr(r8, r10, 1125895612137471ul); // and x8, x10, #0x3ffff0003ffff
|
|
|
|
|
+ __ orr(r27, r16, 18444492273897963519ul); // orr x27, x16, #0xfff80000001fffff
|
|
|
|
|
+ __ eor(r27, r3, 4611685469745315712ul); // eor x27, x3, #0x3fffff803fffff80
|
|
|
|
|
+ __ ands(r4, r23, 18446744056529698815ul); // ands x4, x23, #0xfffffffc00003fff
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// AbsOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ b(__ pc()); // b .
|
|
|
|
|
- __ b(back); // b back
|
|
|
|
|
- __ b(forth); // b forth
|
|
|
|
|
- __ bl(__ pc()); // bl .
|
|
|
|
|
- __ bl(back); // bl back
|
|
|
|
|
- __ bl(forth); // bl forth
|
2020-12-24 15:35:16 +08:00
|
|
|
+ __ b(__ pc()); // b .
|
|
|
|
|
+ __ b(back); // b back
|
|
|
|
|
+ __ b(forth); // b forth
|
|
|
|
|
+ __ bl(__ pc()); // bl .
|
|
|
|
|
+ __ bl(back); // bl back
|
|
|
|
|
+ __ bl(forth); // bl forth
|
|
|
|
|
|
|
|
|
|
// RegAndAbsOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ cbzw(r16, __ pc()); // cbz w16, .
|
|
|
|
|
- __ cbzw(r16, back); // cbz w16, back
|
|
|
|
|
- __ cbzw(r16, forth); // cbz w16, forth
|
|
|
|
|
- __ cbnzw(r19, __ pc()); // cbnz w19, .
|
|
|
|
|
- __ cbnzw(r19, back); // cbnz w19, back
|
|
|
|
|
- __ cbnzw(r19, forth); // cbnz w19, forth
|
|
|
|
|
- __ cbz(r5, __ pc()); // cbz x5, .
|
|
|
|
|
- __ cbz(r5, back); // cbz x5, back
|
|
|
|
|
- __ cbz(r5, forth); // cbz x5, forth
|
|
|
|
|
- __ cbnz(r4, __ pc()); // cbnz x4, .
|
|
|
|
|
- __ cbnz(r4, back); // cbnz x4, back
|
|
|
|
|
- __ cbnz(r4, forth); // cbnz x4, forth
|
|
|
|
|
- __ adr(r27, __ pc()); // adr x27, .
|
|
|
|
|
- __ adr(r27, back); // adr x27, back
|
|
|
|
|
- __ adr(r27, forth); // adr x27, forth
|
|
|
|
|
- __ _adrp(r16, __ pc()); // adrp x16, .
|
|
|
|
|
+ __ cbzw(r3, __ pc()); // cbz w3, .
|
|
|
|
|
+ __ cbzw(r3, back); // cbz w3, back
|
|
|
|
|
+ __ cbzw(r3, forth); // cbz w3, forth
|
|
|
|
|
+ __ cbnzw(r2, __ pc()); // cbnz w2, .
|
|
|
|
|
+ __ cbnzw(r2, back); // cbnz w2, back
|
|
|
|
|
+ __ cbnzw(r2, forth); // cbnz w2, forth
|
|
|
|
|
+ __ cbz(r25, __ pc()); // cbz x25, .
|
|
|
|
|
+ __ cbz(r25, back); // cbz x25, back
|
|
|
|
|
+ __ cbz(r25, forth); // cbz x25, forth
|
|
|
|
|
+ __ cbnz(r18, __ pc()); // cbnz x18, .
|
|
|
|
|
+ __ cbnz(r18, back); // cbnz x18, back
|
|
|
|
|
+ __ cbnz(r18, forth); // cbnz x18, forth
|
|
|
|
|
+ __ adr(r8, __ pc()); // adr x8, .
|
|
|
|
|
+ __ adr(r8, back); // adr x8, back
|
|
|
|
|
+ __ adr(r8, forth); // adr x8, forth
|
|
|
|
|
+ __ _adrp(r15, __ pc()); // adrp x15, .
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// RegImmAbsOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ tbz(r28, 8, __ pc()); // tbz x28, #8, .
|
|
|
|
|
- __ tbz(r28, 8, back); // tbz x28, #8, back
|
|
|
|
|
- __ tbz(r28, 8, forth); // tbz x28, #8, forth
|
|
|
|
|
- __ tbnz(r1, 1, __ pc()); // tbnz x1, #1, .
|
|
|
|
|
- __ tbnz(r1, 1, back); // tbnz x1, #1, back
|
|
|
|
|
- __ tbnz(r1, 1, forth); // tbnz x1, #1, forth
|
|
|
|
|
+ __ tbz(r18, 14, __ pc()); // tbz x18, #14, .
|
|
|
|
|
+ __ tbz(r18, 14, back); // tbz x18, #14, back
|
|
|
|
|
+ __ tbz(r18, 14, forth); // tbz x18, #14, forth
|
|
|
|
|
+ __ tbnz(r25, 15, __ pc()); // tbnz x25, #15, .
|
|
|
|
|
+ __ tbnz(r25, 15, back); // tbnz x25, #15, back
|
|
|
|
|
+ __ tbnz(r25, 15, forth); // tbnz x25, #15, forth
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// MoveWideImmOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ movnw(r20, 8639, 16); // movn w20, #8639, lsl 16
|
|
|
|
|
- __ movzw(r7, 25835, 0); // movz w7, #25835, lsl 0
|
|
|
|
|
- __ movkw(r17, 7261, 0); // movk w17, #7261, lsl 0
|
|
|
|
|
- __ movn(r14, 2097, 32); // movn x14, #2097, lsl 32
|
|
|
|
|
- __ movz(r9, 16082, 0); // movz x9, #16082, lsl 0
|
|
|
|
|
- __ movk(r19, 13962, 16); // movk x19, #13962, lsl 16
|
|
|
|
|
+ __ movnw(r18, 4126, 16); // movn w18, #4126, lsl 16
|
|
|
|
|
+ __ movzw(r30, 13712, 0); // movz w30, #13712, lsl 0
|
|
|
|
|
+ __ movkw(r21, 13161, 16); // movk w21, #13161, lsl 16
|
|
|
|
|
+ __ movn(r18, 28524, 48); // movn x18, #28524, lsl 48
|
|
|
|
|
+ __ movz(r13, 30710, 48); // movz x13, #30710, lsl 48
|
|
|
|
|
+ __ movk(r3, 31565, 48); // movk x3, #31565, lsl 48
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// BitfieldOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ sbfm(r9, r22, 6, 22); // sbfm x9, x22, #6, #22
|
|
|
|
|
- __ bfmw(r19, r0, 11, 0); // bfm w19, w0, #11, #0
|
|
|
|
|
- __ ubfmw(r10, r19, 11, 19); // ubfm w10, w19, #11, #19
|
|
|
|
|
- __ sbfm(r4, r15, 5, 17); // sbfm x4, x15, #5, #17
|
|
|
|
|
- __ bfm(r3, r5, 19, 28); // bfm x3, x5, #19, #28
|
|
|
|
|
- __ ubfm(r12, r28, 17, 2); // ubfm x12, x28, #17, #2
|
|
|
|
|
+ __ sbfm(r10, r1, 0, 3); // sbfm x10, x1, #0, #3
|
|
|
|
|
+ __ bfmw(r12, r22, 5, 24); // bfm w12, w22, #5, #24
|
|
|
|
|
+ __ ubfmw(r17, r3, 11, 8); // ubfm w17, w3, #11, #8
|
|
|
|
|
+ __ sbfm(r0, r3, 11, 14); // sbfm x0, x3, #11, #14
|
|
|
|
|
+ __ bfm(r28, r6, 7, 15); // bfm x28, x6, #7, #15
|
|
|
|
|
+ __ ubfm(r9, r10, 1, 25); // ubfm x9, x10, #1, #25
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// ExtractOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ extrw(r15, r0, r22, 3); // extr w15, w0, w22, #3
|
|
|
|
|
- __ extr(r6, r14, r14, 55); // extr x6, x14, x14, #55
|
|
|
|
|
+ __ extrw(r21, r27, r25, 23); // extr w21, w27, w25, #23
|
|
|
|
|
+ __ extr(r14, r17, r22, 17); // extr x14, x17, x22, #17
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// CondBranchOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ br(Assembler::EQ, __ pc()); // b.EQ .
|
|
|
|
|
- __ br(Assembler::EQ, back); // b.EQ back
|
|
|
|
|
- __ br(Assembler::EQ, forth); // b.EQ forth
|
|
|
|
|
- __ br(Assembler::NE, __ pc()); // b.NE .
|
|
|
|
|
- __ br(Assembler::NE, back); // b.NE back
|
|
|
|
|
- __ br(Assembler::NE, forth); // b.NE forth
|
|
|
|
|
- __ br(Assembler::HS, __ pc()); // b.HS .
|
|
|
|
|
- __ br(Assembler::HS, back); // b.HS back
|
|
|
|
|
- __ br(Assembler::HS, forth); // b.HS forth
|
|
|
|
|
- __ br(Assembler::CS, __ pc()); // b.CS .
|
|
|
|
|
- __ br(Assembler::CS, back); // b.CS back
|
|
|
|
|
- __ br(Assembler::CS, forth); // b.CS forth
|
|
|
|
|
- __ br(Assembler::LO, __ pc()); // b.LO .
|
|
|
|
|
- __ br(Assembler::LO, back); // b.LO back
|
|
|
|
|
- __ br(Assembler::LO, forth); // b.LO forth
|
|
|
|
|
- __ br(Assembler::CC, __ pc()); // b.CC .
|
|
|
|
|
- __ br(Assembler::CC, back); // b.CC back
|
|
|
|
|
- __ br(Assembler::CC, forth); // b.CC forth
|
|
|
|
|
- __ br(Assembler::MI, __ pc()); // b.MI .
|
|
|
|
|
- __ br(Assembler::MI, back); // b.MI back
|
|
|
|
|
- __ br(Assembler::MI, forth); // b.MI forth
|
|
|
|
|
- __ br(Assembler::PL, __ pc()); // b.PL .
|
|
|
|
|
- __ br(Assembler::PL, back); // b.PL back
|
|
|
|
|
- __ br(Assembler::PL, forth); // b.PL forth
|
|
|
|
|
- __ br(Assembler::VS, __ pc()); // b.VS .
|
|
|
|
|
- __ br(Assembler::VS, back); // b.VS back
|
|
|
|
|
- __ br(Assembler::VS, forth); // b.VS forth
|
|
|
|
|
- __ br(Assembler::VC, __ pc()); // b.VC .
|
|
|
|
|
- __ br(Assembler::VC, back); // b.VC back
|
|
|
|
|
- __ br(Assembler::VC, forth); // b.VC forth
|
|
|
|
|
- __ br(Assembler::HI, __ pc()); // b.HI .
|
|
|
|
|
- __ br(Assembler::HI, back); // b.HI back
|
|
|
|
|
- __ br(Assembler::HI, forth); // b.HI forth
|
|
|
|
|
- __ br(Assembler::LS, __ pc()); // b.LS .
|
|
|
|
|
- __ br(Assembler::LS, back); // b.LS back
|
|
|
|
|
- __ br(Assembler::LS, forth); // b.LS forth
|
|
|
|
|
- __ br(Assembler::GE, __ pc()); // b.GE .
|
|
|
|
|
- __ br(Assembler::GE, back); // b.GE back
|
|
|
|
|
- __ br(Assembler::GE, forth); // b.GE forth
|
|
|
|
|
- __ br(Assembler::LT, __ pc()); // b.LT .
|
|
|
|
|
- __ br(Assembler::LT, back); // b.LT back
|
|
|
|
|
- __ br(Assembler::LT, forth); // b.LT forth
|
|
|
|
|
- __ br(Assembler::GT, __ pc()); // b.GT .
|
|
|
|
|
- __ br(Assembler::GT, back); // b.GT back
|
|
|
|
|
- __ br(Assembler::GT, forth); // b.GT forth
|
|
|
|
|
- __ br(Assembler::LE, __ pc()); // b.LE .
|
|
|
|
|
- __ br(Assembler::LE, back); // b.LE back
|
|
|
|
|
- __ br(Assembler::LE, forth); // b.LE forth
|
|
|
|
|
- __ br(Assembler::AL, __ pc()); // b.AL .
|
|
|
|
|
- __ br(Assembler::AL, back); // b.AL back
|
|
|
|
|
- __ br(Assembler::AL, forth); // b.AL forth
|
|
|
|
|
- __ br(Assembler::NV, __ pc()); // b.NV .
|
|
|
|
|
- __ br(Assembler::NV, back); // b.NV back
|
|
|
|
|
- __ br(Assembler::NV, forth); // b.NV forth
|
2020-12-24 15:35:16 +08:00
|
|
|
+ __ br(Assembler::EQ, __ pc()); // b.EQ .
|
|
|
|
|
+ __ br(Assembler::EQ, back); // b.EQ back
|
|
|
|
|
+ __ br(Assembler::EQ, forth); // b.EQ forth
|
|
|
|
|
+ __ br(Assembler::NE, __ pc()); // b.NE .
|
|
|
|
|
+ __ br(Assembler::NE, back); // b.NE back
|
|
|
|
|
+ __ br(Assembler::NE, forth); // b.NE forth
|
|
|
|
|
+ __ br(Assembler::HS, __ pc()); // b.HS .
|
|
|
|
|
+ __ br(Assembler::HS, back); // b.HS back
|
|
|
|
|
+ __ br(Assembler::HS, forth); // b.HS forth
|
|
|
|
|
+ __ br(Assembler::CS, __ pc()); // b.CS .
|
|
|
|
|
+ __ br(Assembler::CS, back); // b.CS back
|
|
|
|
|
+ __ br(Assembler::CS, forth); // b.CS forth
|
|
|
|
|
+ __ br(Assembler::LO, __ pc()); // b.LO .
|
|
|
|
|
+ __ br(Assembler::LO, back); // b.LO back
|
|
|
|
|
+ __ br(Assembler::LO, forth); // b.LO forth
|
|
|
|
|
+ __ br(Assembler::CC, __ pc()); // b.CC .
|
|
|
|
|
+ __ br(Assembler::CC, back); // b.CC back
|
|
|
|
|
+ __ br(Assembler::CC, forth); // b.CC forth
|
|
|
|
|
+ __ br(Assembler::MI, __ pc()); // b.MI .
|
|
|
|
|
+ __ br(Assembler::MI, back); // b.MI back
|
|
|
|
|
+ __ br(Assembler::MI, forth); // b.MI forth
|
|
|
|
|
+ __ br(Assembler::PL, __ pc()); // b.PL .
|
|
|
|
|
+ __ br(Assembler::PL, back); // b.PL back
|
|
|
|
|
+ __ br(Assembler::PL, forth); // b.PL forth
|
|
|
|
|
+ __ br(Assembler::VS, __ pc()); // b.VS .
|
|
|
|
|
+ __ br(Assembler::VS, back); // b.VS back
|
|
|
|
|
+ __ br(Assembler::VS, forth); // b.VS forth
|
|
|
|
|
+ __ br(Assembler::VC, __ pc()); // b.VC .
|
|
|
|
|
+ __ br(Assembler::VC, back); // b.VC back
|
|
|
|
|
+ __ br(Assembler::VC, forth); // b.VC forth
|
|
|
|
|
+ __ br(Assembler::HI, __ pc()); // b.HI .
|
|
|
|
|
+ __ br(Assembler::HI, back); // b.HI back
|
|
|
|
|
+ __ br(Assembler::HI, forth); // b.HI forth
|
|
|
|
|
+ __ br(Assembler::LS, __ pc()); // b.LS .
|
|
|
|
|
+ __ br(Assembler::LS, back); // b.LS back
|
|
|
|
|
+ __ br(Assembler::LS, forth); // b.LS forth
|
|
|
|
|
+ __ br(Assembler::GE, __ pc()); // b.GE .
|
|
|
|
|
+ __ br(Assembler::GE, back); // b.GE back
|
|
|
|
|
+ __ br(Assembler::GE, forth); // b.GE forth
|
|
|
|
|
+ __ br(Assembler::LT, __ pc()); // b.LT .
|
|
|
|
|
+ __ br(Assembler::LT, back); // b.LT back
|
|
|
|
|
+ __ br(Assembler::LT, forth); // b.LT forth
|
|
|
|
|
+ __ br(Assembler::GT, __ pc()); // b.GT .
|
|
|
|
|
+ __ br(Assembler::GT, back); // b.GT back
|
|
|
|
|
+ __ br(Assembler::GT, forth); // b.GT forth
|
|
|
|
|
+ __ br(Assembler::LE, __ pc()); // b.LE .
|
|
|
|
|
+ __ br(Assembler::LE, back); // b.LE back
|
|
|
|
|
+ __ br(Assembler::LE, forth); // b.LE forth
|
|
|
|
|
+ __ br(Assembler::AL, __ pc()); // b.AL .
|
|
|
|
|
+ __ br(Assembler::AL, back); // b.AL back
|
|
|
|
|
+ __ br(Assembler::AL, forth); // b.AL forth
|
|
|
|
|
+ __ br(Assembler::NV, __ pc()); // b.NV .
|
|
|
|
|
+ __ br(Assembler::NV, back); // b.NV back
|
|
|
|
|
+ __ br(Assembler::NV, forth); // b.NV forth
|
|
|
|
|
|
|
|
|
|
// ImmOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ svc(22064); // svc #22064
|
|
|
|
|
- __ hvc(533); // hvc #533
|
|
|
|
|
- __ smc(9942); // smc #9942
|
|
|
|
|
- __ brk(4714); // brk #4714
|
|
|
|
|
- __ hlt(4302); // hlt #4302
|
|
|
|
|
+ __ svc(31973); // svc #31973
|
|
|
|
|
+ __ hvc(1113); // hvc #1113
|
|
|
|
|
+ __ smc(24334); // smc #24334
|
|
|
|
|
+ __ brk(7815); // brk #7815
|
|
|
|
|
+ __ hlt(28529); // hlt #28529
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// Op
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ nop(); // nop
|
|
|
|
|
- __ eret(); // eret
|
|
|
|
|
- __ drps(); // drps
|
|
|
|
|
- __ isb(); // isb
|
2020-12-24 15:35:16 +08:00
|
|
|
+ __ nop(); // nop
|
|
|
|
|
+ __ eret(); // eret
|
|
|
|
|
+ __ drps(); // drps
|
|
|
|
|
+ __ isb(); // isb
|
|
|
|
|
|
|
|
|
|
// SystemOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ dsb(Assembler::OSH); // dsb OSH
|
|
|
|
|
- __ dmb(Assembler::NSHLD); // dmb NSHLD
|
|
|
|
|
+ __ dsb(Assembler::NSHLD); // dsb NSHLD
|
|
|
|
|
+ __ dmb(Assembler::NSH); // dmb NSH
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// OneRegOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ br(r20); // br x20
|
|
|
|
|
- __ blr(r2); // blr x2
|
|
|
|
|
+ __ br(r28); // br x28
|
|
|
|
|
+ __ blr(r17); // blr x17
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStoreExclusiveOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ stxr(r18, r23, r0); // stxr w18, x23, [x0]
|
|
|
|
|
- __ stlxr(r30, r5, r22); // stlxr w30, x5, [x22]
|
|
|
|
|
- __ ldxr(r5, r8); // ldxr x5, [x8]
|
|
|
|
|
- __ ldaxr(r20, r16); // ldaxr x20, [x16]
|
|
|
|
|
- __ stlr(r6, r11); // stlr x6, [x11]
|
|
|
|
|
- __ ldar(r6, r27); // ldar x6, [x27]
|
|
|
|
|
+ __ stxr(r18, r7, r26); // stxr w18, x7, [x26]
|
|
|
|
|
+ __ stlxr(r25, r12, r6); // stlxr w25, x12, [x6]
|
|
|
|
|
+ __ ldxr(r0, r16); // ldxr x0, [x16]
|
|
|
|
|
+ __ ldaxr(r6, r3); // ldaxr x6, [x3]
|
|
|
|
|
+ __ stlr(r14, r1); // stlr x14, [x1]
|
|
|
|
|
+ __ ldar(r29, r24); // ldar x29, [x24]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStoreExclusiveOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ stxrw(r10, r17, r5); // stxr w10, w17, [x5]
|
|
|
|
|
- __ stlxrw(r22, r9, r12); // stlxr w22, w9, [x12]
|
|
|
|
|
- __ ldxrw(r27, r8); // ldxr w27, [x8]
|
|
|
|
|
- __ ldaxrw(r23, r2); // ldaxr w23, [x2]
|
|
|
|
|
- __ stlrw(r26, r29); // stlr w26, [x29]
|
|
|
|
|
- __ ldarw(r13, r10); // ldar w13, [x10]
|
|
|
|
|
+ __ stxrw(r28, r15, r23); // stxr w28, w15, [x23]
|
|
|
|
|
+ __ stlxrw(r9, r7, r3); // stlxr w9, w7, [x3]
|
|
|
|
|
+ __ ldxrw(r1, r20); // ldxr w1, [x20]
|
|
|
|
|
+ __ ldaxrw(r20, r15); // ldaxr w20, [x15]
|
|
|
|
|
+ __ stlrw(r21, r9); // stlr w21, [x9]
|
|
|
|
|
+ __ ldarw(r5, r17); // ldar w5, [x17]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStoreExclusiveOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ stxrh(r25, r28, r27); // stxrh w25, w28, [x27]
|
|
|
|
|
- __ stlxrh(r29, r22, r12); // stlxrh w29, w22, [x12]
|
|
|
|
|
- __ ldxrh(r22, r28); // ldxrh w22, [x28]
|
|
|
|
|
- __ ldaxrh(r3, r30); // ldaxrh w3, [x30]
|
|
|
|
|
- __ stlrh(r24, r15); // stlrh w24, [x15]
|
|
|
|
|
- __ ldarh(r27, r26); // ldarh w27, [x26]
|
|
|
|
|
+ __ stxrh(r13, r20, r30); // stxrh w13, w20, [x30]
|
|
|
|
|
+ __ stlxrh(r10, r12, r18); // stlxrh w10, w12, [x18]
|
|
|
|
|
+ __ ldxrh(r4, r19); // ldxrh w4, [x19]
|
|
|
|
|
+ __ ldaxrh(r22, r10); // ldaxrh w22, [x10]
|
|
|
|
|
+ __ stlrh(r30, r15); // stlrh w30, [x15]
|
|
|
|
|
+ __ ldarh(r4, r24); // ldarh w4, [x24]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStoreExclusiveOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ stxrb(r11, r10, r19); // stxrb w11, w10, [x19]
|
|
|
|
|
- __ stlxrb(r23, r27, r22); // stlxrb w23, w27, [x22]
|
|
|
|
|
- __ ldxrb(r24, r16); // ldxrb w24, [x16]
|
|
|
|
|
- __ ldaxrb(r24, r1); // ldaxrb w24, [x1]
|
|
|
|
|
- __ stlrb(r5, r29); // stlrb w5, [x29]
|
|
|
|
|
- __ ldarb(r24, r16); // ldarb w24, [x16]
|
|
|
|
|
+ __ stxrb(r10, r20, r12); // stxrb w10, w20, [x12]
|
|
|
|
|
+ __ stlxrb(r20, r29, r11); // stlxrb w20, w29, [x11]
|
|
|
|
|
+ __ ldxrb(r21, r5); // ldxrb w21, [x5]
|
|
|
|
|
+ __ ldaxrb(r4, r9); // ldaxrb w4, [x9]
|
|
|
|
|
+ __ stlrb(r30, r28); // stlrb w30, [x28]
|
|
|
|
|
+ __ ldarb(r19, r24); // ldarb w19, [x24]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStoreExclusiveOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ ldxp(r25, r24, r17); // ldxp x25, x24, [x17]
|
|
|
|
|
- __ ldaxp(r22, r12, r19); // ldaxp x22, x12, [x19]
|
|
|
|
|
- __ stxp(r0, r26, r21, r25); // stxp w0, x26, x21, [x25]
|
|
|
|
|
- __ stlxp(r1, r6, r11, r5); // stlxp w1, x6, x11, [x5]
|
|
|
|
|
+ __ ldxp(r11, r16, r18); // ldxp x11, x16, [x18]
|
|
|
|
|
+ __ ldaxp(r8, r7, r15); // ldaxp x8, x7, [x15]
|
|
|
|
|
+ __ stxp(r28, r20, r16, r10); // stxp w28, x20, x16, [x10]
|
|
|
|
|
+ __ stlxp(r7, r9, r21, r3); // stlxp w7, x9, x21, [x3]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStoreExclusiveOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ ldxpw(r13, r14, r4); // ldxp w13, w14, [x4]
|
|
|
|
|
- __ ldaxpw(r17, r2, r6); // ldaxp w17, w2, [x6]
|
|
|
|
|
- __ stxpw(r15, r3, r9, r18); // stxp w15, w3, w9, [x18]
|
|
|
|
|
- __ stlxpw(r18, r17, r4, r9); // stlxp w18, w17, w4, [x9]
|
|
|
|
|
+ __ ldxpw(r25, r6, r19); // ldxp w25, w6, [x19]
|
|
|
|
|
+ __ ldaxpw(r30, r9, r2); // ldaxp w30, w9, [x2]
|
|
|
|
|
+ __ stxpw(r16, r0, r20, r12); // stxp w16, w0, w20, [x12]
|
|
|
|
|
+ __ stlxpw(r5, r2, r7, r28); // stlxp w5, w2, w7, [x28]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
-// base_plus_unscaled_offset
|
|
|
|
|
+// base_plus_unscaled_offset
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ str(r23, Address(r21, -49)); // str x23, [x21, -49]
|
|
|
|
|
- __ strw(r21, Address(r2, 63)); // str w21, [x2, 63]
|
|
|
|
|
- __ strb(r27, Address(r28, 11)); // strb w27, [x28, 11]
|
|
|
|
|
- __ strh(r29, Address(r15, -13)); // strh w29, [x15, -13]
|
|
|
|
|
- __ ldr(r14, Address(r30, -45)); // ldr x14, [x30, -45]
|
|
|
|
|
- __ ldrw(r29, Address(r28, 53)); // ldr w29, [x28, 53]
|
|
|
|
|
- __ ldrb(r20, Address(r26, 7)); // ldrb w20, [x26, 7]
|
|
|
|
|
- __ ldrh(r25, Address(r2, -50)); // ldrh w25, [x2, -50]
|
|
|
|
|
- __ ldrsb(r3, Address(r10, -15)); // ldrsb x3, [x10, -15]
|
|
|
|
|
- __ ldrsh(r14, Address(r15, 19)); // ldrsh x14, [x15, 19]
|
|
|
|
|
- __ ldrshw(r29, Address(r11, -5)); // ldrsh w29, [x11, -5]
|
|
|
|
|
- __ ldrsw(r15, Address(r5, -71)); // ldrsw x15, [x5, -71]
|
|
|
|
|
- __ ldrd(v19, Address(r12, 3)); // ldr d19, [x12, 3]
|
|
|
|
|
- __ ldrs(v12, Address(r27, 42)); // ldr s12, [x27, 42]
|
|
|
|
|
- __ strd(v22, Address(r28, 125)); // str d22, [x28, 125]
|
|
|
|
|
- __ strs(v24, Address(r15, -20)); // str s24, [x15, -20]
|
2020-12-24 15:35:16 +08:00
|
|
|
-
|
|
|
|
|
-// pre
|
2021-08-13 14:54:30 +08:00
|
|
|
+ __ str(r16, Address(r19, -75)); // str x16, [x19, -75]
|
|
|
|
|
+ __ strw(r1, Address(r28, 30)); // str w1, [x28, 30]
|
|
|
|
|
+ __ strb(r28, Address(r13, -26)); // strb w28, [x13, -26]
|
|
|
|
|
+ __ strh(r8, Address(r6, -51)); // strh w8, [x6, -51]
|
|
|
|
|
+ __ ldr(r0, Address(r28, -227)); // ldr x0, [x28, -227]
|
|
|
|
|
+ __ ldrw(r28, Address(r10, -26)); // ldr w28, [x10, -26]
|
|
|
|
|
+ __ ldrb(r4, Address(r11, 12)); // ldrb w4, [x11, 12]
|
|
|
|
|
+ __ ldrh(r1, Address(r17, 5)); // ldrh w1, [x17, 5]
|
|
|
|
|
+ __ ldrsb(r11, Address(r9, 12)); // ldrsb x11, [x9, 12]
|
|
|
|
|
+ __ ldrsh(r8, Address(r8, -17)); // ldrsh x8, [x8, -17]
|
|
|
|
|
+ __ ldrshw(r20, Address(r13, -35)); // ldrsh w20, [x13, -35]
|
|
|
|
|
+ __ ldrsw(r23, Address(r9, 49)); // ldrsw x23, [x9, 49]
|
|
|
|
|
+ __ ldrd(v9, Address(r4, 29)); // ldr d9, [x4, 29]
|
|
|
|
|
+ __ ldrs(v11, Address(r19, 40)); // ldr s11, [x19, 40]
|
|
|
|
|
+ __ strd(v25, Address(r20, -43)); // str d25, [x20, -43]
|
|
|
|
|
+ __ strs(v25, Address(r1, -80)); // str s25, [x1, -80]
|
2020-12-24 15:35:16 +08:00
|
|
|
+
|
|
|
|
|
+// pre
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ str(r8, Address(__ pre(r28, -24))); // str x8, [x28, -24]!
|
|
|
|
|
- __ strw(r6, Address(__ pre(r15, 37))); // str w6, [x15, 37]!
|
|
|
|
|
- __ strb(r7, Address(__ pre(r1, 7))); // strb w7, [x1, 7]!
|
|
|
|
|
- __ strh(r0, Address(__ pre(r17, 30))); // strh w0, [x17, 30]!
|
|
|
|
|
- __ ldr(r25, Address(__ pre(r29, 84))); // ldr x25, [x29, 84]!
|
|
|
|
|
- __ ldrw(r26, Address(__ pre(r20, -52))); // ldr w26, [x20, -52]!
|
|
|
|
|
- __ ldrb(r26, Address(__ pre(r29, -25))); // ldrb w26, [x29, -25]!
|
|
|
|
|
- __ ldrh(r4, Address(__ pre(r25, 26))); // ldrh w4, [x25, 26]!
|
|
|
|
|
- __ ldrsb(r28, Address(__ pre(r8, -21))); // ldrsb x28, [x8, -21]!
|
|
|
|
|
- __ ldrsh(r17, Address(__ pre(r14, -6))); // ldrsh x17, [x14, -6]!
|
|
|
|
|
- __ ldrshw(r28, Address(__ pre(r23, 10))); // ldrsh w28, [x23, 10]!
|
|
|
|
|
- __ ldrsw(r30, Address(__ pre(r27, -64))); // ldrsw x30, [x27, -64]!
|
|
|
|
|
- __ ldrd(v20, Address(__ pre(r30, -242))); // ldr d20, [x30, -242]!
|
|
|
|
|
- __ ldrs(v17, Address(__ pre(r27, 20))); // ldr s17, [x27, 20]!
|
|
|
|
|
- __ strd(v7, Address(__ pre(r3, 17))); // str d7, [x3, 17]!
|
|
|
|
|
- __ strs(v13, Address(__ pre(r11, -16))); // str s13, [x11, -16]!
|
2020-12-24 15:35:16 +08:00
|
|
|
-
|
|
|
|
|
-// post
|
2021-08-13 14:54:30 +08:00
|
|
|
+ __ str(r20, Address(__ pre(r0, 25))); // str x20, [x0, 25]!
|
|
|
|
|
+ __ strw(r12, Address(__ pre(r12, -49))); // str w12, [x12, -49]!
|
|
|
|
|
+ __ strb(r28, Address(__ pre(r19, -10))); // strb w28, [x19, -10]!
|
|
|
|
|
+ __ strh(r13, Address(__ pre(r28, -63))); // strh w13, [x28, -63]!
|
|
|
|
|
+ __ ldr(r11, Address(__ pre(r23, -46))); // ldr x11, [x23, -46]!
|
|
|
|
|
+ __ ldrw(r27, Address(__ pre(r24, 17))); // ldr w27, [x24, 17]!
|
|
|
|
|
+ __ ldrb(r14, Address(__ pre(r26, -12))); // ldrb w14, [x26, -12]!
|
|
|
|
|
+ __ ldrh(r24, Address(__ pre(r22, -45))); // ldrh w24, [x22, -45]!
|
|
|
|
|
+ __ ldrsb(r25, Address(__ pre(r9, -11))); // ldrsb x25, [x9, -11]!
|
|
|
|
|
+ __ ldrsh(r5, Address(__ pre(r6, 29))); // ldrsh x5, [x6, 29]!
|
|
|
|
|
+ __ ldrshw(r7, Address(__ pre(r23, -1))); // ldrsh w7, [x23, -1]!
|
|
|
|
|
+ __ ldrsw(r26, Address(__ pre(r13, -61))); // ldrsw x26, [x13, -61]!
|
|
|
|
|
+ __ ldrd(v24, Address(__ pre(r24, -245))); // ldr d24, [x24, -245]!
|
|
|
|
|
+ __ ldrs(v20, Address(__ pre(r25, -55))); // ldr s20, [x25, -55]!
|
|
|
|
|
+ __ strd(v9, Address(__ pre(r2, -203))); // str d9, [x2, -203]!
|
|
|
|
|
+ __ strs(v14, Address(__ pre(r1, -59))); // str s14, [x1, -59]!
|
2020-12-24 15:35:16 +08:00
|
|
|
+
|
|
|
|
|
+// post
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ str(r6, Address(__ post(r9, -61))); // str x6, [x9], -61
|
|
|
|
|
- __ strw(r16, Address(__ post(r5, -29))); // str w16, [x5], -29
|
|
|
|
|
- __ strb(r29, Address(__ post(r29, 15))); // strb w29, [x29], 15
|
|
|
|
|
- __ strh(r4, Address(__ post(r20, 18))); // strh w4, [x20], 18
|
|
|
|
|
- __ ldr(r19, Address(__ post(r18, 46))); // ldr x19, [x18], 46
|
|
|
|
|
- __ ldrw(r22, Address(__ post(r2, 23))); // ldr w22, [x2], 23
|
|
|
|
|
- __ ldrb(r7, Address(__ post(r3, -30))); // ldrb w7, [x3], -30
|
|
|
|
|
- __ ldrh(r11, Address(__ post(r12, -29))); // ldrh w11, [x12], -29
|
|
|
|
|
- __ ldrsb(r8, Address(__ post(r6, -29))); // ldrsb x8, [x6], -29
|
|
|
|
|
- __ ldrsh(r24, Address(__ post(r23, 4))); // ldrsh x24, [x23], 4
|
|
|
|
|
- __ ldrshw(r17, Address(__ post(r16, 0))); // ldrsh w17, [x16], 0
|
|
|
|
|
- __ ldrsw(r0, Address(__ post(r20, -8))); // ldrsw x0, [x20], -8
|
|
|
|
|
- __ ldrd(v20, Address(__ post(r2, -126))); // ldr d20, [x2], -126
|
|
|
|
|
- __ ldrs(v19, Address(__ post(r30, -104))); // ldr s19, [x30], -104
|
|
|
|
|
- __ strd(v4, Address(__ post(r17, 118))); // str d4, [x17], 118
|
|
|
|
|
- __ strs(v21, Address(__ post(r19, -112))); // str s21, [x19], -112
|
2020-12-24 15:35:16 +08:00
|
|
|
-
|
|
|
|
|
-// base_plus_reg
|
2021-08-13 14:54:30 +08:00
|
|
|
+ __ str(r19, Address(__ post(r1, 109))); // str x19, [x1], 109
|
|
|
|
|
+ __ strw(r4, Address(__ post(r5, -54))); // str w4, [x5], -54
|
|
|
|
|
+ __ strb(r29, Address(__ post(r3, 9))); // strb w29, [x3], 9
|
|
|
|
|
+ __ strh(r0, Address(__ post(r1, -50))); // strh w0, [x1], -50
|
|
|
|
|
+ __ ldr(r2, Address(__ post(r6, -48))); // ldr x2, [x6], -48
|
|
|
|
|
+ __ ldrw(r15, Address(__ post(r6, -115))); // ldr w15, [x6], -115
|
|
|
|
|
+ __ ldrb(r4, Address(__ post(r2, -27))); // ldrb w4, [x2], -27
|
|
|
|
|
+ __ ldrh(r17, Address(__ post(r26, -21))); // ldrh w17, [x26], -21
|
|
|
|
|
+ __ ldrsb(r21, Address(__ post(r24, -13))); // ldrsb x21, [x24], -13
|
|
|
|
|
+ __ ldrsh(r22, Address(__ post(r6, -48))); // ldrsh x22, [x6], -48
|
|
|
|
|
+ __ ldrshw(r11, Address(__ post(r6, -48))); // ldrsh w11, [x6], -48
|
|
|
|
|
+ __ ldrsw(r14, Address(__ post(r30, -5))); // ldrsw x14, [x30], -5
|
|
|
|
|
+ __ ldrd(v2, Address(__ post(r15, -105))); // ldr d2, [x15], -105
|
|
|
|
|
+ __ ldrs(v25, Address(__ post(r19, -91))); // ldr s25, [x19], -91
|
|
|
|
|
+ __ strd(v13, Address(__ post(r23, -191))); // str d13, [x23], -191
|
|
|
|
|
+ __ strs(v22, Address(__ post(r21, 0))); // str s22, [x21], 0
|
2020-12-24 15:35:16 +08:00
|
|
|
+
|
|
|
|
|
+// base_plus_reg
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ str(r26, Address(r2, r19, Address::lsl(3))); // str x26, [x2, x19, lsl #3]
|
|
|
|
|
- __ strw(r9, Address(r0, r15, Address::sxtw(2))); // str w9, [x0, w15, sxtw #2]
|
|
|
|
|
- __ strb(r26, Address(r12, r1, Address::lsl(0))); // strb w26, [x12, x1, lsl #0]
|
|
|
|
|
- __ strh(r21, Address(r11, r10, Address::lsl(1))); // strh w21, [x11, x10, lsl #1]
|
|
|
|
|
- __ ldr(r16, Address(r23, r16, Address::sxtx(0))); // ldr x16, [x23, x16, sxtx #0]
|
|
|
|
|
- __ ldrw(r10, Address(r11, r17, Address::sxtw(2))); // ldr w10, [x11, w17, sxtw #2]
|
|
|
|
|
- __ ldrb(r13, Address(r23, r11, Address::lsl(0))); // ldrb w13, [x23, x11, lsl #0]
|
|
|
|
|
- __ ldrh(r27, Address(r4, r21, Address::lsl(0))); // ldrh w27, [x4, x21, lsl #0]
|
|
|
|
|
- __ ldrsb(r26, Address(r8, r15, Address::sxtw(0))); // ldrsb x26, [x8, w15, sxtw #0]
|
|
|
|
|
- __ ldrsh(r21, Address(r10, r2, Address::sxtw(0))); // ldrsh x21, [x10, w2, sxtw #0]
|
|
|
|
|
- __ ldrshw(r8, Address(r30, r14, Address::lsl(0))); // ldrsh w8, [x30, x14, lsl #0]
|
|
|
|
|
- __ ldrsw(r29, Address(r14, r20, Address::sxtx(2))); // ldrsw x29, [x14, x20, sxtx #2]
|
|
|
|
|
- __ ldrd(v30, Address(r27, r22, Address::sxtx(0))); // ldr d30, [x27, x22, sxtx #0]
|
|
|
|
|
- __ ldrs(v13, Address(r9, r22, Address::lsl(0))); // ldr s13, [x9, x22, lsl #0]
|
|
|
|
|
- __ strd(v8, Address(r25, r17, Address::sxtw(3))); // str d8, [x25, w17, sxtw #3]
|
|
|
|
|
- __ strs(v1, Address(r24, r5, Address::uxtw(2))); // str s1, [x24, w5, uxtw #2]
|
2020-12-24 15:35:16 +08:00
|
|
|
-
|
|
|
|
|
-// base_plus_scaled_offset
|
2021-08-13 14:54:30 +08:00
|
|
|
+ __ str(r22, Address(r12, r18, Address::sxtw(0))); // str x22, [x12, w18, sxtw #0]
|
|
|
|
|
+ __ strw(r30, Address(r27, r12, Address::uxtw(0))); // str w30, [x27, w12, uxtw #0]
|
|
|
|
|
+ __ strb(r7, Address(r4, r22, Address::lsl(0))); // strb w7, [x4, x22, lsl #0]
|
|
|
|
|
+ __ strh(r19, Address(r23, r29, Address::sxtx(1))); // strh w19, [x23, x29, sxtx #1]
|
|
|
|
|
+ __ ldr(r17, Address(r4, r27, Address::sxtx(3))); // ldr x17, [x4, x27, sxtx #3]
|
|
|
|
|
+ __ ldrw(r1, Address(r13, r17, Address::sxtw(0))); // ldr w1, [x13, w17, sxtw #0]
|
|
|
|
|
+ __ ldrb(r16, Address(r27, r29, Address::sxtx(0))); // ldrb w16, [x27, x29, sxtx #0]
|
|
|
|
|
+ __ ldrh(r25, Address(r9, r4, Address::uxtw(1))); // ldrh w25, [x9, w4, uxtw #1]
|
|
|
|
|
+ __ ldrsb(r4, Address(r12, r22, Address::lsl(0))); // ldrsb x4, [x12, x22, lsl #0]
|
|
|
|
|
+ __ ldrsh(r25, Address(r1, r5, Address::uxtw(1))); // ldrsh x25, [x1, w5, uxtw #1]
|
|
|
|
|
+ __ ldrshw(r9, Address(r16, r28, Address::lsl(0))); // ldrsh w9, [x16, x28, lsl #0]
|
|
|
|
|
+ __ ldrsw(r8, Address(r7, r14, Address::sxtx(0))); // ldrsw x8, [x7, x14, sxtx #0]
|
|
|
|
|
+ __ ldrd(v4, Address(r28, r16, Address::uxtw(3))); // ldr d4, [x28, w16, uxtw #3]
|
|
|
|
|
+ __ ldrs(v16, Address(r2, r27, Address::sxtw(2))); // ldr s16, [x2, w27, sxtw #2]
|
|
|
|
|
+ __ strd(v23, Address(r0, r25, Address::lsl(0))); // str d23, [x0, x25, lsl #0]
|
|
|
|
|
+ __ strs(v6, Address(r16, r7, Address::lsl(2))); // str s6, [x16, x7, lsl #2]
|
2020-12-24 15:35:16 +08:00
|
|
|
+
|
|
|
|
|
+// base_plus_scaled_offset
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ str(r10, Address(r21, 14496)); // str x10, [x21, 14496]
|
|
|
|
|
- __ strw(r18, Address(r29, 7228)); // str w18, [x29, 7228]
|
|
|
|
|
- __ strb(r23, Address(r3, 2018)); // strb w23, [x3, 2018]
|
|
|
|
|
- __ strh(r28, Address(r11, 3428)); // strh w28, [x11, 3428]
|
|
|
|
|
- __ ldr(r24, Address(r26, 14376)); // ldr x24, [x26, 14376]
|
|
|
|
|
- __ ldrw(r21, Address(r2, 6972)); // ldr w21, [x2, 6972]
|
|
|
|
|
- __ ldrb(r4, Address(r5, 1848)); // ldrb w4, [x5, 1848]
|
|
|
|
|
- __ ldrh(r14, Address(r14, 3112)); // ldrh w14, [x14, 3112]
|
|
|
|
|
- __ ldrsb(r4, Address(r27, 1959)); // ldrsb x4, [x27, 1959]
|
|
|
|
|
- __ ldrsh(r4, Address(r27, 3226)); // ldrsh x4, [x27, 3226]
|
|
|
|
|
- __ ldrshw(r10, Address(r28, 3286)); // ldrsh w10, [x28, 3286]
|
|
|
|
|
- __ ldrsw(r10, Address(r17, 7912)); // ldrsw x10, [x17, 7912]
|
|
|
|
|
- __ ldrd(v13, Address(r28, 13400)); // ldr d13, [x28, 13400]
|
|
|
|
|
- __ ldrs(v24, Address(r3, 7596)); // ldr s24, [x3, 7596]
|
|
|
|
|
- __ strd(v2, Address(r12, 15360)); // str d2, [x12, 15360]
|
|
|
|
|
- __ strs(v17, Address(r1, 6492)); // str s17, [x1, 6492]
|
2020-12-24 15:35:16 +08:00
|
|
|
-
|
|
|
|
|
-// pcrel
|
2021-08-13 14:54:30 +08:00
|
|
|
+ __ str(r2, Address(r12, 15288)); // str x2, [x12, 15288]
|
|
|
|
|
+ __ strw(r8, Address(r5, 6928)); // str w8, [x5, 6928]
|
|
|
|
|
+ __ strb(r1, Address(r17, 2016)); // strb w1, [x17, 2016]
|
|
|
|
|
+ __ strh(r8, Address(r25, 3258)); // strh w8, [x25, 3258]
|
|
|
|
|
+ __ ldr(r28, Address(r3, 14656)); // ldr x28, [x3, 14656]
|
|
|
|
|
+ __ ldrw(r21, Address(r11, 7004)); // ldr w21, [x11, 7004]
|
|
|
|
|
+ __ ldrb(r15, Address(r5, 1906)); // ldrb w15, [x5, 1906]
|
|
|
|
|
+ __ ldrh(r0, Address(r19, 3668)); // ldrh w0, [x19, 3668]
|
|
|
|
|
+ __ ldrsb(r29, Address(r9, 1909)); // ldrsb x29, [x9, 1909]
|
|
|
|
|
+ __ ldrsh(r23, Address(r28, 3640)); // ldrsh x23, [x28, 3640]
|
|
|
|
|
+ __ ldrshw(r27, Address(r10, 3672)); // ldrsh w27, [x10, 3672]
|
|
|
|
|
+ __ ldrsw(r21, Address(r27, 7736)); // ldrsw x21, [x27, 7736]
|
|
|
|
|
+ __ ldrd(v26, Address(r27, 14584)); // ldr d26, [x27, 14584]
|
|
|
|
|
+ __ ldrs(v2, Address(r4, 7464)); // ldr s2, [x4, 7464]
|
|
|
|
|
+ __ strd(v1, Address(r21, 16224)); // str d1, [x21, 16224]
|
|
|
|
|
+ __ strs(v4, Address(r22, 7552)); // str s4, [x22, 7552]
|
2020-12-24 15:35:16 +08:00
|
|
|
+
|
|
|
|
|
+// pcrel
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ ldr(r16, __ pc()); // ldr x16, .
|
|
|
|
|
- __ ldrw(r13, __ pc()); // ldr w13, .
|
|
|
|
|
+ __ ldr(r1, __ pc()); // ldr x1, .
|
|
|
|
|
+ __ ldrw(r27, __ pc()); // ldr w27, .
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ prfm(Address(r18, -127)); // prfm PLDL1KEEP, [x18, -127]
|
|
|
|
|
+ __ prfm(Address(r4, 45)); // prfm PLDL1KEEP, [x4, 45]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ prfm(back); // prfm PLDL1KEEP, back
|
2020-12-24 15:35:16 +08:00
|
|
|
+ __ prfm(__ pc()); // prfm PLDL1KEEP, .
|
|
|
|
|
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ prfm(Address(r20, r2, Address::lsl(3))); // prfm PLDL1KEEP, [x20, x2, lsl #3]
|
|
|
|
|
+ __ prfm(Address(r30, r0, Address::sxtw(0))); // prfm PLDL1KEEP, [x30, w0, sxtw #0]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStoreOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ prfm(Address(r9, 13808)); // prfm PLDL1KEEP, [x9, 13808]
|
|
|
|
|
+ __ prfm(Address(r24, 16208)); // prfm PLDL1KEEP, [x24, 16208]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// AddSubCarryOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ adcw(r8, r23, r2); // adc w8, w23, w2
|
|
|
|
|
- __ adcsw(r24, r3, r19); // adcs w24, w3, w19
|
|
|
|
|
- __ sbcw(r22, r24, r29); // sbc w22, w24, w29
|
|
|
|
|
- __ sbcsw(r12, r27, r3); // sbcs w12, w27, w3
|
|
|
|
|
- __ adc(r11, r23, r1); // adc x11, x23, x1
|
|
|
|
|
- __ adcs(r29, r5, r23); // adcs x29, x5, x23
|
|
|
|
|
- __ sbc(r9, r25, r12); // sbc x9, x25, x12
|
|
|
|
|
- __ sbcs(r12, r0, r22); // sbcs x12, x0, x22
|
|
|
|
|
+ __ adcw(r0, r29, r24); // adc w0, w29, w24
|
|
|
|
|
+ __ adcsw(r22, r28, r18); // adcs w22, w28, w18
|
|
|
|
|
+ __ sbcw(r23, r16, r30); // sbc w23, w16, w30
|
|
|
|
|
+ __ sbcsw(r7, r29, r14); // sbcs w7, w29, w14
|
|
|
|
|
+ __ adc(r22, r12, r14); // adc x22, x12, x14
|
|
|
|
|
+ __ adcs(r29, r24, r23); // adcs x29, x24, x23
|
|
|
|
|
+ __ sbc(r17, r28, r22); // sbc x17, x28, x22
|
|
|
|
|
+ __ sbcs(r24, r20, r19); // sbcs x24, x20, x19
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// AddSubExtendedOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ addw(r26, r12, r3, ext::uxtw, 1); // add w26, w12, w3, uxtw #1
|
|
|
|
|
- __ addsw(r20, r16, r18, ext::sxtb, 2); // adds w20, w16, w18, sxtb #2
|
|
|
|
|
- __ sub(r30, r30, r7, ext::uxtw, 2); // sub x30, x30, x7, uxtw #2
|
|
|
|
|
- __ subsw(r11, r21, r2, ext::uxth, 3); // subs w11, w21, w2, uxth #3
|
|
|
|
|
- __ add(r2, r26, r1, ext::uxtw, 2); // add x2, x26, x1, uxtw #2
|
|
|
|
|
- __ adds(r18, r29, r20, ext::sxth, 1); // adds x18, x29, x20, sxth #1
|
|
|
|
|
- __ sub(r14, r16, r4, ext::uxtw, 4); // sub x14, x16, x4, uxtw #4
|
|
|
|
|
- __ subs(r0, r17, r23, ext::sxtb, 3); // subs x0, x17, x23, sxtb #3
|
|
|
|
|
+ __ addw(r27, r22, r6, ext::sxtw, 2); // add w27, w22, w6, sxtw #2
|
|
|
|
|
+ __ addsw(r13, r11, r24, ext::uxtw, 4); // adds w13, w11, w24, uxtw #4
|
|
|
|
|
+ __ sub(r16, r8, r4, ext::uxth, 3); // sub x16, x8, x4, uxth #3
|
|
|
|
|
+ __ subsw(r21, r18, r20, ext::sxtx, 2); // subs w21, w18, w20, sxtx #2
|
|
|
|
|
+ __ add(r14, r17, r29, ext::uxtb, 2); // add x14, x17, x29, uxtb #2
|
|
|
|
|
+ __ adds(r17, r17, r14, ext::sxth, 4); // adds x17, x17, x14, sxth #4
|
|
|
|
|
+ __ sub(r22, r3, r26, ext::sxtw, 1); // sub x22, x3, x26, sxtw #1
|
|
|
|
|
+ __ subs(r13, r13, r21, ext::uxth, 4); // subs x13, x13, x21, uxth #4
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// ConditionalCompareOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ ccmnw(r20, r22, 3u, Assembler::PL); // ccmn w20, w22, #3, PL
|
|
|
|
|
- __ ccmpw(r25, r2, 1u, Assembler::EQ); // ccmp w25, w2, #1, EQ
|
|
|
|
|
- __ ccmn(r18, r24, 7u, Assembler::GT); // ccmn x18, x24, #7, GT
|
|
|
|
|
- __ ccmp(r8, r13, 6u, Assembler::PL); // ccmp x8, x13, #6, PL
|
|
|
|
|
+ __ ccmnw(r17, r26, 15u, Assembler::VC); // ccmn w17, w26, #15, VC
|
|
|
|
|
+ __ ccmpw(r25, r6, 5u, Assembler::LO); // ccmp w25, w6, #5, LO
|
|
|
|
|
+ __ ccmn(r1, r30, 1u, Assembler::LS); // ccmn x1, x30, #1, LS
|
|
|
|
|
+ __ ccmp(r17, r7, 2u, Assembler::GE); // ccmp x17, x7, #2, GE
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// ConditionalCompareImmedOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ ccmnw(r9, 2, 4, Assembler::VS); // ccmn w9, #2, #4, VS
|
|
|
|
|
- __ ccmpw(r2, 27, 7, Assembler::EQ); // ccmp w2, #27, #7, EQ
|
|
|
|
|
- __ ccmn(r16, 1, 2, Assembler::CC); // ccmn x16, #1, #2, CC
|
|
|
|
|
- __ ccmp(r17, 31, 3, Assembler::LT); // ccmp x17, #31, #3, LT
|
|
|
|
|
+ __ ccmnw(r17, 25, 6, Assembler::EQ); // ccmn w17, #25, #6, EQ
|
|
|
|
|
+ __ ccmpw(r2, 5, 5, Assembler::EQ); // ccmp w2, #5, #5, EQ
|
|
|
|
|
+ __ ccmn(r19, 17, 10, Assembler::MI); // ccmn x19, #17, #10, MI
|
|
|
|
|
+ __ ccmp(r14, 8, 3, Assembler::GT); // ccmp x14, #8, #3, GT
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// ConditionalSelectOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ cselw(r23, r27, r23, Assembler::LS); // csel w23, w27, w23, LS
|
|
|
|
|
- __ csincw(r10, r0, r6, Assembler::VS); // csinc w10, w0, w6, VS
|
|
|
|
|
- __ csinvw(r11, r0, r9, Assembler::CC); // csinv w11, w0, w9, CC
|
|
|
|
|
- __ csnegw(r17, r27, r18, Assembler::LO); // csneg w17, w27, w18, LO
|
|
|
|
|
- __ csel(r12, r16, r11, Assembler::VC); // csel x12, x16, x11, VC
|
|
|
|
|
- __ csinc(r6, r28, r6, Assembler::HI); // csinc x6, x28, x6, HI
|
|
|
|
|
- __ csinv(r13, r27, r26, Assembler::VC); // csinv x13, x27, x26, VC
|
|
|
|
|
- __ csneg(r29, r22, r18, Assembler::PL); // csneg x29, x22, x18, PL
|
|
|
|
|
+ __ cselw(r9, r8, r14, Assembler::LS); // csel w9, w8, w14, LS
|
|
|
|
|
+ __ csincw(r27, r11, r5, Assembler::LE); // csinc w27, w11, w5, LE
|
|
|
|
|
+ __ csinvw(r11, r23, r22, Assembler::LO); // csinv w11, w23, w22, LO
|
|
|
|
|
+ __ csnegw(r19, r28, r27, Assembler::CS); // csneg w19, w28, w27, CS
|
|
|
|
|
+ __ csel(r16, r9, r1, Assembler::PL); // csel x16, x9, x1, PL
|
|
|
|
|
+ __ csinc(r28, r14, r12, Assembler::EQ); // csinc x28, x14, x12, EQ
|
|
|
|
|
+ __ csinv(r22, r4, r14, Assembler::PL); // csinv x22, x4, x14, PL
|
|
|
|
|
+ __ csneg(r26, r11, r27, Assembler::HS); // csneg x26, x11, x27, HS
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// TwoRegOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ rbitw(r12, r19); // rbit w12, w19
|
|
|
|
|
- __ rev16w(r23, r18); // rev16 w23, w18
|
|
|
|
|
- __ revw(r9, r28); // rev w9, w28
|
|
|
|
|
- __ clzw(r2, r19); // clz w2, w19
|
|
|
|
|
- __ clsw(r25, r29); // cls w25, w29
|
|
|
|
|
- __ rbit(r4, r23); // rbit x4, x23
|
|
|
|
|
- __ rev16(r29, r18); // rev16 x29, x18
|
|
|
|
|
- __ rev32(r7, r8); // rev32 x7, x8
|
|
|
|
|
- __ rev(r13, r17); // rev x13, x17
|
|
|
|
|
- __ clz(r17, r0); // clz x17, x0
|
|
|
|
|
- __ cls(r18, r26); // cls x18, x26
|
|
|
|
|
+ __ rbitw(r24, r11); // rbit w24, w11
|
|
|
|
|
+ __ rev16w(r10, r14); // rev16 w10, w14
|
|
|
|
|
+ __ revw(r9, r5); // rev w9, w5
|
|
|
|
|
+ __ clzw(r18, r20); // clz w18, w20
|
|
|
|
|
+ __ clsw(r25, r8); // cls w25, w8
|
|
|
|
|
+ __ rbit(r22, r24); // rbit x22, x24
|
|
|
|
|
+ __ rev16(r28, r27); // rev16 x28, x27
|
|
|
|
|
+ __ rev32(r8, r29); // rev32 x8, x29
|
|
|
|
|
+ __ rev(r17, r10); // rev x17, x10
|
|
|
|
|
+ __ clz(r23, r11); // clz x23, x11
|
|
|
|
|
+ __ cls(r26, r14); // cls x26, x14
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// ThreeRegOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ udivw(r11, r12, r16); // udiv w11, w12, w16
|
|
|
|
|
- __ sdivw(r4, r9, r7); // sdiv w4, w9, w7
|
|
|
|
|
- __ lslvw(r12, r7, r16); // lslv w12, w7, w16
|
|
|
|
|
- __ lsrvw(r19, r16, r23); // lsrv w19, w16, w23
|
|
|
|
|
- __ asrvw(r7, r4, r6); // asrv w7, w4, w6
|
|
|
|
|
- __ rorvw(r21, r20, r23); // rorv w21, w20, w23
|
|
|
|
|
- __ udiv(r16, r12, r28); // udiv x16, x12, x28
|
|
|
|
|
- __ sdiv(r4, r12, r13); // sdiv x4, x12, x13
|
|
|
|
|
- __ lslv(r9, r13, r7); // lslv x9, x13, x7
|
|
|
|
|
- __ lsrv(r28, r27, r15); // lsrv x28, x27, x15
|
|
|
|
|
- __ asrv(r20, r30, r14); // asrv x20, x30, x14
|
|
|
|
|
- __ rorv(r14, r18, r30); // rorv x14, x18, x30
|
|
|
|
|
- __ umulh(r3, r11, r7); // umulh x3, x11, x7
|
|
|
|
|
- __ smulh(r23, r20, r24); // smulh x23, x20, x24
|
|
|
|
|
+ __ udivw(r21, r4, r28); // udiv w21, w4, w28
|
|
|
|
|
+ __ sdivw(r30, r10, r22); // sdiv w30, w10, w22
|
|
|
|
|
+ __ lslvw(r29, r2, r26); // lslv w29, w2, w26
|
|
|
|
|
+ __ lsrvw(r28, r22, r10); // lsrv w28, w22, w10
|
|
|
|
|
+ __ asrvw(r11, r24, r12); // asrv w11, w24, w12
|
|
|
|
|
+ __ rorvw(r21, r30, r16); // rorv w21, w30, w16
|
|
|
|
|
+ __ udiv(r1, r0, r13); // udiv x1, x0, x13
|
|
|
|
|
+ __ sdiv(r2, r17, r6); // sdiv x2, x17, x6
|
|
|
|
|
+ __ lslv(r10, r24, r21); // lslv x10, x24, x21
|
|
|
|
|
+ __ lsrv(r5, r9, r6); // lsrv x5, x9, x6
|
|
|
|
|
+ __ asrv(r0, r27, r4); // asrv x0, x27, x4
|
|
|
|
|
+ __ rorv(r28, r4, r2); // rorv x28, x4, x2
|
|
|
|
|
+ __ umulh(r1, r30, r7); // umulh x1, x30, x7
|
|
|
|
|
+ __ smulh(r30, r5, r10); // smulh x30, x5, x10
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// FourRegMulOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ maddw(r2, r5, r21, r9); // madd w2, w5, w21, w9
|
|
|
|
|
- __ msubw(r24, r24, r4, r8); // msub w24, w24, w4, w8
|
|
|
|
|
- __ madd(r11, r12, r15, r19); // madd x11, x12, x15, x19
|
|
|
|
|
- __ msub(r29, r25, r12, r25); // msub x29, x25, x12, x25
|
|
|
|
|
- __ smaddl(r17, r11, r12, r22); // smaddl x17, w11, w12, x22
|
|
|
|
|
- __ smsubl(r28, r3, r20, r18); // smsubl x28, w3, w20, x18
|
|
|
|
|
- __ umaddl(r7, r4, r28, r26); // umaddl x7, w4, w28, x26
|
|
|
|
|
- __ umsubl(r22, r10, r17, r5); // umsubl x22, w10, w17, x5
|
|
|
|
|
+ __ maddw(r7, r15, r13, r17); // madd w7, w15, w13, w17
|
|
|
|
|
+ __ msubw(r25, r1, r12, r28); // msub w25, w1, w12, w28
|
|
|
|
|
+ __ madd(r2, r11, r30, r9); // madd x2, x11, x30, x9
|
|
|
|
|
+ __ msub(r5, r23, r3, r22); // msub x5, x23, x3, x22
|
|
|
|
|
+ __ smaddl(r25, r10, r9, r4); // smaddl x25, w10, w9, x4
|
|
|
|
|
+ __ smsubl(r5, r8, r7, r18); // smsubl x5, w8, w7, x18
|
|
|
|
|
+ __ umaddl(r24, r5, r26, r25); // umaddl x24, w5, w26, x25
|
|
|
|
|
+ __ umsubl(r14, r1, r26, r28); // umsubl x14, w1, w26, x28
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// ThreeRegFloatOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ fmuls(v17, v3, v17); // fmul s17, s3, s17
|
|
|
|
|
- __ fdivs(v11, v17, v6); // fdiv s11, s17, s6
|
|
|
|
|
- __ fadds(v29, v7, v9); // fadd s29, s7, s9
|
|
|
|
|
- __ fsubs(v7, v12, v19); // fsub s7, s12, s19
|
|
|
|
|
- __ fmuls(v0, v23, v3); // fmul s0, s23, s3
|
|
|
|
|
- __ fmuld(v26, v3, v21); // fmul d26, d3, d21
|
|
|
|
|
- __ fdivd(v0, v19, v5); // fdiv d0, d19, d5
|
|
|
|
|
- __ faddd(v0, v26, v9); // fadd d0, d26, d9
|
|
|
|
|
- __ fsubd(v25, v21, v21); // fsub d25, d21, d21
|
|
|
|
|
- __ fmuld(v16, v13, v19); // fmul d16, d13, d19
|
|
|
|
|
+ __ fmuls(v24, v22, v8); // fmul s24, s22, s8
|
|
|
|
|
+ __ fdivs(v16, v3, v6); // fdiv s16, s3, s6
|
|
|
|
|
+ __ fadds(v16, v21, v25); // fadd s16, s21, s25
|
|
|
|
|
+ __ fsubs(v0, v26, v27); // fsub s0, s26, s27
|
|
|
|
|
+ __ fmuls(v24, v3, v17); // fmul s24, s3, s17
|
|
|
|
|
+ __ fmuld(v9, v8, v6); // fmul d9, d8, d6
|
|
|
|
|
+ __ fdivd(v22, v19, v30); // fdiv d22, d19, d30
|
|
|
|
|
+ __ faddd(v14, v17, v3); // fadd d14, d17, d3
|
|
|
|
|
+ __ fsubd(v24, v27, v20); // fsub d24, d27, d20
|
|
|
|
|
+ __ fmuld(v12, v1, v22); // fmul d12, d1, d22
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// FourRegFloatOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ fmadds(v29, v18, v0, v16); // fmadd s29, s18, s0, s16
|
|
|
|
|
- __ fmsubs(v23, v13, v29, v5); // fmsub s23, s13, s29, s5
|
|
|
|
|
- __ fnmadds(v9, v7, v10, v14); // fnmadd s9, s7, s10, s14
|
|
|
|
|
- __ fnmadds(v25, v28, v15, v23); // fnmadd s25, s28, s15, s23
|
|
|
|
|
- __ fmaddd(v6, v13, v21, v17); // fmadd d6, d13, d21, d17
|
|
|
|
|
- __ fmsubd(v3, v21, v2, v7); // fmsub d3, d21, d2, d7
|
|
|
|
|
- __ fnmaddd(v10, v25, v5, v17); // fnmadd d10, d25, d5, d17
|
|
|
|
|
- __ fnmaddd(v14, v14, v20, v18); // fnmadd d14, d14, d20, d18
|
|
|
|
|
+ __ fmadds(v16, v8, v11, v29); // fmadd s16, s8, s11, s29
|
|
|
|
|
+ __ fmsubs(v22, v19, v18, v1); // fmsub s22, s19, s18, s1
|
|
|
|
|
+ __ fnmadds(v15, v24, v24, v9); // fnmadd s15, s24, s24, s9
|
|
|
|
|
+ __ fnmadds(v27, v19, v13, v23); // fnmadd s27, s19, s13, s23
|
|
|
|
|
+ __ fmaddd(v3, v0, v16, v12); // fmadd d3, d0, d16, d12
|
|
|
|
|
+ __ fmsubd(v19, v3, v18, v10); // fmsub d19, d3, d18, d10
|
|
|
|
|
+ __ fnmaddd(v1, v2, v11, v20); // fnmadd d1, d2, d11, d20
|
|
|
|
|
+ __ fnmaddd(v12, v9, v25, v14); // fnmadd d12, d9, d25, d14
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// TwoRegFloatOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ fmovs(v15, v2); // fmov s15, s2
|
|
|
|
|
- __ fabss(v18, v7); // fabs s18, s7
|
|
|
|
|
- __ fnegs(v3, v6); // fneg s3, s6
|
|
|
|
|
- __ fsqrts(v12, v1); // fsqrt s12, s1
|
|
|
|
|
- __ fcvts(v9, v0); // fcvt d9, s0
|
|
|
|
|
- __ fmovd(v4, v5); // fmov d4, d5
|
|
|
|
|
- __ fabsd(v3, v15); // fabs d3, d15
|
|
|
|
|
- __ fnegd(v17, v25); // fneg d17, d25
|
|
|
|
|
- __ fsqrtd(v12, v24); // fsqrt d12, d24
|
|
|
|
|
- __ fcvtd(v21, v5); // fcvt s21, d5
|
|
|
|
|
+ __ fmovs(v0, v23); // fmov s0, s23
|
|
|
|
|
+ __ fabss(v23, v6); // fabs s23, s6
|
|
|
|
|
+ __ fnegs(v4, v4); // fneg s4, s4
|
|
|
|
|
+ __ fsqrts(v5, v28); // fsqrt s5, s28
|
|
|
|
|
+ __ fcvts(v21, v15); // fcvt d21, s15
|
|
|
|
|
+ __ fmovd(v11, v5); // fmov d11, d5
|
|
|
|
|
+ __ fabsd(v18, v4); // fabs d18, d4
|
|
|
|
|
+ __ fnegd(v11, v12); // fneg d11, d12
|
|
|
|
|
+ __ fsqrtd(v15, v8); // fsqrt d15, d8
|
|
|
|
|
+ __ fcvtd(v8, v2); // fcvt s8, d2
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// FloatConvertOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ fcvtzsw(r4, v21); // fcvtzs w4, s21
|
|
|
|
|
- __ fcvtzs(r27, v3); // fcvtzs x27, s3
|
|
|
|
|
- __ fcvtzdw(r29, v8); // fcvtzs w29, d8
|
|
|
|
|
- __ fcvtzd(r9, v21); // fcvtzs x9, d21
|
|
|
|
|
- __ scvtfws(v20, r29); // scvtf s20, w29
|
|
|
|
|
- __ scvtfs(v7, r8); // scvtf s7, x8
|
|
|
|
|
- __ scvtfwd(v12, r21); // scvtf d12, w21
|
|
|
|
|
- __ scvtfd(v16, r21); // scvtf d16, x21
|
|
|
|
|
- __ fmovs(r18, v5); // fmov w18, s5
|
|
|
|
|
- __ fmovd(r25, v8); // fmov x25, d8
|
|
|
|
|
- __ fmovs(v18, r26); // fmov s18, w26
|
|
|
|
|
- __ fmovd(v0, r11); // fmov d0, x11
|
|
|
|
|
+ __ fcvtzsw(r19, v18); // fcvtzs w19, s18
|
|
|
|
|
+ __ fcvtzs(r17, v0); // fcvtzs x17, s0
|
|
|
|
|
+ __ fcvtzdw(r0, v13); // fcvtzs w0, d13
|
|
|
|
|
+ __ fcvtzd(r22, v9); // fcvtzs x22, d9
|
|
|
|
|
+ __ scvtfws(v6, r29); // scvtf s6, w29
|
|
|
|
|
+ __ scvtfs(v12, r14); // scvtf s12, x14
|
|
|
|
|
+ __ scvtfwd(v16, r22); // scvtf d16, w22
|
|
|
|
|
+ __ scvtfd(v14, r5); // scvtf d14, x5
|
|
|
|
|
+ __ fmovs(r7, v0); // fmov w7, s0
|
|
|
|
|
+ __ fmovd(r28, v6); // fmov x28, d6
|
|
|
|
|
+ __ fmovs(v2, r26); // fmov s2, w26
|
|
|
|
|
+ __ fmovd(v4, r0); // fmov d4, x0
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// TwoRegFloatOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ fcmps(v16, v6); // fcmp s16, s6
|
|
|
|
|
- __ fcmpd(v16, v29); // fcmp d16, d29
|
|
|
|
|
- __ fcmps(v30, 0.0); // fcmp s30, #0.0
|
|
|
|
|
- __ fcmpd(v9, 0.0); // fcmp d9, #0.0
|
|
|
|
|
+ __ fcmps(v1, v11); // fcmp s1, s11
|
|
|
|
|
+ __ fcmpd(v6, v21); // fcmp d6, d21
|
|
|
|
|
+ __ fcmps(v16, 0.0); // fcmp s16, #0.0
|
|
|
|
|
+ __ fcmpd(v22, 0.0); // fcmp d22, #0.0
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStorePairOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ stpw(r27, r4, Address(r12, -16)); // stp w27, w4, [x12, #-16]
|
|
|
|
|
- __ ldpw(r3, r9, Address(r10, 80)); // ldp w3, w9, [x10, #80]
|
|
|
|
|
- __ ldpsw(r16, r3, Address(r3, 64)); // ldpsw x16, x3, [x3, #64]
|
|
|
|
|
- __ stp(r10, r28, Address(r19, -192)); // stp x10, x28, [x19, #-192]
|
|
|
|
|
- __ ldp(r19, r18, Address(r7, -192)); // ldp x19, x18, [x7, #-192]
|
|
|
|
|
+ __ stpw(r5, r0, Address(r2, 96)); // stp w5, w0, [x2, #96]
|
|
|
|
|
+ __ ldpw(r14, r29, Address(r19, -64)); // ldp w14, w29, [x19, #-64]
|
|
|
|
|
+ __ ldpsw(r15, r3, Address(r3, -160)); // ldpsw x15, x3, [x3, #-160]
|
|
|
|
|
+ __ stp(r7, r13, Address(r27, -224)); // stp x7, x13, [x27, #-224]
|
|
|
|
|
+ __ ldp(r17, r14, Address(r1, 128)); // ldp x17, x14, [x1, #128]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStorePairOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ stpw(r10, r16, Address(__ pre(r30, 16))); // stp w10, w16, [x30, #16]!
|
|
|
|
|
- __ ldpw(r2, r4, Address(__ pre(r18, -240))); // ldp w2, w4, [x18, #-240]!
|
|
|
|
|
- __ ldpsw(r24, r19, Address(__ pre(r13, 48))); // ldpsw x24, x19, [x13, #48]!
|
|
|
|
|
- __ stp(r17, r0, Address(__ pre(r24, 0))); // stp x17, x0, [x24, #0]!
|
|
|
|
|
- __ ldp(r14, r26, Address(__ pre(r3, -192))); // ldp x14, x26, [x3, #-192]!
|
|
|
|
|
+ __ stpw(r21, r22, Address(__ pre(r4, 128))); // stp w21, w22, [x4, #128]!
|
|
|
|
|
+ __ ldpw(r17, r13, Address(__ pre(r2, -96))); // ldp w17, w13, [x2, #-96]!
|
|
|
|
|
+ __ ldpsw(r21, r25, Address(__ pre(r23, -144))); // ldpsw x21, x25, [x23, #-144]!
|
|
|
|
|
+ __ stp(r4, r16, Address(__ pre(r15, -16))); // stp x4, x16, [x15, #-16]!
|
|
|
|
|
+ __ ldp(r29, r21, Address(__ pre(r25, -160))); // ldp x29, x21, [x25, #-160]!
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStorePairOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ stpw(r22, r1, Address(__ post(r0, 80))); // stp w22, w1, [x0], #80
|
|
|
|
|
- __ ldpw(r18, r10, Address(__ post(r0, -16))); // ldp w18, w10, [x0], #-16
|
|
|
|
|
- __ ldpsw(r24, r24, Address(__ post(r22, -16))); // ldpsw x24, x24, [x22], #-16
|
|
|
|
|
- __ stp(r12, r12, Address(__ post(r4, 80))); // stp x12, x12, [x4], #80
|
|
|
|
|
- __ ldp(r4, r9, Address(__ post(r19, -240))); // ldp x4, x9, [x19], #-240
|
|
|
|
|
+ __ stpw(r24, r17, Address(__ post(r26, 80))); // stp w24, w17, [x26], #80
|
|
|
|
|
+ __ ldpw(r3, r30, Address(__ post(r30, -240))); // ldp w3, w30, [x30], #-240
|
|
|
|
|
+ __ ldpsw(r3, r19, Address(__ post(r30, -32))); // ldpsw x3, x19, [x30], #-32
|
|
|
|
|
+ __ stp(r25, r1, Address(__ post(r27, -144))); // stp x25, x1, [x27], #-144
|
|
|
|
|
+ __ ldp(r26, r20, Address(__ post(r28, -64))); // ldp x26, x20, [x28], #-64
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// LoadStorePairOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ stnpw(r18, r26, Address(r6, -224)); // stnp w18, w26, [x6, #-224]
|
|
|
|
|
- __ ldnpw(r21, r20, Address(r1, 112)); // ldnp w21, w20, [x1, #112]
|
|
|
|
|
- __ stnp(r25, r29, Address(r20, -224)); // stnp x25, x29, [x20, #-224]
|
|
|
|
|
- __ ldnp(r1, r5, Address(r23, 112)); // ldnp x1, x5, [x23, #112]
|
|
|
|
|
+ __ stnpw(r29, r25, Address(r9, -48)); // stnp w29, w25, [x9, #-48]
|
|
|
|
|
+ __ ldnpw(r25, r14, Address(r19, -128)); // ldnp w25, w14, [x19, #-128]
|
|
|
|
|
+ __ stnp(r25, r22, Address(r3, 32)); // stnp x25, x22, [x3, #32]
|
|
|
|
|
+ __ ldnp(r9, r18, Address(r29, -208)); // ldnp x9, x18, [x29, #-208]
|
|
|
|
|
|
|
|
|
|
// LdStSIMDOp
|
|
|
|
|
- __ ld1(v4, __ T8B, Address(r20)); // ld1 {v4.8B}, [x20]
|
|
|
|
|
- __ ld1(v24, v25, __ T16B, Address(__ post(r10, 32))); // ld1 {v24.16B, v25.16B}, [x10], 32
|
|
|
|
|
- __ ld1(v24, v25, v26, __ T1D, Address(__ post(r6, r15))); // ld1 {v24.1D, v25.1D, v26.1D}, [x6], x15
|
|
|
|
|
- __ ld1(v3, v4, v5, v6, __ T8H, Address(__ post(r4, 64))); // ld1 {v3.8H, v4.8H, v5.8H, v6.8H}, [x4], 64
|
|
|
|
|
- __ ld1r(v2, __ T8B, Address(r6)); // ld1r {v2.8B}, [x6]
|
|
|
|
|
- __ ld1r(v13, __ T4S, Address(__ post(r14, 4))); // ld1r {v13.4S}, [x14], 4
|
|
|
|
|
- __ ld1r(v15, __ T1D, Address(__ post(r21, r24))); // ld1r {v15.1D}, [x21], x24
|
|
|
|
|
- __ ld2(v9, v10, __ T2D, Address(r21)); // ld2 {v9.2D, v10.2D}, [x21]
|
|
|
|
|
- __ ld2(v29, v30, __ T4H, Address(__ post(r21, 16))); // ld2 {v29.4H, v30.4H}, [x21], 16
|
|
|
|
|
- __ ld2r(v8, v9, __ T16B, Address(r14)); // ld2r {v8.16B, v9.16B}, [x14]
|
|
|
|
|
- __ ld2r(v7, v8, __ T2S, Address(__ post(r20, 8))); // ld2r {v7.2S, v8.2S}, [x20], 8
|
|
|
|
|
- __ ld2r(v28, v29, __ T2D, Address(__ post(r3, r3))); // ld2r {v28.2D, v29.2D}, [x3], x3
|
|
|
|
|
- __ ld3(v27, v28, v29, __ T4S, Address(__ post(r11, r29))); // ld3 {v27.4S, v28.4S, v29.4S}, [x11], x29
|
|
|
|
|
- __ ld3(v16, v17, v18, __ T2S, Address(r10)); // ld3 {v16.2S, v17.2S, v18.2S}, [x10]
|
|
|
|
|
- __ ld3r(v21, v22, v23, __ T8H, Address(r12)); // ld3r {v21.8H, v22.8H, v23.8H}, [x12]
|
|
|
|
|
- __ ld3r(v4, v5, v6, __ T4S, Address(__ post(r29, 12))); // ld3r {v4.4S, v5.4S, v6.4S}, [x29], 12
|
|
|
|
|
- __ ld3r(v24, v25, v26, __ T1D, Address(__ post(r9, r19))); // ld3r {v24.1D, v25.1D, v26.1D}, [x9], x19
|
|
|
|
|
- __ ld4(v10, v11, v12, v13, __ T8H, Address(__ post(r3, 64))); // ld4 {v10.8H, v11.8H, v12.8H, v13.8H}, [x3], 64
|
|
|
|
|
- __ ld4(v27, v28, v29, v30, __ T8B, Address(__ post(r28, r9))); // ld4 {v27.8B, v28.8B, v29.8B, v30.8B}, [x28], x9
|
|
|
|
|
- __ ld4r(v21, v22, v23, v24, __ T8B, Address(r30)); // ld4r {v21.8B, v22.8B, v23.8B, v24.8B}, [x30]
|
|
|
|
|
- __ ld4r(v23, v24, v25, v26, __ T4H, Address(__ post(r14, 8))); // ld4r {v23.4H, v24.4H, v25.4H, v26.4H}, [x14], 8
|
|
|
|
|
- __ ld4r(v4, v5, v6, v7, __ T2S, Address(__ post(r13, r20))); // ld4r {v4.2S, v5.2S, v6.2S, v7.2S}, [x13], x20
|
|
|
|
|
+ __ ld1(v21, __ T8B, Address(r19)); // ld1 {v21.8B}, [x19]
|
|
|
|
|
+ __ ld1(v27, v28, __ T16B, Address(__ post(r20, 32))); // ld1 {v27.16B, v28.16B}, [x20], 32
|
|
|
|
|
+ __ ld1(v5, v6, v7, __ T1D, Address(__ post(r22, r6))); // ld1 {v5.1D, v6.1D, v7.1D}, [x22], x6
|
|
|
|
|
+ __ ld1(v22, v23, v24, v25, __ T8H, Address(__ post(r12, 64))); // ld1 {v22.8H, v23.8H, v24.8H, v25.8H}, [x12], 64
|
|
|
|
|
+ __ ld1r(v17, __ T8B, Address(r9)); // ld1r {v17.8B}, [x9]
|
|
|
|
|
+ __ ld1r(v5, __ T4S, Address(__ post(r21, 4))); // ld1r {v5.4S}, [x21], 4
|
|
|
|
|
+ __ ld1r(v10, __ T1D, Address(__ post(r28, r18))); // ld1r {v10.1D}, [x28], x18
|
|
|
|
|
+ __ ld2(v26, v27, __ T2D, Address(r15)); // ld2 {v26.2D, v27.2D}, [x15]
|
|
|
|
|
+ __ ld2(v16, v17, __ T4H, Address(__ post(r26, 16))); // ld2 {v16.4H, v17.4H}, [x26], 16
|
|
|
|
|
+ __ ld2r(v14, v15, __ T16B, Address(r2)); // ld2r {v14.16B, v15.16B}, [x2]
|
|
|
|
|
+ __ ld2r(v18, v19, __ T2S, Address(__ post(r28, 8))); // ld2r {v18.2S, v19.2S}, [x28], 8
|
|
|
|
|
+ __ ld2r(v19, v20, __ T2D, Address(__ post(r0, r22))); // ld2r {v19.2D, v20.2D}, [x0], x22
|
|
|
|
|
+ __ ld3(v16, v17, v18, __ T4S, Address(__ post(r2, r18))); // ld3 {v16.4S, v17.4S, v18.4S}, [x2], x18
|
|
|
|
|
+ __ ld3(v24, v25, v26, __ T2S, Address(r0)); // ld3 {v24.2S, v25.2S, v26.2S}, [x0]
|
|
|
|
|
+ __ ld3r(v4, v5, v6, __ T8H, Address(r16)); // ld3r {v4.8H, v5.8H, v6.8H}, [x16]
|
|
|
|
|
+ __ ld3r(v5, v6, v7, __ T4S, Address(__ post(r1, 12))); // ld3r {v5.4S, v6.4S, v7.4S}, [x1], 12
|
|
|
|
|
+ __ ld3r(v7, v8, v9, __ T1D, Address(__ post(r10, r16))); // ld3r {v7.1D, v8.1D, v9.1D}, [x10], x16
|
|
|
|
|
+ __ ld4(v22, v23, v24, v25, __ T8H, Address(__ post(r20, 64))); // ld4 {v22.8H, v23.8H, v24.8H, v25.8H}, [x20], 64
|
|
|
|
|
+ __ ld4(v15, v16, v17, v18, __ T8B, Address(__ post(r4, r25))); // ld4 {v15.8B, v16.8B, v17.8B, v18.8B}, [x4], x25
|
|
|
|
|
+ __ ld4r(v0, v1, v2, v3, __ T8B, Address(r5)); // ld4r {v0.8B, v1.8B, v2.8B, v3.8B}, [x5]
|
|
|
|
|
+ __ ld4r(v0, v1, v2, v3, __ T4H, Address(__ post(r1, 8))); // ld4r {v0.4H, v1.4H, v2.4H, v3.4H}, [x1], 8
|
|
|
|
|
+ __ ld4r(v30, v31, v0, v1, __ T2S, Address(__ post(r28, r14))); // ld4r {v30.2S, v31.2S, v0.2S, v1.2S}, [x28], x14
|
|
|
|
|
|
|
|
|
|
// SpecialCases
|
|
|
|
|
- __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
|
|
|
|
|
- __ ccmnw(zr, zr, 5u, Assembler::EQ); // ccmn wzr, wzr, #5, EQ
|
|
|
|
|
- __ ccmp(zr, 1, 4u, Assembler::NE); // ccmp xzr, 1, #4, NE
|
|
|
|
|
- __ ccmpw(zr, 2, 2, Assembler::GT); // ccmp wzr, 2, #2, GT
|
|
|
|
|
- __ extr(zr, zr, zr, 0); // extr xzr, xzr, xzr, 0
|
|
|
|
|
- __ stlxp(r0, zr, zr, sp); // stlxp w0, xzr, xzr, [sp]
|
|
|
|
|
- __ stlxpw(r2, zr, zr, r3); // stlxp w2, wzr, wzr, [x3]
|
|
|
|
|
- __ stxp(r4, zr, zr, r5); // stxp w4, xzr, xzr, [x5]
|
|
|
|
|
- __ stxpw(r6, zr, zr, sp); // stxp w6, wzr, wzr, [sp]
|
|
|
|
|
- __ dup(v0, __ T16B, zr); // dup v0.16b, wzr
|
|
|
|
|
- __ mov(v1, __ T1D, 0, zr); // mov v1.d[0], xzr
|
|
|
|
|
- __ mov(v1, __ T2S, 1, zr); // mov v1.s[1], wzr
|
|
|
|
|
- __ mov(v1, __ T4H, 2, zr); // mov v1.h[2], wzr
|
|
|
|
|
- __ mov(v1, __ T8B, 3, zr); // mov v1.b[3], wzr
|
|
|
|
|
- __ ld1(v31, v0, __ T2D, Address(__ post(r1, r0))); // ld1 {v31.2d, v0.2d}, [x1], x0
|
|
|
|
|
+ __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
|
|
|
|
|
+ __ ccmnw(zr, zr, 5u, Assembler::EQ); // ccmn wzr, wzr, #5, EQ
|
|
|
|
|
+ __ ccmp(zr, 1, 4u, Assembler::NE); // ccmp xzr, 1, #4, NE
|
|
|
|
|
+ __ ccmpw(zr, 2, 2, Assembler::GT); // ccmp wzr, 2, #2, GT
|
|
|
|
|
+ __ extr(zr, zr, zr, 0); // extr xzr, xzr, xzr, 0
|
|
|
|
|
+ __ stlxp(r0, zr, zr, sp); // stlxp w0, xzr, xzr, [sp]
|
|
|
|
|
+ __ stlxpw(r2, zr, zr, r3); // stlxp w2, wzr, wzr, [x3]
|
|
|
|
|
+ __ stxp(r4, zr, zr, r5); // stxp w4, xzr, xzr, [x5]
|
|
|
|
|
+ __ stxpw(r6, zr, zr, sp); // stxp w6, wzr, wzr, [sp]
|
|
|
|
|
+ __ dup(v0, __ T16B, zr); // dup v0.16b, wzr
|
|
|
|
|
+ __ mov(v1, __ T1D, 0, zr); // mov v1.d[0], xzr
|
|
|
|
|
+ __ mov(v1, __ T2S, 1, zr); // mov v1.s[1], wzr
|
|
|
|
|
+ __ mov(v1, __ T4H, 2, zr); // mov v1.h[2], wzr
|
|
|
|
|
+ __ mov(v1, __ T8B, 3, zr); // mov v1.b[3], wzr
|
|
|
|
|
+ __ ld1(v31, v0, __ T2D, Address(__ post(r1, r0))); // ld1 {v31.2d, v0.2d}, [x1], x0
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// FloatImmediateOp
|
2021-08-13 14:54:30 +08:00
|
|
|
- __ fmovd(v0, 2.0); // fmov d0, #2.0
|
|
|
|
|
- __ fmovd(v0, 2.125); // fmov d0, #2.125
|
|
|
|
|
- __ fmovd(v0, 4.0); // fmov d0, #4.0
|
|
|
|
|
- __ fmovd(v0, 4.25); // fmov d0, #4.25
|
|
|
|
|
- __ fmovd(v0, 8.0); // fmov d0, #8.0
|
|
|
|
|
- __ fmovd(v0, 8.5); // fmov d0, #8.5
|
|
|
|
|
- __ fmovd(v0, 16.0); // fmov d0, #16.0
|
|
|
|
|
- __ fmovd(v0, 17.0); // fmov d0, #17.0
|
|
|
|
|
- __ fmovd(v0, 0.125); // fmov d0, #0.125
|
|
|
|
|
- __ fmovd(v0, 0.1328125); // fmov d0, #0.1328125
|
|
|
|
|
- __ fmovd(v0, 0.25); // fmov d0, #0.25
|
|
|
|
|
- __ fmovd(v0, 0.265625); // fmov d0, #0.265625
|
|
|
|
|
- __ fmovd(v0, 0.5); // fmov d0, #0.5
|
|
|
|
|
- __ fmovd(v0, 0.53125); // fmov d0, #0.53125
|
|
|
|
|
- __ fmovd(v0, 1.0); // fmov d0, #1.0
|
|
|
|
|
- __ fmovd(v0, 1.0625); // fmov d0, #1.0625
|
|
|
|
|
- __ fmovd(v0, -2.0); // fmov d0, #-2.0
|
|
|
|
|
- __ fmovd(v0, -2.125); // fmov d0, #-2.125
|
|
|
|
|
- __ fmovd(v0, -4.0); // fmov d0, #-4.0
|
|
|
|
|
- __ fmovd(v0, -4.25); // fmov d0, #-4.25
|
|
|
|
|
- __ fmovd(v0, -8.0); // fmov d0, #-8.0
|
|
|
|
|
- __ fmovd(v0, -8.5); // fmov d0, #-8.5
|
|
|
|
|
- __ fmovd(v0, -16.0); // fmov d0, #-16.0
|
|
|
|
|
- __ fmovd(v0, -17.0); // fmov d0, #-17.0
|
|
|
|
|
- __ fmovd(v0, -0.125); // fmov d0, #-0.125
|
|
|
|
|
- __ fmovd(v0, -0.1328125); // fmov d0, #-0.1328125
|
|
|
|
|
- __ fmovd(v0, -0.25); // fmov d0, #-0.25
|
|
|
|
|
- __ fmovd(v0, -0.265625); // fmov d0, #-0.265625
|
|
|
|
|
- __ fmovd(v0, -0.5); // fmov d0, #-0.5
|
|
|
|
|
- __ fmovd(v0, -0.53125); // fmov d0, #-0.53125
|
|
|
|
|
- __ fmovd(v0, -1.0); // fmov d0, #-1.0
|
|
|
|
|
- __ fmovd(v0, -1.0625); // fmov d0, #-1.0625
|
2020-12-24 15:35:16 +08:00
|
|
|
+ __ fmovd(v0, 2.0); // fmov d0, #2.0
|
|
|
|
|
+ __ fmovd(v0, 2.125); // fmov d0, #2.125
|
|
|
|
|
+ __ fmovd(v0, 4.0); // fmov d0, #4.0
|
|
|
|
|
+ __ fmovd(v0, 4.25); // fmov d0, #4.25
|
|
|
|
|
+ __ fmovd(v0, 8.0); // fmov d0, #8.0
|
|
|
|
|
+ __ fmovd(v0, 8.5); // fmov d0, #8.5
|
|
|
|
|
+ __ fmovd(v0, 16.0); // fmov d0, #16.0
|
|
|
|
|
+ __ fmovd(v0, 17.0); // fmov d0, #17.0
|
|
|
|
|
+ __ fmovd(v0, 0.125); // fmov d0, #0.125
|
|
|
|
|
+ __ fmovd(v0, 0.1328125); // fmov d0, #0.1328125
|
|
|
|
|
+ __ fmovd(v0, 0.25); // fmov d0, #0.25
|
|
|
|
|
+ __ fmovd(v0, 0.265625); // fmov d0, #0.265625
|
|
|
|
|
+ __ fmovd(v0, 0.5); // fmov d0, #0.5
|
|
|
|
|
+ __ fmovd(v0, 0.53125); // fmov d0, #0.53125
|
|
|
|
|
+ __ fmovd(v0, 1.0); // fmov d0, #1.0
|
|
|
|
|
+ __ fmovd(v0, 1.0625); // fmov d0, #1.0625
|
|
|
|
|
+ __ fmovd(v0, -2.0); // fmov d0, #-2.0
|
|
|
|
|
+ __ fmovd(v0, -2.125); // fmov d0, #-2.125
|
|
|
|
|
+ __ fmovd(v0, -4.0); // fmov d0, #-4.0
|
|
|
|
|
+ __ fmovd(v0, -4.25); // fmov d0, #-4.25
|
|
|
|
|
+ __ fmovd(v0, -8.0); // fmov d0, #-8.0
|
|
|
|
|
+ __ fmovd(v0, -8.5); // fmov d0, #-8.5
|
|
|
|
|
+ __ fmovd(v0, -16.0); // fmov d0, #-16.0
|
|
|
|
|
+ __ fmovd(v0, -17.0); // fmov d0, #-17.0
|
|
|
|
|
+ __ fmovd(v0, -0.125); // fmov d0, #-0.125
|
|
|
|
|
+ __ fmovd(v0, -0.1328125); // fmov d0, #-0.1328125
|
|
|
|
|
+ __ fmovd(v0, -0.25); // fmov d0, #-0.25
|
|
|
|
|
+ __ fmovd(v0, -0.265625); // fmov d0, #-0.265625
|
|
|
|
|
+ __ fmovd(v0, -0.5); // fmov d0, #-0.5
|
|
|
|
|
+ __ fmovd(v0, -0.53125); // fmov d0, #-0.53125
|
|
|
|
|
+ __ fmovd(v0, -1.0); // fmov d0, #-1.0
|
|
|
|
|
+ __ fmovd(v0, -1.0625); // fmov d0, #-1.0625
|
2021-08-13 14:54:30 +08:00
|
|
|
|
|
|
|
|
// LSEOp
|
|
|
|
|
- __ swp(Assembler::xword, r21, r5, r24); // swp x21, x5, [x24]
|
|
|
|
|
- __ ldadd(Assembler::xword, r13, r13, r15); // ldadd x13, x13, [x15]
|
|
|
|
|
- __ ldbic(Assembler::xword, r22, r19, r26); // ldclr x22, x19, [x26]
|
|
|
|
|
- __ ldeor(Assembler::xword, r25, r10, r26); // ldeor x25, x10, [x26]
|
|
|
|
|
- __ ldorr(Assembler::xword, r5, r27, r15); // ldset x5, x27, [x15]
|
|
|
|
|
- __ ldsmin(Assembler::xword, r19, r5, r11); // ldsmin x19, x5, [x11]
|
|
|
|
|
- __ ldsmax(Assembler::xword, r26, r0, r4); // ldsmax x26, x0, [x4]
|
|
|
|
|
- __ ldumin(Assembler::xword, r22, r23, r30); // ldumin x22, x23, [x30]
|
|
|
|
|
- __ ldumax(Assembler::xword, r18, r28, r8); // ldumax x18, x28, [x8]
|
|
|
|
|
+ __ swp(Assembler::xword, r26, r9, r17); // swp x26, x9, [x17]
|
|
|
|
|
+ __ ldadd(Assembler::xword, r28, r23, r2); // ldadd x28, x23, [x2]
|
|
|
|
|
+ __ ldbic(Assembler::xword, r22, r2, r3); // ldclr x22, x2, [x3]
|
|
|
|
|
+ __ ldeor(Assembler::xword, r11, r25, r30); // ldeor x11, x25, [x30]
|
|
|
|
|
+ __ ldorr(Assembler::xword, r22, r28, r4); // ldset x22, x28, [x4]
|
|
|
|
|
+ __ ldsmin(Assembler::xword, r6, r11, r24); // ldsmin x6, x11, [x24]
|
|
|
|
|
+ __ ldsmax(Assembler::xword, r12, zr, sp); // ldsmax x12, xzr, [sp]
|
|
|
|
|
+ __ ldumin(Assembler::xword, r23, r30, r9); // ldumin x23, x30, [x9]
|
|
|
|
|
+ __ ldumax(Assembler::xword, r6, r1, r20); // ldumax x6, x1, [x20]
|
|
|
|
|
|
|
|
|
|
// LSEOp
|
|
|
|
|
- __ swpa(Assembler::xword, r13, r29, r27); // swpa x13, x29, [x27]
|
|
|
|
|
- __ ldadda(Assembler::xword, r11, r5, r13); // ldadda x11, x5, [x13]
|
|
|
|
|
- __ ldbica(Assembler::xword, r1, r24, r21); // ldclra x1, x24, [x21]
|
|
|
|
|
- __ ldeora(Assembler::xword, r27, r17, r24); // ldeora x27, x17, [x24]
|
|
|
|
|
- __ ldorra(Assembler::xword, r18, r30, r5); // ldseta x18, x30, [x5]
|
|
|
|
|
- __ ldsmina(Assembler::xword, r7, r22, r25); // ldsmina x7, x22, [x25]
|
|
|
|
|
- __ ldsmaxa(Assembler::xword, r4, r26, r19); // ldsmaxa x4, x26, [x19]
|
|
|
|
|
- __ ldumina(Assembler::xword, r6, r30, r3); // ldumina x6, x30, [x3]
|
|
|
|
|
- __ ldumaxa(Assembler::xword, r24, r23, r5); // ldumaxa x24, x23, [x5]
|
|
|
|
|
+ __ swpa(Assembler::xword, r17, r2, r22); // swpa x17, x2, [x22]
|
|
|
|
|
+ __ ldadda(Assembler::xword, r14, r27, r10); // ldadda x14, x27, [x10]
|
|
|
|
|
+ __ ldbica(Assembler::xword, r6, r30, r19); // ldclra x6, x30, [x19]
|
|
|
|
|
+ __ ldeora(Assembler::xword, r0, r25, r11); // ldeora x0, x25, [x11]
|
|
|
|
|
+ __ ldorra(Assembler::xword, r23, r0, r30); // ldseta x23, x0, [x30]
|
|
|
|
|
+ __ ldsmina(Assembler::xword, r21, r3, r10); // ldsmina x21, x3, [x10]
|
|
|
|
|
+ __ ldsmaxa(Assembler::xword, r15, r22, r0); // ldsmaxa x15, x22, [x0]
|
|
|
|
|
+ __ ldumina(Assembler::xword, r17, r0, r20); // ldumina x17, x0, [x20]
|
|
|
|
|
+ __ ldumaxa(Assembler::xword, r16, r13, r1); // ldumaxa x16, x13, [x1]
|
|
|
|
|
|
|
|
|
|
// LSEOp
|
|
|
|
|
- __ swpal(Assembler::xword, r24, r18, r28); // swpal x24, x18, [x28]
|
|
|
|
|
- __ ldaddal(Assembler::xword, r19, zr, r7); // ldaddal x19, xzr, [x7]
|
|
|
|
|
- __ ldbical(Assembler::xword, r13, r6, r28); // ldclral x13, x6, [x28]
|
|
|
|
|
- __ ldeoral(Assembler::xword, r8, r15, r21); // ldeoral x8, x15, [x21]
|
|
|
|
|
- __ ldorral(Assembler::xword, r2, r13, r1); // ldsetal x2, x13, [x1]
|
|
|
|
|
- __ ldsminal(Assembler::xword, r17, r29, r25); // ldsminal x17, x29, [x25]
|
|
|
|
|
- __ ldsmaxal(Assembler::xword, r25, r18, r14); // ldsmaxal x25, x18, [x14]
|
|
|
|
|
- __ lduminal(Assembler::xword, zr, r6, r27); // lduminal xzr, x6, [x27]
|
|
|
|
|
- __ ldumaxal(Assembler::xword, r16, r5, r15); // ldumaxal x16, x5, [x15]
|
|
|
|
|
+ __ swpal(Assembler::xword, r27, r15, r23); // swpal x27, x15, [x23]
|
|
|
|
|
+ __ ldaddal(Assembler::xword, r19, r30, r1); // ldaddal x19, x30, [x1]
|
|
|
|
|
+ __ ldbical(Assembler::xword, r15, r28, r23); // ldclral x15, x28, [x23]
|
|
|
|
|
+ __ ldeoral(Assembler::xword, r7, r15, r19); // ldeoral x7, x15, [x19]
|
|
|
|
|
+ __ ldorral(Assembler::xword, r11, r12, r10); // ldsetal x11, x12, [x10]
|
|
|
|
|
+ __ ldsminal(Assembler::xword, r6, r7, r12); // ldsminal x6, x7, [x12]
|
|
|
|
|
+ __ ldsmaxal(Assembler::xword, r28, r5, r13); // ldsmaxal x28, x5, [x13]
|
|
|
|
|
+ __ lduminal(Assembler::xword, r9, r20, r17); // lduminal x9, x20, [x17]
|
|
|
|
|
+ __ ldumaxal(Assembler::xword, r21, r25, r11); // ldumaxal x21, x25, [x11]
|
|
|
|
|
|
|
|
|
|
// LSEOp
|
|
|
|
|
- __ swpl(Assembler::xword, r11, r18, r3); // swpl x11, x18, [x3]
|
|
|
|
|
- __ ldaddl(Assembler::xword, r26, r20, r2); // ldaddl x26, x20, [x2]
|
|
|
|
|
- __ ldbicl(Assembler::xword, r11, r4, r11); // ldclrl x11, x4, [x11]
|
|
|
|
|
- __ ldeorl(Assembler::xword, r30, r19, r23); // ldeorl x30, x19, [x23]
|
|
|
|
|
- __ ldorrl(Assembler::xword, r3, r15, r14); // ldsetl x3, x15, [x14]
|
|
|
|
|
- __ ldsminl(Assembler::xword, r30, r22, r20); // ldsminl x30, x22, [x20]
|
|
|
|
|
- __ ldsmaxl(Assembler::xword, r7, r5, r24); // ldsmaxl x7, x5, [x24]
|
|
|
|
|
- __ lduminl(Assembler::xword, r23, r16, r15); // lduminl x23, x16, [x15]
|
|
|
|
|
- __ ldumaxl(Assembler::xword, r11, r19, r0); // ldumaxl x11, x19, [x0]
|
|
|
|
|
+ __ swpl(Assembler::xword, r19, r24, r24); // swpl x19, x24, [x24]
|
|
|
|
|
+ __ ldaddl(Assembler::xword, r8, r26, r30); // ldaddl x8, x26, [x30]
|
|
|
|
|
+ __ ldbicl(Assembler::xword, r17, r18, r8); // ldclrl x17, x18, [x8]
|
|
|
|
|
+ __ ldeorl(Assembler::xword, r2, r3, r3); // ldeorl x2, x3, [x3]
|
|
|
|
|
+ __ ldorrl(Assembler::xword, r26, r7, r16); // ldsetl x26, x7, [x16]
|
|
|
|
|
+ __ ldsminl(Assembler::xword, r27, r6, r5); // ldsminl x27, x6, [x5]
|
|
|
|
|
+ __ ldsmaxl(Assembler::xword, r22, r0, r20); // ldsmaxl x22, x0, [x20]
|
|
|
|
|
+ __ lduminl(Assembler::xword, r11, r26, r2); // lduminl x11, x26, [x2]
|
|
|
|
|
+ __ ldumaxl(Assembler::xword, r30, r29, r4); // ldumaxl x30, x29, [x4]
|
|
|
|
|
|
|
|
|
|
// LSEOp
|
|
|
|
|
- __ swp(Assembler::word, r28, r28, r1); // swp w28, w28, [x1]
|
|
|
|
|
- __ ldadd(Assembler::word, r11, r21, r12); // ldadd w11, w21, [x12]
|
|
|
|
|
- __ ldbic(Assembler::word, r29, r0, r18); // ldclr w29, w0, [x18]
|
|
|
|
|
- __ ldeor(Assembler::word, r5, r0, r25); // ldeor w5, w0, [x25]
|
|
|
|
|
- __ ldorr(Assembler::word, r14, r0, r26); // ldset w14, w0, [x26]
|
|
|
|
|
- __ ldsmin(Assembler::word, r28, r18, r29); // ldsmin w28, w18, [x29]
|
|
|
|
|
- __ ldsmax(Assembler::word, r15, r1, r29); // ldsmax w15, w1, [x29]
|
|
|
|
|
- __ ldumin(Assembler::word, r8, r26, r28); // ldumin w8, w26, [x28]
|
|
|
|
|
- __ ldumax(Assembler::word, r17, r14, r4); // ldumax w17, w14, [x4]
|
|
|
|
|
+ __ swp(Assembler::word, r4, r5, r7); // swp w4, w5, [x7]
|
|
|
|
|
+ __ ldadd(Assembler::word, r10, r26, r2); // ldadd w10, w26, [x2]
|
|
|
|
|
+ __ ldbic(Assembler::word, r27, r16, r27); // ldclr w27, w16, [x27]
|
|
|
|
|
+ __ ldeor(Assembler::word, zr, r23, r10); // ldeor wzr, w23, [x10]
|
|
|
|
|
+ __ ldorr(Assembler::word, r4, r2, r13); // ldset w4, w2, [x13]
|
|
|
|
|
+ __ ldsmin(Assembler::word, r3, r15, r3); // ldsmin w3, w15, [x3]
|
|
|
|
|
+ __ ldsmax(Assembler::word, r3, r10, r6); // ldsmax w3, w10, [x6]
|
|
|
|
|
+ __ ldumin(Assembler::word, r8, r11, r10); // ldumin w8, w11, [x10]
|
|
|
|
|
+ __ ldumax(Assembler::word, r29, r30, r13); // ldumax w29, w30, [x13]
|
|
|
|
|
|
|
|
|
|
// LSEOp
|
|
|
|
|
- __ swpa(Assembler::word, r24, r25, r1); // swpa w24, w25, [x1]
|
|
|
|
|
- __ ldadda(Assembler::word, r10, r17, r17); // ldadda w10, w17, [x17]
|
|
|
|
|
- __ ldbica(Assembler::word, r29, r20, r21); // ldclra w29, w20, [x21]
|
|
|
|
|
- __ ldeora(Assembler::word, r29, r9, r12); // ldeora w29, w9, [x12]
|
|
|
|
|
- __ ldorra(Assembler::word, r11, r6, r5); // ldseta w11, w6, [x5]
|
|
|
|
|
- __ ldsmina(Assembler::word, r21, r7, r21); // ldsmina w21, w7, [x21]
|
|
|
|
|
- __ ldsmaxa(Assembler::word, r10, r23, r12); // ldsmaxa w10, w23, [x12]
|
|
|
|
|
- __ ldumina(Assembler::word, r21, r5, r10); // ldumina w21, w5, [x10]
|
|
|
|
|
- __ ldumaxa(Assembler::word, r30, r20, r18); // ldumaxa w30, w20, [x18]
|
|
|
|
|
+ __ swpa(Assembler::word, r11, r17, r20); // swpa w11, w17, [x20]
|
|
|
|
|
+ __ ldadda(Assembler::word, r26, r16, r6); // ldadda w26, w16, [x6]
|
|
|
|
|
+ __ ldbica(Assembler::word, r21, r10, r1); // ldclra w21, w10, [x1]
|
|
|
|
|
+ __ ldeora(Assembler::word, r29, r12, r23); // ldeora w29, w12, [x23]
|
|
|
|
|
+ __ ldorra(Assembler::word, r29, r8, r8); // ldseta w29, w8, [x8]
|
|
|
|
|
+ __ ldsmina(Assembler::word, r11, r10, r14); // ldsmina w11, w10, [x14]
|
|
|
|
|
+ __ ldsmaxa(Assembler::word, r4, r13, r22); // ldsmaxa w4, w13, [x22]
|
|
|
|
|
+ __ ldumina(Assembler::word, r7, r13, r7); // ldumina w7, w13, [x7]
|
|
|
|
|
+ __ ldumaxa(Assembler::word, r14, r0, sp); // ldumaxa w14, w0, [sp]
|
|
|
|
|
|
|
|
|
|
// LSEOp
|
|
|
|
|
- __ swpal(Assembler::word, r13, r23, r5); // swpal w13, w23, [x5]
|
|
|
|
|
- __ ldaddal(Assembler::word, r15, r24, r5); // ldaddal w15, w24, [x5]
|
|
|
|
|
- __ ldbical(Assembler::word, r9, r10, r25); // ldclral w9, w10, [x25]
|
|
|
|
|
- __ ldeoral(Assembler::word, r20, r17, r17); // ldeoral w20, w17, [x17]
|
|
|
|
|
- __ ldorral(Assembler::word, r12, r18, r30); // ldsetal w12, w18, [x30]
|
|
|
|
|
- __ ldsminal(Assembler::word, r3, r3, r25); // ldsminal w3, w3, [x25]
|
|
|
|
|
- __ ldsmaxal(Assembler::word, r26, r25, r10); // ldsmaxal w26, w25, [x10]
|
|
|
|
|
- __ lduminal(Assembler::word, r2, r11, sp); // lduminal w2, w11, [sp]
|
|
|
|
|
- __ ldumaxal(Assembler::word, r7, r2, r5); // ldumaxal w7, w2, [x5]
|
|
|
|
|
+ __ swpal(Assembler::word, r17, r2, r28); // swpal w17, w2, [x28]
|
|
|
|
|
+ __ ldaddal(Assembler::word, r19, r11, r10); // ldaddal w19, w11, [x10]
|
|
|
|
|
+ __ ldbical(Assembler::word, r12, r19, r20); // ldclral w12, w19, [x20]
|
|
|
|
|
+ __ ldeoral(Assembler::word, r0, r8, r8); // ldeoral w0, w8, [x8]
|
|
|
|
|
+ __ ldorral(Assembler::word, r17, r3, r24); // ldsetal w17, w3, [x24]
|
|
|
|
|
+ __ ldsminal(Assembler::word, r25, r5, r7); // ldsminal w25, w5, [x7]
|
|
|
|
|
+ __ ldsmaxal(Assembler::word, r16, r30, r9); // ldsmaxal w16, w30, [x9]
|
|
|
|
|
+ __ lduminal(Assembler::word, r10, zr, r14); // lduminal w10, wzr, [x14]
|
|
|
|
|
+ __ ldumaxal(Assembler::word, r17, r19, r11); // ldumaxal w17, w19, [x11]
|
|
|
|
|
|
|
|
|
|
// LSEOp
|
|
|
|
|
- __ swpl(Assembler::word, r0, r7, r20); // swpl w0, w7, [x20]
|
|
|
|
|
- __ ldaddl(Assembler::word, r5, zr, r2); // ldaddl w5, wzr, [x2]
|
|
|
|
|
- __ ldbicl(Assembler::word, r27, r25, r27); // ldclrl w27, w25, [x27]
|
|
|
|
|
- __ ldeorl(Assembler::word, r30, r24, r26); // ldeorl w30, w24, [x26]
|
|
|
|
|
- __ ldorrl(Assembler::word, r15, r2, r22); // ldsetl w15, w2, [x22]
|
|
|
|
|
- __ ldsminl(Assembler::word, r0, r3, sp); // ldsminl w0, w3, [sp]
|
|
|
|
|
- __ ldsmaxl(Assembler::word, r15, r20, r10); // ldsmaxl w15, w20, [x10]
|
|
|
|
|
- __ lduminl(Assembler::word, r22, r21, r14); // lduminl w22, w21, [x14]
|
|
|
|
|
- __ ldumaxl(Assembler::word, r6, r30, r2); // ldumaxl w6, w30, [x2]
|
|
|
|
|
+ __ swpl(Assembler::word, r20, r1, r13); // swpl w20, w1, [x13]
|
|
|
|
|
+ __ ldaddl(Assembler::word, r26, r11, r20); // ldaddl w26, w11, [x20]
|
|
|
|
|
+ __ ldbicl(Assembler::word, r18, r24, r30); // ldclrl w18, w24, [x30]
|
|
|
|
|
+ __ ldeorl(Assembler::word, r12, r25, r20); // ldeorl w12, w25, [x20]
|
|
|
|
|
+ __ ldorrl(Assembler::word, r14, r29, r5); // ldsetl w14, w29, [x5]
|
|
|
|
|
+ __ ldsminl(Assembler::word, r2, r26, r27); // ldsminl w2, w26, [x27]
|
|
|
|
|
+ __ ldsmaxl(Assembler::word, r25, r27, r11); // ldsmaxl w25, w27, [x11]
|
|
|
|
|
+ __ lduminl(Assembler::word, r4, r29, r7); // lduminl w4, w29, [x7]
|
|
|
|
|
+ __ ldumaxl(Assembler::word, r16, r29, r10); // ldumaxl w16, w29, [x10]
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
__ bind(forth);
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -762,567 +762,567 @@ aarch64ops.o: file format elf64-littleaarch64
|
2020-12-24 15:35:16 +08:00
|
|
|
Disassembly of section .text:
|
|
|
|
|
|
|
|
|
|
0000000000000000 <back>:
|
2021-08-13 14:54:30 +08:00
|
|
|
- 0: 8b50798f add x15, x12, x16, lsr #30
|
|
|
|
|
- 4: cb4381e1 sub x1, x15, x3, lsr #32
|
|
|
|
|
- 8: ab05372d adds x13, x25, x5, lsl #13
|
|
|
|
|
- c: eb864796 subs x22, x28, x6, asr #17
|
|
|
|
|
- 10: 0b961920 add w0, w9, w22, asr #6
|
|
|
|
|
- 14: 4b195473 sub w19, w3, w25, lsl #21
|
|
|
|
|
- 18: 2b0b5264 adds w4, w19, w11, lsl #20
|
|
|
|
|
- 1c: 6b9300f8 subs w24, w7, w19, asr #0
|
|
|
|
|
- 20: 8a0bc0fe and x30, x7, x11, lsl #48
|
|
|
|
|
- 24: aa0f3118 orr x24, x8, x15, lsl #12
|
|
|
|
|
- 28: ca170531 eor x17, x9, x23, lsl #1
|
|
|
|
|
- 2c: ea44dd6e ands x14, x11, x4, lsr #55
|
|
|
|
|
- 30: 0a4c44f3 and w19, w7, w12, lsr #17
|
|
|
|
|
- 34: 2a8b7373 orr w19, w27, w11, asr #28
|
|
|
|
|
- 38: 4a567c7e eor w30, w3, w22, lsr #31
|
|
|
|
|
- 3c: 6a9c0353 ands w19, w26, w28, asr #0
|
|
|
|
|
- 40: 8a3accdd bic x29, x6, x26, lsl #51
|
|
|
|
|
- 44: aa318f7a orn x26, x27, x17, lsl #35
|
|
|
|
|
- 48: ca2e1495 eon x21, x4, x14, lsl #5
|
|
|
|
|
- 4c: eaa015e2 bics x2, x15, x0, asr #5
|
|
|
|
|
- 50: 0a2274e2 bic w2, w7, w2, lsl #29
|
|
|
|
|
- 54: 2a751598 orn w24, w12, w21, lsr #5
|
|
|
|
|
- 58: 4a3309fe eon w30, w15, w19, lsl #2
|
|
|
|
|
- 5c: 6ab172fe bics w30, w23, w17, asr #28
|
|
|
|
|
- 60: 110a5284 add w4, w20, #0x294
|
|
|
|
|
- 64: 310b1942 adds w2, w10, #0x2c6
|
|
|
|
|
- 68: 5103d353 sub w19, w26, #0xf4
|
|
|
|
|
- 6c: 710125bc subs w28, w13, #0x49
|
|
|
|
|
- 70: 910d7bc2 add x2, x30, #0x35e
|
|
|
|
|
- 74: b108fa1b adds x27, x16, #0x23e
|
|
|
|
|
- 78: d1093536 sub x22, x9, #0x24d
|
|
|
|
|
- 7c: f10ae824 subs x4, x1, #0x2ba
|
|
|
|
|
- 80: 120e667c and w28, w19, #0xfffc0fff
|
|
|
|
|
- 84: 321f6cbb orr w27, w5, #0x1ffffffe
|
|
|
|
|
- 88: 520f6a9e eor w30, w20, #0xfffe0fff
|
|
|
|
|
- 8c: 72136f56 ands w22, w26, #0xffffe1ff
|
|
|
|
|
- 90: 927e4ce5 and x5, x7, #0x3ffffc
|
|
|
|
|
- 94: b278b4ed orr x13, x7, #0x3fffffffffff00
|
|
|
|
|
- 98: d24c6527 eor x7, x9, #0xfff0000000003fff
|
|
|
|
|
- 9c: f2485803 ands x3, x0, #0xff00000000007fff
|
|
|
|
|
- a0: 14000000 b a0 <back+0xa0>
|
|
|
|
|
- a4: 17ffffd7 b 0 <back>
|
|
|
|
|
- a8: 140001ee b 860 <forth>
|
|
|
|
|
- ac: 94000000 bl ac <back+0xac>
|
|
|
|
|
- b0: 97ffffd4 bl 0 <back>
|
|
|
|
|
- b4: 940001eb bl 860 <forth>
|
|
|
|
|
- b8: 34000010 cbz w16, b8 <back+0xb8>
|
|
|
|
|
- bc: 34fffa30 cbz w16, 0 <back>
|
|
|
|
|
- c0: 34003d10 cbz w16, 860 <forth>
|
|
|
|
|
- c4: 35000013 cbnz w19, c4 <back+0xc4>
|
|
|
|
|
- c8: 35fff9d3 cbnz w19, 0 <back>
|
|
|
|
|
- cc: 35003cb3 cbnz w19, 860 <forth>
|
|
|
|
|
- d0: b4000005 cbz x5, d0 <back+0xd0>
|
|
|
|
|
- d4: b4fff965 cbz x5, 0 <back>
|
|
|
|
|
- d8: b4003c45 cbz x5, 860 <forth>
|
|
|
|
|
- dc: b5000004 cbnz x4, dc <back+0xdc>
|
|
|
|
|
- e0: b5fff904 cbnz x4, 0 <back>
|
|
|
|
|
- e4: b5003be4 cbnz x4, 860 <forth>
|
|
|
|
|
- e8: 1000001b adr x27, e8 <back+0xe8>
|
|
|
|
|
- ec: 10fff8bb adr x27, 0 <back>
|
|
|
|
|
- f0: 10003b9b adr x27, 860 <forth>
|
|
|
|
|
- f4: 90000010 adrp x16, 0 <back>
|
|
|
|
|
- f8: 3640001c tbz w28, #8, f8 <back+0xf8>
|
|
|
|
|
- fc: 3647f83c tbz w28, #8, 0 <back>
|
|
|
|
|
- 100: 36403b1c tbz w28, #8, 860 <forth>
|
|
|
|
|
- 104: 37080001 tbnz w1, #1, 104 <back+0x104>
|
|
|
|
|
- 108: 370ff7c1 tbnz w1, #1, 0 <back>
|
|
|
|
|
- 10c: 37083aa1 tbnz w1, #1, 860 <forth>
|
|
|
|
|
- 110: 12a437f4 mov w20, #0xde40ffff // #-566165505
|
|
|
|
|
- 114: 528c9d67 mov w7, #0x64eb // #25835
|
|
|
|
|
- 118: 72838bb1 movk w17, #0x1c5d
|
|
|
|
|
- 11c: 92c1062e mov x14, #0xfffff7ceffffffff // #-9006546419713
|
|
|
|
|
- 120: d287da49 mov x9, #0x3ed2 // #16082
|
|
|
|
|
- 124: f2a6d153 movk x19, #0x368a, lsl #16
|
|
|
|
|
- 128: 93465ac9 sbfx x9, x22, #6, #17
|
|
|
|
|
- 12c: 330b0013 bfi w19, w0, #21, #1
|
|
|
|
|
- 130: 530b4e6a ubfx w10, w19, #11, #9
|
|
|
|
|
- 134: 934545e4 sbfx x4, x15, #5, #13
|
|
|
|
|
- 138: b35370a3 bfxil x3, x5, #19, #10
|
|
|
|
|
- 13c: d3510b8c ubfiz x12, x28, #47, #3
|
|
|
|
|
- 140: 13960c0f extr w15, w0, w22, #3
|
|
|
|
|
- 144: 93ceddc6 ror x6, x14, #55
|
|
|
|
|
- 148: 54000000 b.eq 148 <back+0x148> // b.none
|
|
|
|
|
- 14c: 54fff5a0 b.eq 0 <back> // b.none
|
|
|
|
|
- 150: 54003880 b.eq 860 <forth> // b.none
|
|
|
|
|
- 154: 54000001 b.ne 154 <back+0x154> // b.any
|
|
|
|
|
- 158: 54fff541 b.ne 0 <back> // b.any
|
|
|
|
|
- 15c: 54003821 b.ne 860 <forth> // b.any
|
|
|
|
|
- 160: 54000002 b.cs 160 <back+0x160> // b.hs, b.nlast
|
|
|
|
|
- 164: 54fff4e2 b.cs 0 <back> // b.hs, b.nlast
|
|
|
|
|
- 168: 540037c2 b.cs 860 <forth> // b.hs, b.nlast
|
|
|
|
|
- 16c: 54000002 b.cs 16c <back+0x16c> // b.hs, b.nlast
|
|
|
|
|
- 170: 54fff482 b.cs 0 <back> // b.hs, b.nlast
|
|
|
|
|
- 174: 54003762 b.cs 860 <forth> // b.hs, b.nlast
|
|
|
|
|
- 178: 54000003 b.cc 178 <back+0x178> // b.lo, b.ul, b.last
|
|
|
|
|
- 17c: 54fff423 b.cc 0 <back> // b.lo, b.ul, b.last
|
|
|
|
|
- 180: 54003703 b.cc 860 <forth> // b.lo, b.ul, b.last
|
|
|
|
|
- 184: 54000003 b.cc 184 <back+0x184> // b.lo, b.ul, b.last
|
|
|
|
|
- 188: 54fff3c3 b.cc 0 <back> // b.lo, b.ul, b.last
|
|
|
|
|
- 18c: 540036a3 b.cc 860 <forth> // b.lo, b.ul, b.last
|
|
|
|
|
- 190: 54000004 b.mi 190 <back+0x190> // b.first
|
|
|
|
|
- 194: 54fff364 b.mi 0 <back> // b.first
|
|
|
|
|
- 198: 54003644 b.mi 860 <forth> // b.first
|
|
|
|
|
- 19c: 54000005 b.pl 19c <back+0x19c> // b.nfrst
|
|
|
|
|
- 1a0: 54fff305 b.pl 0 <back> // b.nfrst
|
|
|
|
|
- 1a4: 540035e5 b.pl 860 <forth> // b.nfrst
|
|
|
|
|
- 1a8: 54000006 b.vs 1a8 <back+0x1a8>
|
|
|
|
|
- 1ac: 54fff2a6 b.vs 0 <back>
|
|
|
|
|
- 1b0: 54003586 b.vs 860 <forth>
|
|
|
|
|
- 1b4: 54000007 b.vc 1b4 <back+0x1b4>
|
|
|
|
|
- 1b8: 54fff247 b.vc 0 <back>
|
|
|
|
|
- 1bc: 54003527 b.vc 860 <forth>
|
|
|
|
|
- 1c0: 54000008 b.hi 1c0 <back+0x1c0> // b.pmore
|
|
|
|
|
- 1c4: 54fff1e8 b.hi 0 <back> // b.pmore
|
|
|
|
|
- 1c8: 540034c8 b.hi 860 <forth> // b.pmore
|
|
|
|
|
- 1cc: 54000009 b.ls 1cc <back+0x1cc> // b.plast
|
|
|
|
|
- 1d0: 54fff189 b.ls 0 <back> // b.plast
|
|
|
|
|
- 1d4: 54003469 b.ls 860 <forth> // b.plast
|
|
|
|
|
- 1d8: 5400000a b.ge 1d8 <back+0x1d8> // b.tcont
|
|
|
|
|
- 1dc: 54fff12a b.ge 0 <back> // b.tcont
|
|
|
|
|
- 1e0: 5400340a b.ge 860 <forth> // b.tcont
|
|
|
|
|
- 1e4: 5400000b b.lt 1e4 <back+0x1e4> // b.tstop
|
|
|
|
|
- 1e8: 54fff0cb b.lt 0 <back> // b.tstop
|
|
|
|
|
- 1ec: 540033ab b.lt 860 <forth> // b.tstop
|
|
|
|
|
- 1f0: 5400000c b.gt 1f0 <back+0x1f0>
|
|
|
|
|
- 1f4: 54fff06c b.gt 0 <back>
|
|
|
|
|
- 1f8: 5400334c b.gt 860 <forth>
|
|
|
|
|
- 1fc: 5400000d b.le 1fc <back+0x1fc>
|
|
|
|
|
- 200: 54fff00d b.le 0 <back>
|
|
|
|
|
- 204: 540032ed b.le 860 <forth>
|
|
|
|
|
- 208: 5400000e b.al 208 <back+0x208>
|
|
|
|
|
- 20c: 54ffefae b.al 0 <back>
|
|
|
|
|
- 210: 5400328e b.al 860 <forth>
|
|
|
|
|
- 214: 5400000f b.nv 214 <back+0x214>
|
|
|
|
|
- 218: 54ffef4f b.nv 0 <back>
|
|
|
|
|
- 21c: 5400322f b.nv 860 <forth>
|
|
|
|
|
- 220: d40ac601 svc #0x5630
|
|
|
|
|
- 224: d40042a2 hvc #0x215
|
|
|
|
|
- 228: d404dac3 smc #0x26d6
|
|
|
|
|
- 22c: d4224d40 brk #0x126a
|
|
|
|
|
- 230: d44219c0 hlt #0x10ce
|
|
|
|
|
- 234: d503201f nop
|
|
|
|
|
- 238: d69f03e0 eret
|
|
|
|
|
- 23c: d6bf03e0 drps
|
|
|
|
|
- 240: d5033fdf isb
|
|
|
|
|
- 244: d503339f dsb osh
|
|
|
|
|
- 248: d50335bf dmb nshld
|
|
|
|
|
- 24c: d61f0280 br x20
|
|
|
|
|
- 250: d63f0040 blr x2
|
|
|
|
|
- 254: c8127c17 stxr w18, x23, [x0]
|
|
|
|
|
- 258: c81efec5 stlxr w30, x5, [x22]
|
|
|
|
|
- 25c: c85f7d05 ldxr x5, [x8]
|
|
|
|
|
- 260: c85ffe14 ldaxr x20, [x16]
|
|
|
|
|
- 264: c89ffd66 stlr x6, [x11]
|
|
|
|
|
- 268: c8dfff66 ldar x6, [x27]
|
|
|
|
|
- 26c: 880a7cb1 stxr w10, w17, [x5]
|
|
|
|
|
- 270: 8816fd89 stlxr w22, w9, [x12]
|
|
|
|
|
- 274: 885f7d1b ldxr w27, [x8]
|
|
|
|
|
- 278: 885ffc57 ldaxr w23, [x2]
|
|
|
|
|
- 27c: 889fffba stlr w26, [x29]
|
|
|
|
|
- 280: 88dffd4d ldar w13, [x10]
|
|
|
|
|
- 284: 48197f7c stxrh w25, w28, [x27]
|
|
|
|
|
- 288: 481dfd96 stlxrh w29, w22, [x12]
|
|
|
|
|
- 28c: 485f7f96 ldxrh w22, [x28]
|
|
|
|
|
- 290: 485fffc3 ldaxrh w3, [x30]
|
|
|
|
|
- 294: 489ffdf8 stlrh w24, [x15]
|
|
|
|
|
- 298: 48dfff5b ldarh w27, [x26]
|
|
|
|
|
- 29c: 080b7e6a stxrb w11, w10, [x19]
|
|
|
|
|
- 2a0: 0817fedb stlxrb w23, w27, [x22]
|
|
|
|
|
- 2a4: 085f7e18 ldxrb w24, [x16]
|
|
|
|
|
- 2a8: 085ffc38 ldaxrb w24, [x1]
|
|
|
|
|
- 2ac: 089fffa5 stlrb w5, [x29]
|
|
|
|
|
- 2b0: 08dffe18 ldarb w24, [x16]
|
|
|
|
|
- 2b4: c87f6239 ldxp x25, x24, [x17]
|
|
|
|
|
- 2b8: c87fb276 ldaxp x22, x12, [x19]
|
|
|
|
|
- 2bc: c820573a stxp w0, x26, x21, [x25]
|
|
|
|
|
- 2c0: c821aca6 stlxp w1, x6, x11, [x5]
|
|
|
|
|
- 2c4: 887f388d ldxp w13, w14, [x4]
|
|
|
|
|
- 2c8: 887f88d1 ldaxp w17, w2, [x6]
|
|
|
|
|
- 2cc: 882f2643 stxp w15, w3, w9, [x18]
|
|
|
|
|
- 2d0: 88329131 stlxp w18, w17, w4, [x9]
|
|
|
|
|
- 2d4: f81cf2b7 stur x23, [x21, #-49]
|
|
|
|
|
- 2d8: b803f055 stur w21, [x2, #63]
|
|
|
|
|
- 2dc: 39002f9b strb w27, [x28, #11]
|
|
|
|
|
- 2e0: 781f31fd sturh w29, [x15, #-13]
|
|
|
|
|
- 2e4: f85d33ce ldur x14, [x30, #-45]
|
|
|
|
|
- 2e8: b843539d ldur w29, [x28, #53]
|
|
|
|
|
- 2ec: 39401f54 ldrb w20, [x26, #7]
|
|
|
|
|
- 2f0: 785ce059 ldurh w25, [x2, #-50]
|
|
|
|
|
- 2f4: 389f1143 ldursb x3, [x10, #-15]
|
|
|
|
|
- 2f8: 788131ee ldursh x14, [x15, #19]
|
|
|
|
|
- 2fc: 78dfb17d ldursh w29, [x11, #-5]
|
|
|
|
|
- 300: b89b90af ldursw x15, [x5, #-71]
|
|
|
|
|
- 304: fc403193 ldur d19, [x12, #3]
|
|
|
|
|
- 308: bc42a36c ldur s12, [x27, #42]
|
|
|
|
|
- 30c: fc07d396 stur d22, [x28, #125]
|
|
|
|
|
- 310: bc1ec1f8 stur s24, [x15, #-20]
|
|
|
|
|
- 314: f81e8f88 str x8, [x28, #-24]!
|
|
|
|
|
- 318: b8025de6 str w6, [x15, #37]!
|
|
|
|
|
- 31c: 38007c27 strb w7, [x1, #7]!
|
|
|
|
|
- 320: 7801ee20 strh w0, [x17, #30]!
|
|
|
|
|
- 324: f8454fb9 ldr x25, [x29, #84]!
|
|
|
|
|
- 328: b85cce9a ldr w26, [x20, #-52]!
|
|
|
|
|
- 32c: 385e7fba ldrb w26, [x29, #-25]!
|
|
|
|
|
- 330: 7841af24 ldrh w4, [x25, #26]!
|
|
|
|
|
- 334: 389ebd1c ldrsb x28, [x8, #-21]!
|
|
|
|
|
- 338: 789fadd1 ldrsh x17, [x14, #-6]!
|
|
|
|
|
- 33c: 78c0aefc ldrsh w28, [x23, #10]!
|
|
|
|
|
- 340: b89c0f7e ldrsw x30, [x27, #-64]!
|
|
|
|
|
- 344: fc50efd4 ldr d20, [x30, #-242]!
|
|
|
|
|
- 348: bc414f71 ldr s17, [x27, #20]!
|
|
|
|
|
- 34c: fc011c67 str d7, [x3, #17]!
|
|
|
|
|
- 350: bc1f0d6d str s13, [x11, #-16]!
|
|
|
|
|
- 354: f81c3526 str x6, [x9], #-61
|
|
|
|
|
- 358: b81e34b0 str w16, [x5], #-29
|
|
|
|
|
- 35c: 3800f7bd strb w29, [x29], #15
|
|
|
|
|
- 360: 78012684 strh w4, [x20], #18
|
|
|
|
|
- 364: f842e653 ldr x19, [x18], #46
|
|
|
|
|
- 368: b8417456 ldr w22, [x2], #23
|
|
|
|
|
- 36c: 385e2467 ldrb w7, [x3], #-30
|
|
|
|
|
- 370: 785e358b ldrh w11, [x12], #-29
|
|
|
|
|
- 374: 389e34c8 ldrsb x8, [x6], #-29
|
|
|
|
|
- 378: 788046f8 ldrsh x24, [x23], #4
|
|
|
|
|
- 37c: 78c00611 ldrsh w17, [x16], #0
|
|
|
|
|
- 380: b89f8680 ldrsw x0, [x20], #-8
|
|
|
|
|
- 384: fc582454 ldr d20, [x2], #-126
|
|
|
|
|
- 388: bc5987d3 ldr s19, [x30], #-104
|
|
|
|
|
- 38c: fc076624 str d4, [x17], #118
|
|
|
|
|
- 390: bc190675 str s21, [x19], #-112
|
|
|
|
|
- 394: f833785a str x26, [x2, x19, lsl #3]
|
|
|
|
|
- 398: b82fd809 str w9, [x0, w15, sxtw #2]
|
|
|
|
|
- 39c: 3821799a strb w26, [x12, x1, lsl #0]
|
|
|
|
|
- 3a0: 782a7975 strh w21, [x11, x10, lsl #1]
|
|
|
|
|
- 3a4: f870eaf0 ldr x16, [x23, x16, sxtx]
|
|
|
|
|
- 3a8: b871d96a ldr w10, [x11, w17, sxtw #2]
|
|
|
|
|
- 3ac: 386b7aed ldrb w13, [x23, x11, lsl #0]
|
|
|
|
|
- 3b0: 7875689b ldrh w27, [x4, x21]
|
|
|
|
|
- 3b4: 38afd91a ldrsb x26, [x8, w15, sxtw #0]
|
|
|
|
|
- 3b8: 78a2c955 ldrsh x21, [x10, w2, sxtw]
|
|
|
|
|
- 3bc: 78ee6bc8 ldrsh w8, [x30, x14]
|
|
|
|
|
- 3c0: b8b4f9dd ldrsw x29, [x14, x20, sxtx #2]
|
|
|
|
|
- 3c4: fc76eb7e ldr d30, [x27, x22, sxtx]
|
|
|
|
|
- 3c8: bc76692d ldr s13, [x9, x22]
|
|
|
|
|
- 3cc: fc31db28 str d8, [x25, w17, sxtw #3]
|
|
|
|
|
- 3d0: bc255b01 str s1, [x24, w5, uxtw #2]
|
|
|
|
|
- 3d4: f91c52aa str x10, [x21, #14496]
|
|
|
|
|
- 3d8: b91c3fb2 str w18, [x29, #7228]
|
|
|
|
|
- 3dc: 391f8877 strb w23, [x3, #2018]
|
|
|
|
|
- 3e0: 791ac97c strh w28, [x11, #3428]
|
|
|
|
|
- 3e4: f95c1758 ldr x24, [x26, #14376]
|
|
|
|
|
- 3e8: b95b3c55 ldr w21, [x2, #6972]
|
|
|
|
|
- 3ec: 395ce0a4 ldrb w4, [x5, #1848]
|
|
|
|
|
- 3f0: 795851ce ldrh w14, [x14, #3112]
|
|
|
|
|
- 3f4: 399e9f64 ldrsb x4, [x27, #1959]
|
|
|
|
|
- 3f8: 79993764 ldrsh x4, [x27, #3226]
|
|
|
|
|
- 3fc: 79d9af8a ldrsh w10, [x28, #3286]
|
|
|
|
|
- 400: b99eea2a ldrsw x10, [x17, #7912]
|
|
|
|
|
- 404: fd5a2f8d ldr d13, [x28, #13400]
|
|
|
|
|
- 408: bd5dac78 ldr s24, [x3, #7596]
|
|
|
|
|
- 40c: fd1e0182 str d2, [x12, #15360]
|
|
|
|
|
- 410: bd195c31 str s17, [x1, #6492]
|
|
|
|
|
- 414: 58000010 ldr x16, 414 <back+0x414>
|
|
|
|
|
- 418: 1800000d ldr w13, 418 <back+0x418>
|
|
|
|
|
- 41c: f8981240 prfum pldl1keep, [x18, #-127]
|
|
|
|
|
- 420: d8ffdf00 prfm pldl1keep, 0 <back>
|
|
|
|
|
- 424: f8a27a80 prfm pldl1keep, [x20, x2, lsl #3]
|
|
|
|
|
- 428: f99af920 prfm pldl1keep, [x9, #13808]
|
|
|
|
|
- 42c: 1a0202e8 adc w8, w23, w2
|
|
|
|
|
- 430: 3a130078 adcs w24, w3, w19
|
|
|
|
|
- 434: 5a1d0316 sbc w22, w24, w29
|
|
|
|
|
- 438: 7a03036c sbcs w12, w27, w3
|
|
|
|
|
- 43c: 9a0102eb adc x11, x23, x1
|
|
|
|
|
- 440: ba1700bd adcs x29, x5, x23
|
|
|
|
|
- 444: da0c0329 sbc x9, x25, x12
|
|
|
|
|
- 448: fa16000c sbcs x12, x0, x22
|
|
|
|
|
- 44c: 0b23459a add w26, w12, w3, uxtw #1
|
|
|
|
|
- 450: 2b328a14 adds w20, w16, w18, sxtb #2
|
|
|
|
|
- 454: cb274bde sub x30, x30, w7, uxtw #2
|
|
|
|
|
- 458: 6b222eab subs w11, w21, w2, uxth #3
|
|
|
|
|
- 45c: 8b214b42 add x2, x26, w1, uxtw #2
|
|
|
|
|
- 460: ab34a7b2 adds x18, x29, w20, sxth #1
|
|
|
|
|
- 464: cb24520e sub x14, x16, w4, uxtw #4
|
|
|
|
|
- 468: eb378e20 subs x0, x17, w23, sxtb #3
|
|
|
|
|
- 46c: 3a565283 ccmn w20, w22, #0x3, pl // pl = nfrst
|
|
|
|
|
- 470: 7a420321 ccmp w25, w2, #0x1, eq // eq = none
|
|
|
|
|
- 474: ba58c247 ccmn x18, x24, #0x7, gt
|
|
|
|
|
- 478: fa4d5106 ccmp x8, x13, #0x6, pl // pl = nfrst
|
|
|
|
|
- 47c: 3a426924 ccmn w9, #0x2, #0x4, vs
|
|
|
|
|
- 480: 7a5b0847 ccmp w2, #0x1b, #0x7, eq // eq = none
|
|
|
|
|
- 484: ba413a02 ccmn x16, #0x1, #0x2, cc // cc = lo, ul, last
|
|
|
|
|
- 488: fa5fba23 ccmp x17, #0x1f, #0x3, lt // lt = tstop
|
|
|
|
|
- 48c: 1a979377 csel w23, w27, w23, ls // ls = plast
|
|
|
|
|
- 490: 1a86640a csinc w10, w0, w6, vs
|
|
|
|
|
- 494: 5a89300b csinv w11, w0, w9, cc // cc = lo, ul, last
|
|
|
|
|
- 498: 5a923771 csneg w17, w27, w18, cc // cc = lo, ul, last
|
|
|
|
|
- 49c: 9a8b720c csel x12, x16, x11, vc
|
|
|
|
|
- 4a0: 9a868786 csinc x6, x28, x6, hi // hi = pmore
|
|
|
|
|
- 4a4: da9a736d csinv x13, x27, x26, vc
|
|
|
|
|
- 4a8: da9256dd csneg x29, x22, x18, pl // pl = nfrst
|
|
|
|
|
- 4ac: 5ac0026c rbit w12, w19
|
|
|
|
|
- 4b0: 5ac00657 rev16 w23, w18
|
|
|
|
|
- 4b4: 5ac00b89 rev w9, w28
|
|
|
|
|
- 4b8: 5ac01262 clz w2, w19
|
|
|
|
|
- 4bc: 5ac017b9 cls w25, w29
|
|
|
|
|
- 4c0: dac002e4 rbit x4, x23
|
|
|
|
|
- 4c4: dac0065d rev16 x29, x18
|
|
|
|
|
- 4c8: dac00907 rev32 x7, x8
|
|
|
|
|
- 4cc: dac00e2d rev x13, x17
|
|
|
|
|
- 4d0: dac01011 clz x17, x0
|
|
|
|
|
- 4d4: dac01752 cls x18, x26
|
|
|
|
|
- 4d8: 1ad0098b udiv w11, w12, w16
|
|
|
|
|
- 4dc: 1ac70d24 sdiv w4, w9, w7
|
|
|
|
|
- 4e0: 1ad020ec lsl w12, w7, w16
|
|
|
|
|
- 4e4: 1ad72613 lsr w19, w16, w23
|
|
|
|
|
- 4e8: 1ac62887 asr w7, w4, w6
|
|
|
|
|
- 4ec: 1ad72e95 ror w21, w20, w23
|
|
|
|
|
- 4f0: 9adc0990 udiv x16, x12, x28
|
|
|
|
|
- 4f4: 9acd0d84 sdiv x4, x12, x13
|
|
|
|
|
- 4f8: 9ac721a9 lsl x9, x13, x7
|
|
|
|
|
- 4fc: 9acf277c lsr x28, x27, x15
|
|
|
|
|
- 500: 9ace2bd4 asr x20, x30, x14
|
|
|
|
|
- 504: 9ade2e4e ror x14, x18, x30
|
|
|
|
|
- 508: 9bc77d63 umulh x3, x11, x7
|
|
|
|
|
- 50c: 9b587e97 smulh x23, x20, x24
|
|
|
|
|
- 510: 1b1524a2 madd w2, w5, w21, w9
|
|
|
|
|
- 514: 1b04a318 msub w24, w24, w4, w8
|
|
|
|
|
- 518: 9b0f4d8b madd x11, x12, x15, x19
|
|
|
|
|
- 51c: 9b0ce73d msub x29, x25, x12, x25
|
|
|
|
|
- 520: 9b2c5971 smaddl x17, w11, w12, x22
|
|
|
|
|
- 524: 9b34c87c smsubl x28, w3, w20, x18
|
|
|
|
|
- 528: 9bbc6887 umaddl x7, w4, w28, x26
|
|
|
|
|
- 52c: 9bb19556 umsubl x22, w10, w17, x5
|
|
|
|
|
- 530: 1e310871 fmul s17, s3, s17
|
|
|
|
|
- 534: 1e261a2b fdiv s11, s17, s6
|
|
|
|
|
- 538: 1e2928fd fadd s29, s7, s9
|
|
|
|
|
- 53c: 1e333987 fsub s7, s12, s19
|
|
|
|
|
- 540: 1e230ae0 fmul s0, s23, s3
|
|
|
|
|
- 544: 1e75087a fmul d26, d3, d21
|
|
|
|
|
- 548: 1e651a60 fdiv d0, d19, d5
|
|
|
|
|
- 54c: 1e692b40 fadd d0, d26, d9
|
|
|
|
|
- 550: 1e753ab9 fsub d25, d21, d21
|
|
|
|
|
- 554: 1e7309b0 fmul d16, d13, d19
|
|
|
|
|
- 558: 1f00425d fmadd s29, s18, s0, s16
|
|
|
|
|
- 55c: 1f1d95b7 fmsub s23, s13, s29, s5
|
|
|
|
|
- 560: 1f2a38e9 fnmadd s9, s7, s10, s14
|
|
|
|
|
- 564: 1f2f5f99 fnmadd s25, s28, s15, s23
|
|
|
|
|
- 568: 1f5545a6 fmadd d6, d13, d21, d17
|
|
|
|
|
- 56c: 1f429ea3 fmsub d3, d21, d2, d7
|
|
|
|
|
- 570: 1f65472a fnmadd d10, d25, d5, d17
|
|
|
|
|
- 574: 1f7449ce fnmadd d14, d14, d20, d18
|
|
|
|
|
- 578: 1e20404f fmov s15, s2
|
|
|
|
|
- 57c: 1e20c0f2 fabs s18, s7
|
|
|
|
|
- 580: 1e2140c3 fneg s3, s6
|
|
|
|
|
- 584: 1e21c02c fsqrt s12, s1
|
|
|
|
|
- 588: 1e22c009 fcvt d9, s0
|
|
|
|
|
- 58c: 1e6040a4 fmov d4, d5
|
|
|
|
|
- 590: 1e60c1e3 fabs d3, d15
|
|
|
|
|
- 594: 1e614331 fneg d17, d25
|
|
|
|
|
- 598: 1e61c30c fsqrt d12, d24
|
|
|
|
|
- 59c: 1e6240b5 fcvt s21, d5
|
|
|
|
|
- 5a0: 1e3802a4 fcvtzs w4, s21
|
|
|
|
|
- 5a4: 9e38007b fcvtzs x27, s3
|
|
|
|
|
- 5a8: 1e78011d fcvtzs w29, d8
|
|
|
|
|
- 5ac: 9e7802a9 fcvtzs x9, d21
|
|
|
|
|
- 5b0: 1e2203b4 scvtf s20, w29
|
|
|
|
|
- 5b4: 9e220107 scvtf s7, x8
|
|
|
|
|
- 5b8: 1e6202ac scvtf d12, w21
|
|
|
|
|
- 5bc: 9e6202b0 scvtf d16, x21
|
|
|
|
|
- 5c0: 1e2600b2 fmov w18, s5
|
|
|
|
|
- 5c4: 9e660119 fmov x25, d8
|
|
|
|
|
- 5c8: 1e270352 fmov s18, w26
|
|
|
|
|
- 5cc: 9e670160 fmov d0, x11
|
|
|
|
|
- 5d0: 1e262200 fcmp s16, s6
|
|
|
|
|
- 5d4: 1e7d2200 fcmp d16, d29
|
|
|
|
|
- 5d8: 1e2023c8 fcmp s30, #0.0
|
|
|
|
|
- 5dc: 1e602128 fcmp d9, #0.0
|
|
|
|
|
- 5e0: 293e119b stp w27, w4, [x12, #-16]
|
|
|
|
|
- 5e4: 294a2543 ldp w3, w9, [x10, #80]
|
|
|
|
|
- 5e8: 69480c70 ldpsw x16, x3, [x3, #64]
|
|
|
|
|
- 5ec: a934726a stp x10, x28, [x19, #-192]
|
|
|
|
|
- 5f0: a97448f3 ldp x19, x18, [x7, #-192]
|
|
|
|
|
- 5f4: 298243ca stp w10, w16, [x30, #16]!
|
|
|
|
|
- 5f8: 29e21242 ldp w2, w4, [x18, #-240]!
|
|
|
|
|
- 5fc: 69c64db8 ldpsw x24, x19, [x13, #48]!
|
|
|
|
|
- 600: a9800311 stp x17, x0, [x24, #0]!
|
|
|
|
|
- 604: a9f4686e ldp x14, x26, [x3, #-192]!
|
|
|
|
|
- 608: 288a0416 stp w22, w1, [x0], #80
|
|
|
|
|
- 60c: 28fe2812 ldp w18, w10, [x0], #-16
|
|
|
|
|
- 610: 68fe62d8 .inst 0x68fe62d8 ; undefined
|
|
|
|
|
- 614: a885308c stp x12, x12, [x4], #80
|
|
|
|
|
- 618: a8f12664 ldp x4, x9, [x19], #-240
|
|
|
|
|
- 61c: 282468d2 stnp w18, w26, [x6, #-224]
|
|
|
|
|
- 620: 284e5035 ldnp w21, w20, [x1, #112]
|
|
|
|
|
- 624: a8327699 stnp x25, x29, [x20, #-224]
|
|
|
|
|
- 628: a84716e1 ldnp x1, x5, [x23, #112]
|
|
|
|
|
- 62c: 0c407284 ld1 {v4.8b}, [x20]
|
|
|
|
|
- 630: 4cdfa158 ld1 {v24.16b, v25.16b}, [x10], #32
|
|
|
|
|
- 634: 0ccf6cd8 ld1 {v24.1d-v26.1d}, [x6], x15
|
|
|
|
|
- 638: 4cdf2483 ld1 {v3.8h-v6.8h}, [x4], #64
|
|
|
|
|
- 63c: 0d40c0c2 ld1r {v2.8b}, [x6]
|
|
|
|
|
- 640: 4ddfc9cd ld1r {v13.4s}, [x14], #4
|
|
|
|
|
- 644: 0dd8ceaf ld1r {v15.1d}, [x21], x24
|
|
|
|
|
- 648: 4c408ea9 ld2 {v9.2d, v10.2d}, [x21]
|
|
|
|
|
- 64c: 0cdf86bd ld2 {v29.4h, v30.4h}, [x21], #16
|
|
|
|
|
- 650: 4d60c1c8 ld2r {v8.16b, v9.16b}, [x14]
|
|
|
|
|
- 654: 0dffca87 ld2r {v7.2s, v8.2s}, [x20], #8
|
|
|
|
|
- 658: 4de3cc7c ld2r {v28.2d, v29.2d}, [x3], x3
|
|
|
|
|
- 65c: 4cdd497b ld3 {v27.4s-v29.4s}, [x11], x29
|
|
|
|
|
- 660: 0c404950 ld3 {v16.2s-v18.2s}, [x10]
|
|
|
|
|
- 664: 4d40e595 ld3r {v21.8h-v23.8h}, [x12]
|
|
|
|
|
- 668: 4ddfeba4 ld3r {v4.4s-v6.4s}, [x29], #12
|
|
|
|
|
- 66c: 0dd3ed38 ld3r {v24.1d-v26.1d}, [x9], x19
|
|
|
|
|
- 670: 4cdf046a ld4 {v10.8h-v13.8h}, [x3], #64
|
|
|
|
|
- 674: 0cc9039b ld4 {v27.8b-v30.8b}, [x28], x9
|
|
|
|
|
- 678: 0d60e3d5 ld4r {v21.8b-v24.8b}, [x30]
|
|
|
|
|
- 67c: 0dffe5d7 ld4r {v23.4h-v26.4h}, [x14], #8
|
|
|
|
|
- 680: 0df4e9a4 ld4r {v4.2s-v7.2s}, [x13], x20
|
|
|
|
|
- 684: ba5fd3e3 ccmn xzr, xzr, #0x3, le
|
|
|
|
|
- 688: 3a5f03e5 ccmn wzr, wzr, #0x5, eq // eq = none
|
|
|
|
|
- 68c: fa411be4 ccmp xzr, #0x1, #0x4, ne // ne = any
|
|
|
|
|
- 690: 7a42cbe2 ccmp wzr, #0x2, #0x2, gt
|
|
|
|
|
- 694: 93df03ff ror xzr, xzr, #0
|
|
|
|
|
- 698: c820ffff stlxp w0, xzr, xzr, [sp]
|
|
|
|
|
- 69c: 8822fc7f stlxp w2, wzr, wzr, [x3]
|
|
|
|
|
- 6a0: c8247cbf stxp w4, xzr, xzr, [x5]
|
|
|
|
|
- 6a4: 88267fff stxp w6, wzr, wzr, [sp]
|
|
|
|
|
- 6a8: 4e010fe0 dup v0.16b, wzr
|
|
|
|
|
- 6ac: 4e081fe1 mov v1.d[0], xzr
|
|
|
|
|
- 6b0: 4e0c1fe1 mov v1.s[1], wzr
|
|
|
|
|
- 6b4: 4e0a1fe1 mov v1.h[2], wzr
|
|
|
|
|
- 6b8: 4e071fe1 mov v1.b[3], wzr
|
|
|
|
|
- 6bc: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0
|
|
|
|
|
- 6c0: 1e601000 fmov d0, #2.000000000000000000e+00
|
|
|
|
|
- 6c4: 1e603000 fmov d0, #2.125000000000000000e+00
|
|
|
|
|
- 6c8: 1e621000 fmov d0, #4.000000000000000000e+00
|
|
|
|
|
- 6cc: 1e623000 fmov d0, #4.250000000000000000e+00
|
|
|
|
|
- 6d0: 1e641000 fmov d0, #8.000000000000000000e+00
|
|
|
|
|
- 6d4: 1e643000 fmov d0, #8.500000000000000000e+00
|
|
|
|
|
- 6d8: 1e661000 fmov d0, #1.600000000000000000e+01
|
|
|
|
|
- 6dc: 1e663000 fmov d0, #1.700000000000000000e+01
|
|
|
|
|
- 6e0: 1e681000 fmov d0, #1.250000000000000000e-01
|
|
|
|
|
- 6e4: 1e683000 fmov d0, #1.328125000000000000e-01
|
|
|
|
|
- 6e8: 1e6a1000 fmov d0, #2.500000000000000000e-01
|
|
|
|
|
- 6ec: 1e6a3000 fmov d0, #2.656250000000000000e-01
|
|
|
|
|
- 6f0: 1e6c1000 fmov d0, #5.000000000000000000e-01
|
|
|
|
|
- 6f4: 1e6c3000 fmov d0, #5.312500000000000000e-01
|
|
|
|
|
- 6f8: 1e6e1000 fmov d0, #1.000000000000000000e+00
|
|
|
|
|
- 6fc: 1e6e3000 fmov d0, #1.062500000000000000e+00
|
|
|
|
|
- 700: 1e701000 fmov d0, #-2.000000000000000000e+00
|
|
|
|
|
- 704: 1e703000 fmov d0, #-2.125000000000000000e+00
|
|
|
|
|
- 708: 1e721000 fmov d0, #-4.000000000000000000e+00
|
|
|
|
|
- 70c: 1e723000 fmov d0, #-4.250000000000000000e+00
|
|
|
|
|
- 710: 1e741000 fmov d0, #-8.000000000000000000e+00
|
|
|
|
|
- 714: 1e743000 fmov d0, #-8.500000000000000000e+00
|
|
|
|
|
- 718: 1e761000 fmov d0, #-1.600000000000000000e+01
|
|
|
|
|
- 71c: 1e763000 fmov d0, #-1.700000000000000000e+01
|
|
|
|
|
- 720: 1e781000 fmov d0, #-1.250000000000000000e-01
|
|
|
|
|
- 724: 1e783000 fmov d0, #-1.328125000000000000e-01
|
|
|
|
|
- 728: 1e7a1000 fmov d0, #-2.500000000000000000e-01
|
|
|
|
|
- 72c: 1e7a3000 fmov d0, #-2.656250000000000000e-01
|
|
|
|
|
- 730: 1e7c1000 fmov d0, #-5.000000000000000000e-01
|
|
|
|
|
- 734: 1e7c3000 fmov d0, #-5.312500000000000000e-01
|
|
|
|
|
- 738: 1e7e1000 fmov d0, #-1.000000000000000000e+00
|
|
|
|
|
- 73c: 1e7e3000 fmov d0, #-1.062500000000000000e+00
|
|
|
|
|
- 740: f8358305 swp x21, x5, [x24]
|
|
|
|
|
- 744: f82d01ed ldadd x13, x13, [x15]
|
|
|
|
|
- 748: f8361353 ldclr x22, x19, [x26]
|
|
|
|
|
- 74c: f839234a ldeor x25, x10, [x26]
|
|
|
|
|
- 750: f82531fb ldset x5, x27, [x15]
|
|
|
|
|
- 754: f8335165 ldsmin x19, x5, [x11]
|
|
|
|
|
- 758: f83a4080 ldsmax x26, x0, [x4]
|
|
|
|
|
- 75c: f83673d7 ldumin x22, x23, [x30]
|
|
|
|
|
- 760: f832611c ldumax x18, x28, [x8]
|
|
|
|
|
- 764: f8ad837d swpa x13, x29, [x27]
|
|
|
|
|
- 768: f8ab01a5 ldadda x11, x5, [x13]
|
|
|
|
|
- 76c: f8a112b8 ldclra x1, x24, [x21]
|
|
|
|
|
- 770: f8bb2311 ldeora x27, x17, [x24]
|
|
|
|
|
- 774: f8b230be ldseta x18, x30, [x5]
|
|
|
|
|
- 778: f8a75336 ldsmina x7, x22, [x25]
|
|
|
|
|
- 77c: f8a4427a ldsmaxa x4, x26, [x19]
|
|
|
|
|
- 780: f8a6707e ldumina x6, x30, [x3]
|
|
|
|
|
- 784: f8b860b7 ldumaxa x24, x23, [x5]
|
|
|
|
|
- 788: f8f88392 swpal x24, x18, [x28]
|
|
|
|
|
- 78c: f8f300ff ldaddal x19, xzr, [x7]
|
|
|
|
|
- 790: f8ed1386 ldclral x13, x6, [x28]
|
|
|
|
|
- 794: f8e822af ldeoral x8, x15, [x21]
|
|
|
|
|
- 798: f8e2302d ldsetal x2, x13, [x1]
|
|
|
|
|
- 79c: f8f1533d ldsminal x17, x29, [x25]
|
|
|
|
|
- 7a0: f8f941d2 ldsmaxal x25, x18, [x14]
|
|
|
|
|
- 7a4: f8ff7366 lduminal xzr, x6, [x27]
|
|
|
|
|
- 7a8: f8f061e5 ldumaxal x16, x5, [x15]
|
|
|
|
|
- 7ac: f86b8072 swpl x11, x18, [x3]
|
|
|
|
|
- 7b0: f87a0054 ldaddl x26, x20, [x2]
|
|
|
|
|
- 7b4: f86b1164 ldclrl x11, x4, [x11]
|
|
|
|
|
- 7b8: f87e22f3 ldeorl x30, x19, [x23]
|
|
|
|
|
- 7bc: f86331cf ldsetl x3, x15, [x14]
|
|
|
|
|
- 7c0: f87e5296 ldsminl x30, x22, [x20]
|
|
|
|
|
- 7c4: f8674305 ldsmaxl x7, x5, [x24]
|
|
|
|
|
- 7c8: f87771f0 lduminl x23, x16, [x15]
|
|
|
|
|
- 7cc: f86b6013 ldumaxl x11, x19, [x0]
|
|
|
|
|
- 7d0: b83c803c swp w28, w28, [x1]
|
|
|
|
|
- 7d4: b82b0195 ldadd w11, w21, [x12]
|
|
|
|
|
- 7d8: b83d1240 ldclr w29, w0, [x18]
|
|
|
|
|
- 7dc: b8252320 ldeor w5, w0, [x25]
|
|
|
|
|
- 7e0: b82e3340 ldset w14, w0, [x26]
|
|
|
|
|
- 7e4: b83c53b2 ldsmin w28, w18, [x29]
|
|
|
|
|
- 7e8: b82f43a1 ldsmax w15, w1, [x29]
|
|
|
|
|
- 7ec: b828739a ldumin w8, w26, [x28]
|
|
|
|
|
- 7f0: b831608e ldumax w17, w14, [x4]
|
|
|
|
|
- 7f4: b8b88039 swpa w24, w25, [x1]
|
|
|
|
|
- 7f8: b8aa0231 ldadda w10, w17, [x17]
|
|
|
|
|
- 7fc: b8bd12b4 ldclra w29, w20, [x21]
|
|
|
|
|
- 800: b8bd2189 ldeora w29, w9, [x12]
|
|
|
|
|
- 804: b8ab30a6 ldseta w11, w6, [x5]
|
|
|
|
|
- 808: b8b552a7 ldsmina w21, w7, [x21]
|
|
|
|
|
- 80c: b8aa4197 ldsmaxa w10, w23, [x12]
|
|
|
|
|
- 810: b8b57145 ldumina w21, w5, [x10]
|
|
|
|
|
- 814: b8be6254 ldumaxa w30, w20, [x18]
|
|
|
|
|
- 818: b8ed80b7 swpal w13, w23, [x5]
|
|
|
|
|
- 81c: b8ef00b8 ldaddal w15, w24, [x5]
|
|
|
|
|
- 820: b8e9132a ldclral w9, w10, [x25]
|
|
|
|
|
- 824: b8f42231 ldeoral w20, w17, [x17]
|
|
|
|
|
- 828: b8ec33d2 ldsetal w12, w18, [x30]
|
|
|
|
|
- 82c: b8e35323 ldsminal w3, w3, [x25]
|
|
|
|
|
- 830: b8fa4159 ldsmaxal w26, w25, [x10]
|
|
|
|
|
- 834: b8e273eb lduminal w2, w11, [sp]
|
|
|
|
|
- 838: b8e760a2 ldumaxal w7, w2, [x5]
|
|
|
|
|
- 83c: b8608287 swpl w0, w7, [x20]
|
|
|
|
|
- 840: b865005f staddl w5, [x2]
|
|
|
|
|
- 844: b87b1379 ldclrl w27, w25, [x27]
|
|
|
|
|
- 848: b87e2358 ldeorl w30, w24, [x26]
|
|
|
|
|
- 84c: b86f32c2 ldsetl w15, w2, [x22]
|
|
|
|
|
- 850: b86053e3 ldsminl w0, w3, [sp]
|
|
|
|
|
- 854: b86f4154 ldsmaxl w15, w20, [x10]
|
|
|
|
|
- 858: b87671d5 lduminl w22, w21, [x14]
|
|
|
|
|
- 85c: b866605e ldumaxl w6, w30, [x2]
|
|
|
|
|
+ 0: 8b4db437 add x23, x1, x13, lsr #45
|
|
|
|
|
+ 4: cb8ce3c8 sub x8, x30, x12, asr #56
|
|
|
|
|
+ 8: ab0edafb adds x27, x23, x14, lsl #54
|
|
|
|
|
+ c: eb5499f5 subs x21, x15, x20, lsr #38
|
|
|
|
|
+ 10: 0b040e39 add w25, w17, w4, lsl #3
|
|
|
|
|
+ 14: 4b89503d sub w29, w1, w9, asr #20
|
|
|
|
|
+ 18: 2b89274a adds w10, w26, w9, asr #9
|
|
|
|
|
+ 1c: 6b870fd5 subs w21, w30, w7, asr #3
|
|
|
|
|
+ 20: 8a4b1109 and x9, x8, x11, lsr #4
|
|
|
|
|
+ 24: aa810643 orr x3, x18, x1, asr #1
|
|
|
|
|
+ 28: ca026e8a eor x10, x20, x2, lsl #27
|
|
|
|
|
+ 2c: ea8b7d2c ands x12, x9, x11, asr #31
|
|
|
|
|
+ 30: 0a9e6934 and w20, w9, w30, asr #26
|
|
|
|
|
+ 34: 2a9a4555 orr w21, w10, w26, asr #17
|
|
|
|
|
+ 38: 4a871d00 eor w0, w8, w7, asr #7
|
|
|
|
|
+ 3c: 6a084973 ands w19, w11, w8, lsl #18
|
|
|
|
|
+ 40: 8a23d497 bic x23, x4, x3, lsl #53
|
|
|
|
|
+ 44: aa3360c9 orn x9, x6, x19, lsl #24
|
|
|
|
|
+ 48: ca7ad8cc eon x12, x6, x26, lsr #54
|
|
|
|
|
+ 4c: ea2c3a76 bics x22, x19, x12, lsl #14
|
|
|
|
|
+ 50: 0a362dbd bic w29, w13, w22, lsl #11
|
|
|
|
|
+ 54: 2ab417d1 orn w17, w30, w20, asr #5
|
|
|
|
|
+ 58: 4a2b23a1 eon w1, w29, w11, lsl #8
|
|
|
|
|
+ 5c: 6a667684 bics w4, w20, w6, lsr #29
|
|
|
|
|
+ 60: 1107e0de add w30, w6, #0x1f8
|
|
|
|
|
+ 64: 310ebd13 adds w19, w8, #0x3af
|
|
|
|
|
+ 68: 5105b55d sub w29, w10, #0x16d
|
|
|
|
|
+ 6c: 71047104 subs w4, w8, #0x11c
|
|
|
|
|
+ 70: 910ef9c3 add x3, x14, #0x3be
|
|
|
|
|
+ 74: b1029e96 adds x22, x20, #0xa7
|
|
|
|
|
+ 78: d10b55fb sub x27, x15, #0x2d5
|
|
|
|
|
+ 7c: f10ecf98 subs x24, x28, #0x3b3
|
|
|
|
|
+ 80: 12099f39 and w25, w25, #0x7f807f80
|
|
|
|
|
+ 84: 321b3f4d orr w13, w26, #0x1fffe0
|
|
|
|
|
+ 88: 520309b5 eor w21, w13, #0xe0000000
|
|
|
|
|
+ 8c: 72134062 ands w2, w3, #0x3fffe000
|
|
|
|
|
+ 90: 92004548 and x8, x10, #0x3ffff0003ffff
|
|
|
|
|
+ 94: b24d861b orr x27, x16, #0xfff80000001fffff
|
|
|
|
|
+ 98: d219587b eor x27, x3, #0x3fffff803fffff80
|
|
|
|
|
+ 9c: f25eaee4 ands x4, x23, #0xfffffffc00003fff
|
2020-12-24 15:35:16 +08:00
|
|
|
+ a0: 14000000 b a0 <back+0xa0>
|
|
|
|
|
+ a4: 17ffffd7 b 0 <back>
|
2021-08-13 14:54:30 +08:00
|
|
|
+ a8: 140001ee b 860 <forth>
|
2020-12-24 15:35:16 +08:00
|
|
|
+ ac: 94000000 bl ac <back+0xac>
|
|
|
|
|
+ b0: 97ffffd4 bl 0 <back>
|
2021-08-13 14:54:30 +08:00
|
|
|
+ b4: 940001eb bl 860 <forth>
|
|
|
|
|
+ b8: 34000003 cbz w3, b8 <back+0xb8>
|
|
|
|
|
+ bc: 34fffa23 cbz w3, 0 <back>
|
|
|
|
|
+ c0: 34003d03 cbz w3, 860 <forth>
|
|
|
|
|
+ c4: 35000002 cbnz w2, c4 <back+0xc4>
|
|
|
|
|
+ c8: 35fff9c2 cbnz w2, 0 <back>
|
|
|
|
|
+ cc: 35003ca2 cbnz w2, 860 <forth>
|
|
|
|
|
+ d0: b4000019 cbz x25, d0 <back+0xd0>
|
|
|
|
|
+ d4: b4fff979 cbz x25, 0 <back>
|
|
|
|
|
+ d8: b4003c59 cbz x25, 860 <forth>
|
|
|
|
|
+ dc: b5000012 cbnz x18, dc <back+0xdc>
|
|
|
|
|
+ e0: b5fff912 cbnz x18, 0 <back>
|
|
|
|
|
+ e4: b5003bf2 cbnz x18, 860 <forth>
|
|
|
|
|
+ e8: 10000008 adr x8, e8 <back+0xe8>
|
|
|
|
|
+ ec: 10fff8a8 adr x8, 0 <back>
|
|
|
|
|
+ f0: 10003b88 adr x8, 860 <forth>
|
|
|
|
|
+ f4: 9000000f adrp x15, 0 <back>
|
|
|
|
|
+ f8: 36700012 tbz w18, #14, f8 <back+0xf8>
|
|
|
|
|
+ fc: 3677f832 tbz w18, #14, 0 <back>
|
|
|
|
|
+ 100: 36703b12 tbz w18, #14, 860 <forth>
|
|
|
|
|
+ 104: 37780019 tbnz w25, #15, 104 <back+0x104>
|
|
|
|
|
+ 108: 377ff7d9 tbnz w25, #15, 0 <back>
|
|
|
|
|
+ 10c: 37783ab9 tbnz w25, #15, 860 <forth>
|
|
|
|
|
+ 110: 12a203d2 mov w18, #0xefe1ffff // #-270401537
|
|
|
|
|
+ 114: 5286b21e mov w30, #0x3590 // #13712
|
|
|
|
|
+ 118: 72a66d35 movk w21, #0x3369, lsl #16
|
|
|
|
|
+ 11c: 92eded92 mov x18, #0x9093ffffffffffff // #-8028792235694751745
|
|
|
|
|
+ 120: d2eefecd mov x13, #0x77f6000000000000 // #8644096534784245760
|
|
|
|
|
+ 124: f2ef69a3 movk x3, #0x7b4d, lsl #48
|
|
|
|
|
+ 128: 93400c2a sbfx x10, x1, #0, #4
|
|
|
|
|
+ 12c: 330562cc bfxil w12, w22, #5, #20
|
|
|
|
|
+ 130: 530b2071 ubfiz w17, w3, #21, #9
|
|
|
|
|
+ 134: 934b3860 sbfx x0, x3, #11, #4
|
|
|
|
|
+ 138: b3473cdc bfxil x28, x6, #7, #9
|
|
|
|
|
+ 13c: d3416549 ubfx x9, x10, #1, #25
|
|
|
|
|
+ 140: 13995f75 extr w21, w27, w25, #23
|
|
|
|
|
+ 144: 93d6462e extr x14, x17, x22, #17
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 148: 54000000 b.eq 148 <back+0x148> // b.none
|
|
|
|
|
+ 14c: 54fff5a0 b.eq 0 <back> // b.none
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 150: 54003880 b.eq 860 <forth> // b.none
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 154: 54000001 b.ne 154 <back+0x154> // b.any
|
|
|
|
|
+ 158: 54fff541 b.ne 0 <back> // b.any
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 15c: 54003821 b.ne 860 <forth> // b.any
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 160: 54000002 b.cs 160 <back+0x160> // b.hs, b.nlast
|
|
|
|
|
+ 164: 54fff4e2 b.cs 0 <back> // b.hs, b.nlast
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 168: 540037c2 b.cs 860 <forth> // b.hs, b.nlast
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 16c: 54000002 b.cs 16c <back+0x16c> // b.hs, b.nlast
|
|
|
|
|
+ 170: 54fff482 b.cs 0 <back> // b.hs, b.nlast
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 174: 54003762 b.cs 860 <forth> // b.hs, b.nlast
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 178: 54000003 b.cc 178 <back+0x178> // b.lo, b.ul, b.last
|
|
|
|
|
+ 17c: 54fff423 b.cc 0 <back> // b.lo, b.ul, b.last
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 180: 54003703 b.cc 860 <forth> // b.lo, b.ul, b.last
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 184: 54000003 b.cc 184 <back+0x184> // b.lo, b.ul, b.last
|
|
|
|
|
+ 188: 54fff3c3 b.cc 0 <back> // b.lo, b.ul, b.last
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 18c: 540036a3 b.cc 860 <forth> // b.lo, b.ul, b.last
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 190: 54000004 b.mi 190 <back+0x190> // b.first
|
|
|
|
|
+ 194: 54fff364 b.mi 0 <back> // b.first
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 198: 54003644 b.mi 860 <forth> // b.first
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 19c: 54000005 b.pl 19c <back+0x19c> // b.nfrst
|
|
|
|
|
+ 1a0: 54fff305 b.pl 0 <back> // b.nfrst
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 1a4: 540035e5 b.pl 860 <forth> // b.nfrst
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 1a8: 54000006 b.vs 1a8 <back+0x1a8>
|
|
|
|
|
+ 1ac: 54fff2a6 b.vs 0 <back>
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 1b0: 54003586 b.vs 860 <forth>
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 1b4: 54000007 b.vc 1b4 <back+0x1b4>
|
|
|
|
|
+ 1b8: 54fff247 b.vc 0 <back>
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 1bc: 54003527 b.vc 860 <forth>
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 1c0: 54000008 b.hi 1c0 <back+0x1c0> // b.pmore
|
|
|
|
|
+ 1c4: 54fff1e8 b.hi 0 <back> // b.pmore
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 1c8: 540034c8 b.hi 860 <forth> // b.pmore
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 1cc: 54000009 b.ls 1cc <back+0x1cc> // b.plast
|
|
|
|
|
+ 1d0: 54fff189 b.ls 0 <back> // b.plast
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 1d4: 54003469 b.ls 860 <forth> // b.plast
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 1d8: 5400000a b.ge 1d8 <back+0x1d8> // b.tcont
|
|
|
|
|
+ 1dc: 54fff12a b.ge 0 <back> // b.tcont
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 1e0: 5400340a b.ge 860 <forth> // b.tcont
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 1e4: 5400000b b.lt 1e4 <back+0x1e4> // b.tstop
|
|
|
|
|
+ 1e8: 54fff0cb b.lt 0 <back> // b.tstop
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 1ec: 540033ab b.lt 860 <forth> // b.tstop
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 1f0: 5400000c b.gt 1f0 <back+0x1f0>
|
|
|
|
|
+ 1f4: 54fff06c b.gt 0 <back>
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 1f8: 5400334c b.gt 860 <forth>
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 1fc: 5400000d b.le 1fc <back+0x1fc>
|
|
|
|
|
+ 200: 54fff00d b.le 0 <back>
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 204: 540032ed b.le 860 <forth>
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 208: 5400000e b.al 208 <back+0x208>
|
|
|
|
|
+ 20c: 54ffefae b.al 0 <back>
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 210: 5400328e b.al 860 <forth>
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 214: 5400000f b.nv 214 <back+0x214>
|
|
|
|
|
+ 218: 54ffef4f b.nv 0 <back>
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 21c: 5400322f b.nv 860 <forth>
|
|
|
|
|
+ 220: d40f9ca1 svc #0x7ce5
|
|
|
|
|
+ 224: d4008b22 hvc #0x459
|
|
|
|
|
+ 228: d40be1c3 smc #0x5f0e
|
|
|
|
|
+ 22c: d423d0e0 brk #0x1e87
|
|
|
|
|
+ 230: d44dee20 hlt #0x6f71
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 234: d503201f nop
|
|
|
|
|
+ 238: d69f03e0 eret
|
|
|
|
|
+ 23c: d6bf03e0 drps
|
|
|
|
|
+ 240: d5033fdf isb
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 244: d503359f dsb nshld
|
|
|
|
|
+ 248: d50337bf dmb nsh
|
|
|
|
|
+ 24c: d61f0380 br x28
|
|
|
|
|
+ 250: d63f0220 blr x17
|
|
|
|
|
+ 254: c8127f47 stxr w18, x7, [x26]
|
|
|
|
|
+ 258: c819fccc stlxr w25, x12, [x6]
|
|
|
|
|
+ 25c: c85f7e00 ldxr x0, [x16]
|
|
|
|
|
+ 260: c85ffc66 ldaxr x6, [x3]
|
|
|
|
|
+ 264: c89ffc2e stlr x14, [x1]
|
|
|
|
|
+ 268: c8dfff1d ldar x29, [x24]
|
|
|
|
|
+ 26c: 881c7eef stxr w28, w15, [x23]
|
|
|
|
|
+ 270: 8809fc67 stlxr w9, w7, [x3]
|
|
|
|
|
+ 274: 885f7e81 ldxr w1, [x20]
|
|
|
|
|
+ 278: 885ffdf4 ldaxr w20, [x15]
|
|
|
|
|
+ 27c: 889ffd35 stlr w21, [x9]
|
|
|
|
|
+ 280: 88dffe25 ldar w5, [x17]
|
|
|
|
|
+ 284: 480d7fd4 stxrh w13, w20, [x30]
|
|
|
|
|
+ 288: 480afe4c stlxrh w10, w12, [x18]
|
|
|
|
|
+ 28c: 485f7e64 ldxrh w4, [x19]
|
|
|
|
|
+ 290: 485ffd56 ldaxrh w22, [x10]
|
|
|
|
|
+ 294: 489ffdfe stlrh w30, [x15]
|
|
|
|
|
+ 298: 48dfff04 ldarh w4, [x24]
|
|
|
|
|
+ 29c: 080a7d94 stxrb w10, w20, [x12]
|
|
|
|
|
+ 2a0: 0814fd7d stlxrb w20, w29, [x11]
|
|
|
|
|
+ 2a4: 085f7cb5 ldxrb w21, [x5]
|
|
|
|
|
+ 2a8: 085ffd24 ldaxrb w4, [x9]
|
|
|
|
|
+ 2ac: 089fff9e stlrb w30, [x28]
|
|
|
|
|
+ 2b0: 08dfff13 ldarb w19, [x24]
|
|
|
|
|
+ 2b4: c87f424b ldxp x11, x16, [x18]
|
|
|
|
|
+ 2b8: c87f9de8 ldaxp x8, x7, [x15]
|
|
|
|
|
+ 2bc: c83c4154 stxp w28, x20, x16, [x10]
|
|
|
|
|
+ 2c0: c827d469 stlxp w7, x9, x21, [x3]
|
|
|
|
|
+ 2c4: 887f1a79 ldxp w25, w6, [x19]
|
|
|
|
|
+ 2c8: 887fa45e ldaxp w30, w9, [x2]
|
|
|
|
|
+ 2cc: 88305180 stxp w16, w0, w20, [x12]
|
|
|
|
|
+ 2d0: 88259f82 stlxp w5, w2, w7, [x28]
|
|
|
|
|
+ 2d4: f81b5270 stur x16, [x19, #-75]
|
|
|
|
|
+ 2d8: b801e381 stur w1, [x28, #30]
|
|
|
|
|
+ 2dc: 381e61bc sturb w28, [x13, #-26]
|
|
|
|
|
+ 2e0: 781cd0c8 sturh w8, [x6, #-51]
|
|
|
|
|
+ 2e4: f851d380 ldur x0, [x28, #-227]
|
|
|
|
|
+ 2e8: b85e615c ldur w28, [x10, #-26]
|
|
|
|
|
+ 2ec: 39403164 ldrb w4, [x11, #12]
|
|
|
|
|
+ 2f0: 78405221 ldurh w1, [x17, #5]
|
|
|
|
|
+ 2f4: 3980312b ldrsb x11, [x9, #12]
|
|
|
|
|
+ 2f8: 789ef108 ldursh x8, [x8, #-17]
|
|
|
|
|
+ 2fc: 78ddd1b4 ldursh w20, [x13, #-35]
|
|
|
|
|
+ 300: b8831137 ldursw x23, [x9, #49]
|
|
|
|
|
+ 304: fc41d089 ldur d9, [x4, #29]
|
|
|
|
|
+ 308: bd402a6b ldr s11, [x19, #40]
|
|
|
|
|
+ 30c: fc1d5299 stur d25, [x20, #-43]
|
|
|
|
|
+ 310: bc1b0039 stur s25, [x1, #-80]
|
|
|
|
|
+ 314: f8019c14 str x20, [x0, #25]!
|
|
|
|
|
+ 318: b81cfd8c str w12, [x12, #-49]!
|
|
|
|
|
+ 31c: 381f6e7c strb w28, [x19, #-10]!
|
|
|
|
|
+ 320: 781c1f8d strh w13, [x28, #-63]!
|
|
|
|
|
+ 324: f85d2eeb ldr x11, [x23, #-46]!
|
|
|
|
|
+ 328: b8411f1b ldr w27, [x24, #17]!
|
|
|
|
|
+ 32c: 385f4f4e ldrb w14, [x26, #-12]!
|
|
|
|
|
+ 330: 785d3ed8 ldrh w24, [x22, #-45]!
|
|
|
|
|
+ 334: 389f5d39 ldrsb x25, [x9, #-11]!
|
|
|
|
|
+ 338: 7881dcc5 ldrsh x5, [x6, #29]!
|
|
|
|
|
+ 33c: 78dffee7 ldrsh w7, [x23, #-1]!
|
|
|
|
|
+ 340: b89c3dba ldrsw x26, [x13, #-61]!
|
|
|
|
|
+ 344: fc50bf18 ldr d24, [x24, #-245]!
|
|
|
|
|
+ 348: bc5c9f34 ldr s20, [x25, #-55]!
|
|
|
|
|
+ 34c: fc135c49 str d9, [x2, #-203]!
|
|
|
|
|
+ 350: bc1c5c2e str s14, [x1, #-59]!
|
|
|
|
|
+ 354: f806d433 str x19, [x1], #109
|
|
|
|
|
+ 358: b81ca4a4 str w4, [x5], #-54
|
|
|
|
|
+ 35c: 3800947d strb w29, [x3], #9
|
|
|
|
|
+ 360: 781ce420 strh w0, [x1], #-50
|
|
|
|
|
+ 364: f85d04c2 ldr x2, [x6], #-48
|
|
|
|
|
+ 368: b858d4cf ldr w15, [x6], #-115
|
|
|
|
|
+ 36c: 385e5444 ldrb w4, [x2], #-27
|
|
|
|
|
+ 370: 785eb751 ldrh w17, [x26], #-21
|
|
|
|
|
+ 374: 389f3715 ldrsb x21, [x24], #-13
|
|
|
|
|
+ 378: 789d04d6 ldrsh x22, [x6], #-48
|
|
|
|
|
+ 37c: 78dd04cb ldrsh w11, [x6], #-48
|
|
|
|
|
+ 380: b89fb7ce ldrsw x14, [x30], #-5
|
|
|
|
|
+ 384: fc5975e2 ldr d2, [x15], #-105
|
|
|
|
|
+ 388: bc5a5679 ldr s25, [x19], #-91
|
|
|
|
|
+ 38c: fc1416ed str d13, [x23], #-191
|
|
|
|
|
+ 390: bc0006b6 str s22, [x21], #0
|
|
|
|
|
+ 394: f832c996 str x22, [x12, w18, sxtw]
|
|
|
|
|
+ 398: b82c4b7e str w30, [x27, w12, uxtw]
|
|
|
|
|
+ 39c: 38367887 strb w7, [x4, x22, lsl #0]
|
|
|
|
|
+ 3a0: 783dfaf3 strh w19, [x23, x29, sxtx #1]
|
|
|
|
|
+ 3a4: f87bf891 ldr x17, [x4, x27, sxtx #3]
|
|
|
|
|
+ 3a8: b871c9a1 ldr w1, [x13, w17, sxtw]
|
|
|
|
|
+ 3ac: 387dfb70 ldrb w16, [x27, x29, sxtx #0]
|
|
|
|
|
+ 3b0: 78645939 ldrh w25, [x9, w4, uxtw #1]
|
|
|
|
|
+ 3b4: 38b67984 ldrsb x4, [x12, x22, lsl #0]
|
|
|
|
|
+ 3b8: 78a55839 ldrsh x25, [x1, w5, uxtw #1]
|
|
|
|
|
+ 3bc: 78fc6a09 ldrsh w9, [x16, x28]
|
|
|
|
|
+ 3c0: b8aee8e8 ldrsw x8, [x7, x14, sxtx]
|
|
|
|
|
+ 3c4: fc705b84 ldr d4, [x28, w16, uxtw #3]
|
|
|
|
|
+ 3c8: bc7bd850 ldr s16, [x2, w27, sxtw #2]
|
|
|
|
|
+ 3cc: fc396817 str d23, [x0, x25]
|
|
|
|
|
+ 3d0: bc277a06 str s6, [x16, x7, lsl #2]
|
|
|
|
|
+ 3d4: f91ddd82 str x2, [x12, #15288]
|
|
|
|
|
+ 3d8: b91b10a8 str w8, [x5, #6928]
|
|
|
|
|
+ 3dc: 391f8221 strb w1, [x17, #2016]
|
|
|
|
|
+ 3e0: 79197728 strh w8, [x25, #3258]
|
|
|
|
|
+ 3e4: f95ca07c ldr x28, [x3, #14656]
|
|
|
|
|
+ 3e8: b95b5d75 ldr w21, [x11, #7004]
|
|
|
|
|
+ 3ec: 395dc8af ldrb w15, [x5, #1906]
|
|
|
|
|
+ 3f0: 795caa60 ldrh w0, [x19, #3668]
|
|
|
|
|
+ 3f4: 399dd53d ldrsb x29, [x9, #1909]
|
|
|
|
|
+ 3f8: 799c7397 ldrsh x23, [x28, #3640]
|
|
|
|
|
+ 3fc: 79dcb15b ldrsh w27, [x10, #3672]
|
|
|
|
|
+ 400: b99e3b75 ldrsw x21, [x27, #7736]
|
|
|
|
|
+ 404: fd5c7f7a ldr d26, [x27, #14584]
|
|
|
|
|
+ 408: bd5d2882 ldr s2, [x4, #7464]
|
|
|
|
|
+ 40c: fd1fb2a1 str d1, [x21, #16224]
|
|
|
|
|
+ 410: bd1d82c4 str s4, [x22, #7552]
|
|
|
|
|
+ 414: 58000001 ldr x1, 414 <back+0x414>
|
|
|
|
|
+ 418: 1800001b ldr w27, 418 <back+0x418>
|
|
|
|
|
+ 41c: f882d080 prfum pldl1keep, [x4, #45]
|
2020-12-24 15:35:16 +08:00
|
|
|
+ 420: d8000000 prfm pldl1keep, 420 <back+0x420>
|
2021-08-13 14:54:30 +08:00
|
|
|
+ 424: f8a0cbc0 prfm pldl1keep, [x30, w0, sxtw]
|
|
|
|
|
+ 428: f99fab00 prfm pldl1keep, [x24, #16208]
|
|
|
|
|
+ 42c: 1a1803a0 adc w0, w29, w24
|
|
|
|
|
+ 430: 3a120396 adcs w22, w28, w18
|
|
|
|
|
+ 434: 5a1e0217 sbc w23, w16, w30
|
|
|
|
|
+ 438: 7a0e03a7 sbcs w7, w29, w14
|
|
|
|
|
+ 43c: 9a0e0196 adc x22, x12, x14
|
|
|
|
|
+ 440: ba17031d adcs x29, x24, x23
|
|
|
|
|
+ 444: da160391 sbc x17, x28, x22
|
|
|
|
|
+ 448: fa130298 sbcs x24, x20, x19
|
|
|
|
|
+ 44c: 0b26cadb add w27, w22, w6, sxtw #2
|
|
|
|
|
+ 450: 2b38516d adds w13, w11, w24, uxtw #4
|
|
|
|
|
+ 454: cb242d10 sub x16, x8, w4, uxth #3
|
|
|
|
|
+ 458: 6b34ea55 subs w21, w18, w20, sxtx #2
|
|
|
|
|
+ 45c: 8b3d0a2e add x14, x17, w29, uxtb #2
|
|
|
|
|
+ 460: ab2eb231 adds x17, x17, w14, sxth #4
|
|
|
|
|
+ 464: cb3ac476 sub x22, x3, w26, sxtw #1
|
|
|
|
|
+ 468: eb3531ad subs x13, x13, w21, uxth #4
|
|
|
|
|
+ 46c: 3a5a722f ccmn w17, w26, #0xf, vc
|
|
|
|
|
+ 470: 7a463325 ccmp w25, w6, #0x5, cc // cc = lo, ul, last
|
|
|
|
|
+ 474: ba5e9021 ccmn x1, x30, #0x1, ls // ls = plast
|
|
|
|
|
+ 478: fa47a222 ccmp x17, x7, #0x2, ge // ge = tcont
|
|
|
|
|
+ 47c: 3a590a26 ccmn w17, #0x19, #0x6, eq // eq = none
|
|
|
|
|
+ 480: 7a450845 ccmp w2, #0x5, #0x5, eq // eq = none
|
|
|
|
|
+ 484: ba514a6a ccmn x19, #0x11, #0xa, mi // mi = first
|
|
|
|
|
+ 488: fa48c9c3 ccmp x14, #0x8, #0x3, gt
|
|
|
|
|
+ 48c: 1a8e9109 csel w9, w8, w14, ls // ls = plast
|
|
|
|
|
+ 490: 1a85d57b csinc w27, w11, w5, le
|
|
|
|
|
+ 494: 5a9632eb csinv w11, w23, w22, cc // cc = lo, ul, last
|
|
|
|
|
+ 498: 5a9b2793 csneg w19, w28, w27, cs // cs = hs, nlast
|
|
|
|
|
+ 49c: 9a815130 csel x16, x9, x1, pl // pl = nfrst
|
|
|
|
|
+ 4a0: 9a8c05dc csinc x28, x14, x12, eq // eq = none
|
|
|
|
|
+ 4a4: da8e5096 csinv x22, x4, x14, pl // pl = nfrst
|
|
|
|
|
+ 4a8: da9b257a csneg x26, x11, x27, cs // cs = hs, nlast
|
|
|
|
|
+ 4ac: 5ac00178 rbit w24, w11
|
|
|
|
|
+ 4b0: 5ac005ca rev16 w10, w14
|
|
|
|
|
+ 4b4: 5ac008a9 rev w9, w5
|
|
|
|
|
+ 4b8: 5ac01292 clz w18, w20
|
|
|
|
|
+ 4bc: 5ac01519 cls w25, w8
|
|
|
|
|
+ 4c0: dac00316 rbit x22, x24
|
|
|
|
|
+ 4c4: dac0077c rev16 x28, x27
|
|
|
|
|
+ 4c8: dac00ba8 rev32 x8, x29
|
|
|
|
|
+ 4cc: dac00d51 rev x17, x10
|
|
|
|
|
+ 4d0: dac01177 clz x23, x11
|
|
|
|
|
+ 4d4: dac015da cls x26, x14
|
|
|
|
|
+ 4d8: 1adc0895 udiv w21, w4, w28
|
|
|
|
|
+ 4dc: 1ad60d5e sdiv w30, w10, w22
|
|
|
|
|
+ 4e0: 1ada205d lsl w29, w2, w26
|
|
|
|
|
+ 4e4: 1aca26dc lsr w28, w22, w10
|
|
|
|
|
+ 4e8: 1acc2b0b asr w11, w24, w12
|
|
|
|
|
+ 4ec: 1ad02fd5 ror w21, w30, w16
|
|
|
|
|
+ 4f0: 9acd0801 udiv x1, x0, x13
|
|
|
|
|
+ 4f4: 9ac60e22 sdiv x2, x17, x6
|
|
|
|
|
+ 4f8: 9ad5230a lsl x10, x24, x21
|
|
|
|
|
+ 4fc: 9ac62525 lsr x5, x9, x6
|
|
|
|
|
+ 500: 9ac42b60 asr x0, x27, x4
|
|
|
|
|
+ 504: 9ac22c9c ror x28, x4, x2
|
|
|
|
|
+ 508: 9bc77fc1 umulh x1, x30, x7
|
|
|
|
|
+ 50c: 9b4a7cbe smulh x30, x5, x10
|
|
|
|
|
+ 510: 1b0d45e7 madd w7, w15, w13, w17
|
|
|
|
|
+ 514: 1b0cf039 msub w25, w1, w12, w28
|
|
|
|
|
+ 518: 9b1e2562 madd x2, x11, x30, x9
|
|
|
|
|
+ 51c: 9b03dae5 msub x5, x23, x3, x22
|
|
|
|
|
+ 520: 9b291159 smaddl x25, w10, w9, x4
|
|
|
|
|
+ 524: 9b27c905 smsubl x5, w8, w7, x18
|
|
|
|
|
+ 528: 9bba64b8 umaddl x24, w5, w26, x25
|
|
|
|
|
+ 52c: 9bbaf02e umsubl x14, w1, w26, x28
|
|
|
|
|
+ 530: 1e280ad8 fmul s24, s22, s8
|
|
|
|
|
+ 534: 1e261870 fdiv s16, s3, s6
|
|
|
|
|
+ 538: 1e392ab0 fadd s16, s21, s25
|
|
|
|
|
+ 53c: 1e3b3b40 fsub s0, s26, s27
|
|
|
|
|
+ 540: 1e310878 fmul s24, s3, s17
|
|
|
|
|
+ 544: 1e660909 fmul d9, d8, d6
|
|
|
|
|
+ 548: 1e7e1a76 fdiv d22, d19, d30
|
|
|
|
|
+ 54c: 1e632a2e fadd d14, d17, d3
|
|
|
|
|
+ 550: 1e743b78 fsub d24, d27, d20
|
|
|
|
|
+ 554: 1e76082c fmul d12, d1, d22
|
|
|
|
|
+ 558: 1f0b7510 fmadd s16, s8, s11, s29
|
|
|
|
|
+ 55c: 1f128676 fmsub s22, s19, s18, s1
|
|
|
|
|
+ 560: 1f38270f fnmadd s15, s24, s24, s9
|
|
|
|
|
+ 564: 1f2d5e7b fnmadd s27, s19, s13, s23
|
|
|
|
|
+ 568: 1f503003 fmadd d3, d0, d16, d12
|
|
|
|
|
+ 56c: 1f52a873 fmsub d19, d3, d18, d10
|
|
|
|
|
+ 570: 1f6b5041 fnmadd d1, d2, d11, d20
|
|
|
|
|
+ 574: 1f79392c fnmadd d12, d9, d25, d14
|
|
|
|
|
+ 578: 1e2042e0 fmov s0, s23
|
|
|
|
|
+ 57c: 1e20c0d7 fabs s23, s6
|
|
|
|
|
+ 580: 1e214084 fneg s4, s4
|
|
|
|
|
+ 584: 1e21c385 fsqrt s5, s28
|
|
|
|
|
+ 588: 1e22c1f5 fcvt d21, s15
|
|
|
|
|
+ 58c: 1e6040ab fmov d11, d5
|
|
|
|
|
+ 590: 1e60c092 fabs d18, d4
|
|
|
|
|
+ 594: 1e61418b fneg d11, d12
|
|
|
|
|
+ 598: 1e61c10f fsqrt d15, d8
|
|
|
|
|
+ 59c: 1e624048 fcvt s8, d2
|
|
|
|
|
+ 5a0: 1e380253 fcvtzs w19, s18
|
|
|
|
|
+ 5a4: 9e380011 fcvtzs x17, s0
|
|
|
|
|
+ 5a8: 1e7801a0 fcvtzs w0, d13
|
|
|
|
|
+ 5ac: 9e780136 fcvtzs x22, d9
|
|
|
|
|
+ 5b0: 1e2203a6 scvtf s6, w29
|
|
|
|
|
+ 5b4: 9e2201cc scvtf s12, x14
|
|
|
|
|
+ 5b8: 1e6202d0 scvtf d16, w22
|
|
|
|
|
+ 5bc: 9e6200ae scvtf d14, x5
|
|
|
|
|
+ 5c0: 1e260007 fmov w7, s0
|
|
|
|
|
+ 5c4: 9e6600dc fmov x28, d6
|
|
|
|
|
+ 5c8: 1e270342 fmov s2, w26
|
|
|
|
|
+ 5cc: 9e670004 fmov d4, x0
|
|
|
|
|
+ 5d0: 1e2b2020 fcmp s1, s11
|
|
|
|
|
+ 5d4: 1e7520c0 fcmp d6, d21
|
|
|
|
|
+ 5d8: 1e202208 fcmp s16, #0.0
|
|
|
|
|
+ 5dc: 1e6022c8 fcmp d22, #0.0
|
|
|
|
|
+ 5e0: 290c0045 stp w5, w0, [x2, #96]
|
|
|
|
|
+ 5e4: 2978766e ldp w14, w29, [x19, #-64]
|
|
|
|
|
+ 5e8: 696c0c6f ldpsw x15, x3, [x3, #-160]
|
|
|
|
|
+ 5ec: a9323767 stp x7, x13, [x27, #-224]
|
|
|
|
|
+ 5f0: a9483831 ldp x17, x14, [x1, #128]
|
|
|
|
|
+ 5f4: 29905895 stp w21, w22, [x4, #128]!
|
|
|
|
|
+ 5f8: 29f43451 ldp w17, w13, [x2, #-96]!
|
|
|
|
|
+ 5fc: 69ee66f5 ldpsw x21, x25, [x23, #-144]!
|
|
|
|
|
+ 600: a9bf41e4 stp x4, x16, [x15, #-16]!
|
|
|
|
|
+ 604: a9f6573d ldp x29, x21, [x25, #-160]!
|
|
|
|
|
+ 608: 288a4758 stp w24, w17, [x26], #80
|
|
|
|
|
+ 60c: 28e27bc3 ldp w3, w30, [x30], #-240
|
|
|
|
|
+ 610: 68fc4fc3 ldpsw x3, x19, [x30], #-32
|
|
|
|
|
+ 614: a8b70779 stp x25, x1, [x27], #-144
|
|
|
|
|
+ 618: a8fc539a ldp x26, x20, [x28], #-64
|
|
|
|
|
+ 61c: 283a653d stnp w29, w25, [x9, #-48]
|
|
|
|
|
+ 620: 28703a79 ldnp w25, w14, [x19, #-128]
|
|
|
|
|
+ 624: a8025879 stnp x25, x22, [x3, #32]
|
|
|
|
|
+ 628: a8734ba9 ldnp x9, x18, [x29, #-208]
|
|
|
|
|
+ 62c: 0c407275 ld1 {v21.8b}, [x19]
|
|
|
|
|
+ 630: 4cdfa29b ld1 {v27.16b, v28.16b}, [x20], #32
|
|
|
|
|
+ 634: 0cc66ec5 ld1 {v5.1d-v7.1d}, [x22], x6
|
|
|
|
|
+ 638: 4cdf2596 ld1 {v22.8h-v25.8h}, [x12], #64
|
|
|
|
|
+ 63c: 0d40c131 ld1r {v17.8b}, [x9]
|
|
|
|
|
+ 640: 4ddfcaa5 ld1r {v5.4s}, [x21], #4
|
|
|
|
|
+ 644: 0dd2cf8a ld1r {v10.1d}, [x28], x18
|
|
|
|
|
+ 648: 4c408dfa ld2 {v26.2d, v27.2d}, [x15]
|
|
|
|
|
+ 64c: 0cdf8750 ld2 {v16.4h, v17.4h}, [x26], #16
|
|
|
|
|
+ 650: 4d60c04e ld2r {v14.16b, v15.16b}, [x2]
|
|
|
|
|
+ 654: 0dffcb92 ld2r {v18.2s, v19.2s}, [x28], #8
|
|
|
|
|
+ 658: 4df6cc13 ld2r {v19.2d, v20.2d}, [x0], x22
|
|
|
|
|
+ 65c: 4cd24850 ld3 {v16.4s-v18.4s}, [x2], x18
|
|
|
|
|
+ 660: 0c404818 ld3 {v24.2s-v26.2s}, [x0]
|
|
|
|
|
+ 664: 4d40e604 ld3r {v4.8h-v6.8h}, [x16]
|
|
|
|
|
+ 668: 4ddfe825 ld3r {v5.4s-v7.4s}, [x1], #12
|
|
|
|
|
+ 66c: 0dd0ed47 ld3r {v7.1d-v9.1d}, [x10], x16
|
|
|
|
|
+ 670: 4cdf0696 ld4 {v22.8h-v25.8h}, [x20], #64
|
|
|
|
|
+ 674: 0cd9008f ld4 {v15.8b-v18.8b}, [x4], x25
|
|
|
|
|
+ 678: 0d60e0a0 ld4r {v0.8b-v3.8b}, [x5]
|
|
|
|
|
+ 67c: 0dffe420 ld4r {v0.4h-v3.4h}, [x1], #8
|
|
|
|
|
+ 680: 0deeeb9e ld4r {v30.2s, v31.2s, v0.2s, v1.2s}, [x28], x14
|
|
|
|
|
+ 684: ba5fd3e3 ccmn xzr, xzr, #0x3, le
|
|
|
|
|
+ 688: 3a5f03e5 ccmn wzr, wzr, #0x5, eq // eq = none
|
|
|
|
|
+ 68c: fa411be4 ccmp xzr, #0x1, #0x4, ne // ne = any
|
|
|
|
|
+ 690: 7a42cbe2 ccmp wzr, #0x2, #0x2, gt
|
|
|
|
|
+ 694: 93df03ff ror xzr, xzr, #0
|
|
|
|
|
+ 698: c820ffff stlxp w0, xzr, xzr, [sp]
|
|
|
|
|
+ 69c: 8822fc7f stlxp w2, wzr, wzr, [x3]
|
|
|
|
|
+ 6a0: c8247cbf stxp w4, xzr, xzr, [x5]
|
|
|
|
|
+ 6a4: 88267fff stxp w6, wzr, wzr, [sp]
|
|
|
|
|
+ 6a8: 4e010fe0 dup v0.16b, wzr
|
|
|
|
|
+ 6ac: 4e081fe1 mov v1.d[0], xzr
|
|
|
|
|
+ 6b0: 4e0c1fe1 mov v1.s[1], wzr
|
|
|
|
|
+ 6b4: 4e0a1fe1 mov v1.h[2], wzr
|
|
|
|
|
+ 6b8: 4e071fe1 mov v1.b[3], wzr
|
|
|
|
|
+ 6bc: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0
|
|
|
|
|
+ 6c0: 1e601000 fmov d0, #2.000000000000000000e+00
|
|
|
|
|
+ 6c4: 1e603000 fmov d0, #2.125000000000000000e+00
|
|
|
|
|
+ 6c8: 1e621000 fmov d0, #4.000000000000000000e+00
|
|
|
|
|
+ 6cc: 1e623000 fmov d0, #4.250000000000000000e+00
|
|
|
|
|
+ 6d0: 1e641000 fmov d0, #8.000000000000000000e+00
|
|
|
|
|
+ 6d4: 1e643000 fmov d0, #8.500000000000000000e+00
|
|
|
|
|
+ 6d8: 1e661000 fmov d0, #1.600000000000000000e+01
|
|
|
|
|
+ 6dc: 1e663000 fmov d0, #1.700000000000000000e+01
|
|
|
|
|
+ 6e0: 1e681000 fmov d0, #1.250000000000000000e-01
|
|
|
|
|
+ 6e4: 1e683000 fmov d0, #1.328125000000000000e-01
|
|
|
|
|
+ 6e8: 1e6a1000 fmov d0, #2.500000000000000000e-01
|
|
|
|
|
+ 6ec: 1e6a3000 fmov d0, #2.656250000000000000e-01
|
|
|
|
|
+ 6f0: 1e6c1000 fmov d0, #5.000000000000000000e-01
|
|
|
|
|
+ 6f4: 1e6c3000 fmov d0, #5.312500000000000000e-01
|
|
|
|
|
+ 6f8: 1e6e1000 fmov d0, #1.000000000000000000e+00
|
|
|
|
|
+ 6fc: 1e6e3000 fmov d0, #1.062500000000000000e+00
|
|
|
|
|
+ 700: 1e701000 fmov d0, #-2.000000000000000000e+00
|
|
|
|
|
+ 704: 1e703000 fmov d0, #-2.125000000000000000e+00
|
|
|
|
|
+ 708: 1e721000 fmov d0, #-4.000000000000000000e+00
|
|
|
|
|
+ 70c: 1e723000 fmov d0, #-4.250000000000000000e+00
|
|
|
|
|
+ 710: 1e741000 fmov d0, #-8.000000000000000000e+00
|
|
|
|
|
+ 714: 1e743000 fmov d0, #-8.500000000000000000e+00
|
|
|
|
|
+ 718: 1e761000 fmov d0, #-1.600000000000000000e+01
|
|
|
|
|
+ 71c: 1e763000 fmov d0, #-1.700000000000000000e+01
|
|
|
|
|
+ 720: 1e781000 fmov d0, #-1.250000000000000000e-01
|
|
|
|
|
+ 724: 1e783000 fmov d0, #-1.328125000000000000e-01
|
|
|
|
|
+ 728: 1e7a1000 fmov d0, #-2.500000000000000000e-01
|
|
|
|
|
+ 72c: 1e7a3000 fmov d0, #-2.656250000000000000e-01
|
|
|
|
|
+ 730: 1e7c1000 fmov d0, #-5.000000000000000000e-01
|
|
|
|
|
+ 734: 1e7c3000 fmov d0, #-5.312500000000000000e-01
|
|
|
|
|
+ 738: 1e7e1000 fmov d0, #-1.000000000000000000e+00
|
|
|
|
|
+ 73c: 1e7e3000 fmov d0, #-1.062500000000000000e+00
|
|
|
|
|
+ 740: f83a8229 swp x26, x9, [x17]
|
|
|
|
|
+ 744: f83c0057 ldadd x28, x23, [x2]
|
|
|
|
|
+ 748: f8361062 ldclr x22, x2, [x3]
|
|
|
|
|
+ 74c: f82b23d9 ldeor x11, x25, [x30]
|
|
|
|
|
+ 750: f836309c ldset x22, x28, [x4]
|
|
|
|
|
+ 754: f826530b ldsmin x6, x11, [x24]
|
|
|
|
|
+ 758: f82c43ff stsmax x12, [sp]
|
|
|
|
|
+ 75c: f837713e ldumin x23, x30, [x9]
|
|
|
|
|
+ 760: f8266281 ldumax x6, x1, [x20]
|
|
|
|
|
+ 764: f8b182c2 swpa x17, x2, [x22]
|
|
|
|
|
+ 768: f8ae015b ldadda x14, x27, [x10]
|
|
|
|
|
+ 76c: f8a6127e ldclra x6, x30, [x19]
|
|
|
|
|
+ 770: f8a02179 ldeora x0, x25, [x11]
|
|
|
|
|
+ 774: f8b733c0 ldseta x23, x0, [x30]
|
|
|
|
|
+ 778: f8b55143 ldsmina x21, x3, [x10]
|
|
|
|
|
+ 77c: f8af4016 ldsmaxa x15, x22, [x0]
|
|
|
|
|
+ 780: f8b17280 ldumina x17, x0, [x20]
|
|
|
|
|
+ 784: f8b0602d ldumaxa x16, x13, [x1]
|
|
|
|
|
+ 788: f8fb82ef swpal x27, x15, [x23]
|
|
|
|
|
+ 78c: f8f3003e ldaddal x19, x30, [x1]
|
|
|
|
|
+ 790: f8ef12fc ldclral x15, x28, [x23]
|
|
|
|
|
+ 794: f8e7226f ldeoral x7, x15, [x19]
|
|
|
|
|
+ 798: f8eb314c ldsetal x11, x12, [x10]
|
|
|
|
|
+ 79c: f8e65187 ldsminal x6, x7, [x12]
|
|
|
|
|
+ 7a0: f8fc41a5 ldsmaxal x28, x5, [x13]
|
|
|
|
|
+ 7a4: f8e97234 lduminal x9, x20, [x17]
|
|
|
|
|
+ 7a8: f8f56179 ldumaxal x21, x25, [x11]
|
|
|
|
|
+ 7ac: f8738318 swpl x19, x24, [x24]
|
|
|
|
|
+ 7b0: f86803da ldaddl x8, x26, [x30]
|
|
|
|
|
+ 7b4: f8711112 ldclrl x17, x18, [x8]
|
|
|
|
|
+ 7b8: f8622063 ldeorl x2, x3, [x3]
|
|
|
|
|
+ 7bc: f87a3207 ldsetl x26, x7, [x16]
|
|
|
|
|
+ 7c0: f87b50a6 ldsminl x27, x6, [x5]
|
|
|
|
|
+ 7c4: f8764280 ldsmaxl x22, x0, [x20]
|
|
|
|
|
+ 7c8: f86b705a lduminl x11, x26, [x2]
|
|
|
|
|
+ 7cc: f87e609d ldumaxl x30, x29, [x4]
|
|
|
|
|
+ 7d0: b82480e5 swp w4, w5, [x7]
|
|
|
|
|
+ 7d4: b82a005a ldadd w10, w26, [x2]
|
|
|
|
|
+ 7d8: b83b1370 ldclr w27, w16, [x27]
|
|
|
|
|
+ 7dc: b83f2157 ldeor wzr, w23, [x10]
|
|
|
|
|
+ 7e0: b82431a2 ldset w4, w2, [x13]
|
|
|
|
|
+ 7e4: b823506f ldsmin w3, w15, [x3]
|
|
|
|
|
+ 7e8: b82340ca ldsmax w3, w10, [x6]
|
|
|
|
|
+ 7ec: b828714b ldumin w8, w11, [x10]
|
|
|
|
|
+ 7f0: b83d61be ldumax w29, w30, [x13]
|
|
|
|
|
+ 7f4: b8ab8291 swpa w11, w17, [x20]
|
|
|
|
|
+ 7f8: b8ba00d0 ldadda w26, w16, [x6]
|
|
|
|
|
+ 7fc: b8b5102a ldclra w21, w10, [x1]
|
|
|
|
|
+ 800: b8bd22ec ldeora w29, w12, [x23]
|
|
|
|
|
+ 804: b8bd3108 ldseta w29, w8, [x8]
|
|
|
|
|
+ 808: b8ab51ca ldsmina w11, w10, [x14]
|
|
|
|
|
+ 80c: b8a442cd ldsmaxa w4, w13, [x22]
|
|
|
|
|
+ 810: b8a770ed ldumina w7, w13, [x7]
|
|
|
|
|
+ 814: b8ae63e0 ldumaxa w14, w0, [sp]
|
|
|
|
|
+ 818: b8f18382 swpal w17, w2, [x28]
|
|
|
|
|
+ 81c: b8f3014b ldaddal w19, w11, [x10]
|
|
|
|
|
+ 820: b8ec1293 ldclral w12, w19, [x20]
|
|
|
|
|
+ 824: b8e02108 ldeoral w0, w8, [x8]
|
|
|
|
|
+ 828: b8f13303 ldsetal w17, w3, [x24]
|
|
|
|
|
+ 82c: b8f950e5 ldsminal w25, w5, [x7]
|
|
|
|
|
+ 830: b8f0413e ldsmaxal w16, w30, [x9]
|
|
|
|
|
+ 834: b8ea71df lduminal w10, wzr, [x14]
|
|
|
|
|
+ 838: b8f16173 ldumaxal w17, w19, [x11]
|
|
|
|
|
+ 83c: b87481a1 swpl w20, w1, [x13]
|
|
|
|
|
+ 840: b87a028b ldaddl w26, w11, [x20]
|
|
|
|
|
+ 844: b87213d8 ldclrl w18, w24, [x30]
|
|
|
|
|
+ 848: b86c2299 ldeorl w12, w25, [x20]
|
|
|
|
|
+ 84c: b86e30bd ldsetl w14, w29, [x5]
|
|
|
|
|
+ 850: b862537a ldsminl w2, w26, [x27]
|
|
|
|
|
+ 854: b879417b ldsmaxl w25, w27, [x11]
|
|
|
|
|
+ 858: b86470fd lduminl w4, w29, [x7]
|
|
|
|
|
+ 85c: b870615d ldumaxl w16, w29, [x10]
|
2020-12-24 15:35:16 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
static const unsigned int insns[] =
|
|
|
|
|
{
|
2021-08-13 14:54:30 +08:00
|
|
|
- 0x8b50798f, 0xcb4381e1, 0xab05372d, 0xeb864796,
|
|
|
|
|
- 0x0b961920, 0x4b195473, 0x2b0b5264, 0x6b9300f8,
|
|
|
|
|
- 0x8a0bc0fe, 0xaa0f3118, 0xca170531, 0xea44dd6e,
|
|
|
|
|
- 0x0a4c44f3, 0x2a8b7373, 0x4a567c7e, 0x6a9c0353,
|
|
|
|
|
- 0x8a3accdd, 0xaa318f7a, 0xca2e1495, 0xeaa015e2,
|
|
|
|
|
- 0x0a2274e2, 0x2a751598, 0x4a3309fe, 0x6ab172fe,
|
|
|
|
|
- 0x110a5284, 0x310b1942, 0x5103d353, 0x710125bc,
|
|
|
|
|
- 0x910d7bc2, 0xb108fa1b, 0xd1093536, 0xf10ae824,
|
|
|
|
|
- 0x120e667c, 0x321f6cbb, 0x520f6a9e, 0x72136f56,
|
|
|
|
|
- 0x927e4ce5, 0xb278b4ed, 0xd24c6527, 0xf2485803,
|
|
|
|
|
+ 0x8b4db437, 0xcb8ce3c8, 0xab0edafb, 0xeb5499f5,
|
|
|
|
|
+ 0x0b040e39, 0x4b89503d, 0x2b89274a, 0x6b870fd5,
|
|
|
|
|
+ 0x8a4b1109, 0xaa810643, 0xca026e8a, 0xea8b7d2c,
|
|
|
|
|
+ 0x0a9e6934, 0x2a9a4555, 0x4a871d00, 0x6a084973,
|
|
|
|
|
+ 0x8a23d497, 0xaa3360c9, 0xca7ad8cc, 0xea2c3a76,
|
|
|
|
|
+ 0x0a362dbd, 0x2ab417d1, 0x4a2b23a1, 0x6a667684,
|
|
|
|
|
+ 0x1107e0de, 0x310ebd13, 0x5105b55d, 0x71047104,
|
|
|
|
|
+ 0x910ef9c3, 0xb1029e96, 0xd10b55fb, 0xf10ecf98,
|
|
|
|
|
+ 0x12099f39, 0x321b3f4d, 0x520309b5, 0x72134062,
|
|
|
|
|
+ 0x92004548, 0xb24d861b, 0xd219587b, 0xf25eaee4,
|
|
|
|
|
0x14000000, 0x17ffffd7, 0x140001ee, 0x94000000,
|
|
|
|
|
- 0x97ffffd4, 0x940001eb, 0x34000010, 0x34fffa30,
|
|
|
|
|
- 0x34003d10, 0x35000013, 0x35fff9d3, 0x35003cb3,
|
|
|
|
|
- 0xb4000005, 0xb4fff965, 0xb4003c45, 0xb5000004,
|
|
|
|
|
- 0xb5fff904, 0xb5003be4, 0x1000001b, 0x10fff8bb,
|
|
|
|
|
- 0x10003b9b, 0x90000010, 0x3640001c, 0x3647f83c,
|
|
|
|
|
- 0x36403b1c, 0x37080001, 0x370ff7c1, 0x37083aa1,
|
|
|
|
|
- 0x12a437f4, 0x528c9d67, 0x72838bb1, 0x92c1062e,
|
|
|
|
|
- 0xd287da49, 0xf2a6d153, 0x93465ac9, 0x330b0013,
|
|
|
|
|
- 0x530b4e6a, 0x934545e4, 0xb35370a3, 0xd3510b8c,
|
|
|
|
|
- 0x13960c0f, 0x93ceddc6, 0x54000000, 0x54fff5a0,
|
|
|
|
|
+ 0x97ffffd4, 0x940001eb, 0x34000003, 0x34fffa23,
|
|
|
|
|
+ 0x34003d03, 0x35000002, 0x35fff9c2, 0x35003ca2,
|
|
|
|
|
+ 0xb4000019, 0xb4fff979, 0xb4003c59, 0xb5000012,
|
|
|
|
|
+ 0xb5fff912, 0xb5003bf2, 0x10000008, 0x10fff8a8,
|
|
|
|
|
+ 0x10003b88, 0x9000000f, 0x36700012, 0x3677f832,
|
|
|
|
|
+ 0x36703b12, 0x37780019, 0x377ff7d9, 0x37783ab9,
|
|
|
|
|
+ 0x12a203d2, 0x5286b21e, 0x72a66d35, 0x92eded92,
|
|
|
|
|
+ 0xd2eefecd, 0xf2ef69a3, 0x93400c2a, 0x330562cc,
|
|
|
|
|
+ 0x530b2071, 0x934b3860, 0xb3473cdc, 0xd3416549,
|
|
|
|
|
+ 0x13995f75, 0x93d6462e, 0x54000000, 0x54fff5a0,
|
|
|
|
|
0x54003880, 0x54000001, 0x54fff541, 0x54003821,
|
|
|
|
|
0x54000002, 0x54fff4e2, 0x540037c2, 0x54000002,
|
|
|
|
|
0x54fff482, 0x54003762, 0x54000003, 0x54fff423,
|
|
|
|
|
@@ -1336,77 +1336,77 @@ Disassembly of section .text:
|
|
|
|
|
0x5400000c, 0x54fff06c, 0x5400334c, 0x5400000d,
|
|
|
|
|
0x54fff00d, 0x540032ed, 0x5400000e, 0x54ffefae,
|
|
|
|
|
0x5400328e, 0x5400000f, 0x54ffef4f, 0x5400322f,
|
|
|
|
|
- 0xd40ac601, 0xd40042a2, 0xd404dac3, 0xd4224d40,
|
|
|
|
|
- 0xd44219c0, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
|
|
|
|
|
- 0xd5033fdf, 0xd503339f, 0xd50335bf, 0xd61f0280,
|
|
|
|
|
- 0xd63f0040, 0xc8127c17, 0xc81efec5, 0xc85f7d05,
|
|
|
|
|
- 0xc85ffe14, 0xc89ffd66, 0xc8dfff66, 0x880a7cb1,
|
|
|
|
|
- 0x8816fd89, 0x885f7d1b, 0x885ffc57, 0x889fffba,
|
|
|
|
|
- 0x88dffd4d, 0x48197f7c, 0x481dfd96, 0x485f7f96,
|
|
|
|
|
- 0x485fffc3, 0x489ffdf8, 0x48dfff5b, 0x080b7e6a,
|
|
|
|
|
- 0x0817fedb, 0x085f7e18, 0x085ffc38, 0x089fffa5,
|
|
|
|
|
- 0x08dffe18, 0xc87f6239, 0xc87fb276, 0xc820573a,
|
|
|
|
|
- 0xc821aca6, 0x887f388d, 0x887f88d1, 0x882f2643,
|
|
|
|
|
- 0x88329131, 0xf81cf2b7, 0xb803f055, 0x39002f9b,
|
|
|
|
|
- 0x781f31fd, 0xf85d33ce, 0xb843539d, 0x39401f54,
|
|
|
|
|
- 0x785ce059, 0x389f1143, 0x788131ee, 0x78dfb17d,
|
|
|
|
|
- 0xb89b90af, 0xfc403193, 0xbc42a36c, 0xfc07d396,
|
|
|
|
|
- 0xbc1ec1f8, 0xf81e8f88, 0xb8025de6, 0x38007c27,
|
|
|
|
|
- 0x7801ee20, 0xf8454fb9, 0xb85cce9a, 0x385e7fba,
|
|
|
|
|
- 0x7841af24, 0x389ebd1c, 0x789fadd1, 0x78c0aefc,
|
|
|
|
|
- 0xb89c0f7e, 0xfc50efd4, 0xbc414f71, 0xfc011c67,
|
|
|
|
|
- 0xbc1f0d6d, 0xf81c3526, 0xb81e34b0, 0x3800f7bd,
|
|
|
|
|
- 0x78012684, 0xf842e653, 0xb8417456, 0x385e2467,
|
|
|
|
|
- 0x785e358b, 0x389e34c8, 0x788046f8, 0x78c00611,
|
|
|
|
|
- 0xb89f8680, 0xfc582454, 0xbc5987d3, 0xfc076624,
|
|
|
|
|
- 0xbc190675, 0xf833785a, 0xb82fd809, 0x3821799a,
|
|
|
|
|
- 0x782a7975, 0xf870eaf0, 0xb871d96a, 0x386b7aed,
|
|
|
|
|
- 0x7875689b, 0x38afd91a, 0x78a2c955, 0x78ee6bc8,
|
|
|
|
|
- 0xb8b4f9dd, 0xfc76eb7e, 0xbc76692d, 0xfc31db28,
|
|
|
|
|
- 0xbc255b01, 0xf91c52aa, 0xb91c3fb2, 0x391f8877,
|
|
|
|
|
- 0x791ac97c, 0xf95c1758, 0xb95b3c55, 0x395ce0a4,
|
|
|
|
|
- 0x795851ce, 0x399e9f64, 0x79993764, 0x79d9af8a,
|
|
|
|
|
- 0xb99eea2a, 0xfd5a2f8d, 0xbd5dac78, 0xfd1e0182,
|
|
|
|
|
- 0xbd195c31, 0x58000010, 0x1800000d, 0xf8981240,
|
|
|
|
|
- 0xd8ffdf00, 0xf8a27a80, 0xf99af920, 0x1a0202e8,
|
|
|
|
|
- 0x3a130078, 0x5a1d0316, 0x7a03036c, 0x9a0102eb,
|
|
|
|
|
- 0xba1700bd, 0xda0c0329, 0xfa16000c, 0x0b23459a,
|
|
|
|
|
- 0x2b328a14, 0xcb274bde, 0x6b222eab, 0x8b214b42,
|
|
|
|
|
- 0xab34a7b2, 0xcb24520e, 0xeb378e20, 0x3a565283,
|
|
|
|
|
- 0x7a420321, 0xba58c247, 0xfa4d5106, 0x3a426924,
|
|
|
|
|
- 0x7a5b0847, 0xba413a02, 0xfa5fba23, 0x1a979377,
|
|
|
|
|
- 0x1a86640a, 0x5a89300b, 0x5a923771, 0x9a8b720c,
|
|
|
|
|
- 0x9a868786, 0xda9a736d, 0xda9256dd, 0x5ac0026c,
|
|
|
|
|
- 0x5ac00657, 0x5ac00b89, 0x5ac01262, 0x5ac017b9,
|
|
|
|
|
- 0xdac002e4, 0xdac0065d, 0xdac00907, 0xdac00e2d,
|
|
|
|
|
- 0xdac01011, 0xdac01752, 0x1ad0098b, 0x1ac70d24,
|
|
|
|
|
- 0x1ad020ec, 0x1ad72613, 0x1ac62887, 0x1ad72e95,
|
|
|
|
|
- 0x9adc0990, 0x9acd0d84, 0x9ac721a9, 0x9acf277c,
|
|
|
|
|
- 0x9ace2bd4, 0x9ade2e4e, 0x9bc77d63, 0x9b587e97,
|
|
|
|
|
- 0x1b1524a2, 0x1b04a318, 0x9b0f4d8b, 0x9b0ce73d,
|
|
|
|
|
- 0x9b2c5971, 0x9b34c87c, 0x9bbc6887, 0x9bb19556,
|
|
|
|
|
- 0x1e310871, 0x1e261a2b, 0x1e2928fd, 0x1e333987,
|
|
|
|
|
- 0x1e230ae0, 0x1e75087a, 0x1e651a60, 0x1e692b40,
|
|
|
|
|
- 0x1e753ab9, 0x1e7309b0, 0x1f00425d, 0x1f1d95b7,
|
|
|
|
|
- 0x1f2a38e9, 0x1f2f5f99, 0x1f5545a6, 0x1f429ea3,
|
|
|
|
|
- 0x1f65472a, 0x1f7449ce, 0x1e20404f, 0x1e20c0f2,
|
|
|
|
|
- 0x1e2140c3, 0x1e21c02c, 0x1e22c009, 0x1e6040a4,
|
|
|
|
|
- 0x1e60c1e3, 0x1e614331, 0x1e61c30c, 0x1e6240b5,
|
|
|
|
|
- 0x1e3802a4, 0x9e38007b, 0x1e78011d, 0x9e7802a9,
|
|
|
|
|
- 0x1e2203b4, 0x9e220107, 0x1e6202ac, 0x9e6202b0,
|
|
|
|
|
- 0x1e2600b2, 0x9e660119, 0x1e270352, 0x9e670160,
|
|
|
|
|
- 0x1e262200, 0x1e7d2200, 0x1e2023c8, 0x1e602128,
|
|
|
|
|
- 0x293e119b, 0x294a2543, 0x69480c70, 0xa934726a,
|
|
|
|
|
- 0xa97448f3, 0x298243ca, 0x29e21242, 0x69c64db8,
|
|
|
|
|
- 0xa9800311, 0xa9f4686e, 0x288a0416, 0x28fe2812,
|
|
|
|
|
- 0x68fe62d8, 0xa885308c, 0xa8f12664, 0x282468d2,
|
|
|
|
|
- 0x284e5035, 0xa8327699, 0xa84716e1, 0x0c407284,
|
|
|
|
|
- 0x4cdfa158, 0x0ccf6cd8, 0x4cdf2483, 0x0d40c0c2,
|
|
|
|
|
- 0x4ddfc9cd, 0x0dd8ceaf, 0x4c408ea9, 0x0cdf86bd,
|
|
|
|
|
- 0x4d60c1c8, 0x0dffca87, 0x4de3cc7c, 0x4cdd497b,
|
|
|
|
|
- 0x0c404950, 0x4d40e595, 0x4ddfeba4, 0x0dd3ed38,
|
|
|
|
|
- 0x4cdf046a, 0x0cc9039b, 0x0d60e3d5, 0x0dffe5d7,
|
|
|
|
|
- 0x0df4e9a4, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4,
|
|
|
|
|
+ 0xd40f9ca1, 0xd4008b22, 0xd40be1c3, 0xd423d0e0,
|
|
|
|
|
+ 0xd44dee20, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
|
|
|
|
|
+ 0xd5033fdf, 0xd503359f, 0xd50337bf, 0xd61f0380,
|
|
|
|
|
+ 0xd63f0220, 0xc8127f47, 0xc819fccc, 0xc85f7e00,
|
|
|
|
|
+ 0xc85ffc66, 0xc89ffc2e, 0xc8dfff1d, 0x881c7eef,
|
|
|
|
|
+ 0x8809fc67, 0x885f7e81, 0x885ffdf4, 0x889ffd35,
|
|
|
|
|
+ 0x88dffe25, 0x480d7fd4, 0x480afe4c, 0x485f7e64,
|
|
|
|
|
+ 0x485ffd56, 0x489ffdfe, 0x48dfff04, 0x080a7d94,
|
|
|
|
|
+ 0x0814fd7d, 0x085f7cb5, 0x085ffd24, 0x089fff9e,
|
|
|
|
|
+ 0x08dfff13, 0xc87f424b, 0xc87f9de8, 0xc83c4154,
|
|
|
|
|
+ 0xc827d469, 0x887f1a79, 0x887fa45e, 0x88305180,
|
|
|
|
|
+ 0x88259f82, 0xf81b5270, 0xb801e381, 0x381e61bc,
|
|
|
|
|
+ 0x781cd0c8, 0xf851d380, 0xb85e615c, 0x39403164,
|
|
|
|
|
+ 0x78405221, 0x3980312b, 0x789ef108, 0x78ddd1b4,
|
|
|
|
|
+ 0xb8831137, 0xfc41d089, 0xbd402a6b, 0xfc1d5299,
|
|
|
|
|
+ 0xbc1b0039, 0xf8019c14, 0xb81cfd8c, 0x381f6e7c,
|
|
|
|
|
+ 0x781c1f8d, 0xf85d2eeb, 0xb8411f1b, 0x385f4f4e,
|
|
|
|
|
+ 0x785d3ed8, 0x389f5d39, 0x7881dcc5, 0x78dffee7,
|
|
|
|
|
+ 0xb89c3dba, 0xfc50bf18, 0xbc5c9f34, 0xfc135c49,
|
|
|
|
|
+ 0xbc1c5c2e, 0xf806d433, 0xb81ca4a4, 0x3800947d,
|
|
|
|
|
+ 0x781ce420, 0xf85d04c2, 0xb858d4cf, 0x385e5444,
|
|
|
|
|
+ 0x785eb751, 0x389f3715, 0x789d04d6, 0x78dd04cb,
|
|
|
|
|
+ 0xb89fb7ce, 0xfc5975e2, 0xbc5a5679, 0xfc1416ed,
|
|
|
|
|
+ 0xbc0006b6, 0xf832c996, 0xb82c4b7e, 0x38367887,
|
|
|
|
|
+ 0x783dfaf3, 0xf87bf891, 0xb871c9a1, 0x387dfb70,
|
|
|
|
|
+ 0x78645939, 0x38b67984, 0x78a55839, 0x78fc6a09,
|
|
|
|
|
+ 0xb8aee8e8, 0xfc705b84, 0xbc7bd850, 0xfc396817,
|
|
|
|
|
+ 0xbc277a06, 0xf91ddd82, 0xb91b10a8, 0x391f8221,
|
|
|
|
|
+ 0x79197728, 0xf95ca07c, 0xb95b5d75, 0x395dc8af,
|
|
|
|
|
+ 0x795caa60, 0x399dd53d, 0x799c7397, 0x79dcb15b,
|
|
|
|
|
+ 0xb99e3b75, 0xfd5c7f7a, 0xbd5d2882, 0xfd1fb2a1,
|
|
|
|
|
+ 0xbd1d82c4, 0x58000001, 0x1800001b, 0xf882d080,
|
|
|
|
|
+ 0xd8000000, 0xf8a0cbc0, 0xf99fab00, 0x1a1803a0,
|
|
|
|
|
+ 0x3a120396, 0x5a1e0217, 0x7a0e03a7, 0x9a0e0196,
|
|
|
|
|
+ 0xba17031d, 0xda160391, 0xfa130298, 0x0b26cadb,
|
|
|
|
|
+ 0x2b38516d, 0xcb242d10, 0x6b34ea55, 0x8b3d0a2e,
|
|
|
|
|
+ 0xab2eb231, 0xcb3ac476, 0xeb3531ad, 0x3a5a722f,
|
|
|
|
|
+ 0x7a463325, 0xba5e9021, 0xfa47a222, 0x3a590a26,
|
|
|
|
|
+ 0x7a450845, 0xba514a6a, 0xfa48c9c3, 0x1a8e9109,
|
|
|
|
|
+ 0x1a85d57b, 0x5a9632eb, 0x5a9b2793, 0x9a815130,
|
|
|
|
|
+ 0x9a8c05dc, 0xda8e5096, 0xda9b257a, 0x5ac00178,
|
|
|
|
|
+ 0x5ac005ca, 0x5ac008a9, 0x5ac01292, 0x5ac01519,
|
|
|
|
|
+ 0xdac00316, 0xdac0077c, 0xdac00ba8, 0xdac00d51,
|
|
|
|
|
+ 0xdac01177, 0xdac015da, 0x1adc0895, 0x1ad60d5e,
|
|
|
|
|
+ 0x1ada205d, 0x1aca26dc, 0x1acc2b0b, 0x1ad02fd5,
|
|
|
|
|
+ 0x9acd0801, 0x9ac60e22, 0x9ad5230a, 0x9ac62525,
|
|
|
|
|
+ 0x9ac42b60, 0x9ac22c9c, 0x9bc77fc1, 0x9b4a7cbe,
|
|
|
|
|
+ 0x1b0d45e7, 0x1b0cf039, 0x9b1e2562, 0x9b03dae5,
|
|
|
|
|
+ 0x9b291159, 0x9b27c905, 0x9bba64b8, 0x9bbaf02e,
|
|
|
|
|
+ 0x1e280ad8, 0x1e261870, 0x1e392ab0, 0x1e3b3b40,
|
|
|
|
|
+ 0x1e310878, 0x1e660909, 0x1e7e1a76, 0x1e632a2e,
|
|
|
|
|
+ 0x1e743b78, 0x1e76082c, 0x1f0b7510, 0x1f128676,
|
|
|
|
|
+ 0x1f38270f, 0x1f2d5e7b, 0x1f503003, 0x1f52a873,
|
|
|
|
|
+ 0x1f6b5041, 0x1f79392c, 0x1e2042e0, 0x1e20c0d7,
|
|
|
|
|
+ 0x1e214084, 0x1e21c385, 0x1e22c1f5, 0x1e6040ab,
|
|
|
|
|
+ 0x1e60c092, 0x1e61418b, 0x1e61c10f, 0x1e624048,
|
|
|
|
|
+ 0x1e380253, 0x9e380011, 0x1e7801a0, 0x9e780136,
|
|
|
|
|
+ 0x1e2203a6, 0x9e2201cc, 0x1e6202d0, 0x9e6200ae,
|
|
|
|
|
+ 0x1e260007, 0x9e6600dc, 0x1e270342, 0x9e670004,
|
|
|
|
|
+ 0x1e2b2020, 0x1e7520c0, 0x1e202208, 0x1e6022c8,
|
|
|
|
|
+ 0x290c0045, 0x2978766e, 0x696c0c6f, 0xa9323767,
|
|
|
|
|
+ 0xa9483831, 0x29905895, 0x29f43451, 0x69ee66f5,
|
|
|
|
|
+ 0xa9bf41e4, 0xa9f6573d, 0x288a4758, 0x28e27bc3,
|
|
|
|
|
+ 0x68fc4fc3, 0xa8b70779, 0xa8fc539a, 0x283a653d,
|
|
|
|
|
+ 0x28703a79, 0xa8025879, 0xa8734ba9, 0x0c407275,
|
|
|
|
|
+ 0x4cdfa29b, 0x0cc66ec5, 0x4cdf2596, 0x0d40c131,
|
|
|
|
|
+ 0x4ddfcaa5, 0x0dd2cf8a, 0x4c408dfa, 0x0cdf8750,
|
|
|
|
|
+ 0x4d60c04e, 0x0dffcb92, 0x4df6cc13, 0x4cd24850,
|
|
|
|
|
+ 0x0c404818, 0x4d40e604, 0x4ddfe825, 0x0dd0ed47,
|
|
|
|
|
+ 0x4cdf0696, 0x0cd9008f, 0x0d60e0a0, 0x0dffe420,
|
|
|
|
|
+ 0x0deeeb9e, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4,
|
|
|
|
|
0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f,
|
|
|
|
|
0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1,
|
|
|
|
|
0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f,
|
|
|
|
|
@@ -1418,24 +1418,24 @@ Disassembly of section .text:
|
|
|
|
|
0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
|
|
|
|
|
0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
|
|
|
|
|
0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
|
|
|
|
|
- 0xf8358305, 0xf82d01ed, 0xf8361353, 0xf839234a,
|
|
|
|
|
- 0xf82531fb, 0xf8335165, 0xf83a4080, 0xf83673d7,
|
|
|
|
|
- 0xf832611c, 0xf8ad837d, 0xf8ab01a5, 0xf8a112b8,
|
|
|
|
|
- 0xf8bb2311, 0xf8b230be, 0xf8a75336, 0xf8a4427a,
|
|
|
|
|
- 0xf8a6707e, 0xf8b860b7, 0xf8f88392, 0xf8f300ff,
|
|
|
|
|
- 0xf8ed1386, 0xf8e822af, 0xf8e2302d, 0xf8f1533d,
|
|
|
|
|
- 0xf8f941d2, 0xf8ff7366, 0xf8f061e5, 0xf86b8072,
|
|
|
|
|
- 0xf87a0054, 0xf86b1164, 0xf87e22f3, 0xf86331cf,
|
|
|
|
|
- 0xf87e5296, 0xf8674305, 0xf87771f0, 0xf86b6013,
|
|
|
|
|
- 0xb83c803c, 0xb82b0195, 0xb83d1240, 0xb8252320,
|
|
|
|
|
- 0xb82e3340, 0xb83c53b2, 0xb82f43a1, 0xb828739a,
|
|
|
|
|
- 0xb831608e, 0xb8b88039, 0xb8aa0231, 0xb8bd12b4,
|
|
|
|
|
- 0xb8bd2189, 0xb8ab30a6, 0xb8b552a7, 0xb8aa4197,
|
|
|
|
|
- 0xb8b57145, 0xb8be6254, 0xb8ed80b7, 0xb8ef00b8,
|
|
|
|
|
- 0xb8e9132a, 0xb8f42231, 0xb8ec33d2, 0xb8e35323,
|
|
|
|
|
- 0xb8fa4159, 0xb8e273eb, 0xb8e760a2, 0xb8608287,
|
|
|
|
|
- 0xb865005f, 0xb87b1379, 0xb87e2358, 0xb86f32c2,
|
|
|
|
|
- 0xb86053e3, 0xb86f4154, 0xb87671d5, 0xb866605e,
|
|
|
|
|
+ 0xf83a8229, 0xf83c0057, 0xf8361062, 0xf82b23d9,
|
|
|
|
|
+ 0xf836309c, 0xf826530b, 0xf82c43ff, 0xf837713e,
|
|
|
|
|
+ 0xf8266281, 0xf8b182c2, 0xf8ae015b, 0xf8a6127e,
|
|
|
|
|
+ 0xf8a02179, 0xf8b733c0, 0xf8b55143, 0xf8af4016,
|
|
|
|
|
+ 0xf8b17280, 0xf8b0602d, 0xf8fb82ef, 0xf8f3003e,
|
|
|
|
|
+ 0xf8ef12fc, 0xf8e7226f, 0xf8eb314c, 0xf8e65187,
|
|
|
|
|
+ 0xf8fc41a5, 0xf8e97234, 0xf8f56179, 0xf8738318,
|
|
|
|
|
+ 0xf86803da, 0xf8711112, 0xf8622063, 0xf87a3207,
|
|
|
|
|
+ 0xf87b50a6, 0xf8764280, 0xf86b705a, 0xf87e609d,
|
|
|
|
|
+ 0xb82480e5, 0xb82a005a, 0xb83b1370, 0xb83f2157,
|
|
|
|
|
+ 0xb82431a2, 0xb823506f, 0xb82340ca, 0xb828714b,
|
|
|
|
|
+ 0xb83d61be, 0xb8ab8291, 0xb8ba00d0, 0xb8b5102a,
|
|
|
|
|
+ 0xb8bd22ec, 0xb8bd3108, 0xb8ab51ca, 0xb8a442cd,
|
|
|
|
|
+ 0xb8a770ed, 0xb8ae63e0, 0xb8f18382, 0xb8f3014b,
|
|
|
|
|
+ 0xb8ec1293, 0xb8e02108, 0xb8f13303, 0xb8f950e5,
|
|
|
|
|
+ 0xb8f0413e, 0xb8ea71df, 0xb8f16173, 0xb87481a1,
|
|
|
|
|
+ 0xb87a028b, 0xb87213d8, 0xb86c2299, 0xb86e30bd,
|
|
|
|
|
+ 0xb862537a, 0xb879417b, 0xb86470fd, 0xb870615d,
|
|
|
|
|
|
2020-12-24 15:35:16 +08:00
|
|
|
};
|
|
|
|
|
// END Generated code -- do not edit
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 0824ca393..dc2d5e2c9 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
|
|
|
|
|
@@ -139,6 +139,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21);
|
|
|
|
|
// Java stack pointer
|
|
|
|
|
REGISTER_DECLARATION(Register, esp, r20);
|
|
|
|
|
|
|
|
|
|
+// Preserved predicate register with all elements set TRUE.
|
|
|
|
|
+REGISTER_DECLARATION(PRegister, ptrue, p7);
|
|
|
|
|
+
|
|
|
|
|
#define assert_cond(ARG1) assert(ARG1, #ARG1)
|
|
|
|
|
|
|
|
|
|
namespace asm_util {
|
|
|
|
|
@@ -273,6 +276,14 @@ public:
|
|
|
|
|
f(r->encoding_nocheck(), lsb + 4, lsb);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ void prf(PRegister r, int lsb) {
|
|
|
|
|
+ f(r->encoding_nocheck(), lsb + 3, lsb);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ void pgrf(PRegister r, int lsb) {
|
|
|
|
|
+ f(r->encoding_nocheck(), lsb + 2, lsb);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
unsigned get(int msb = 31, int lsb = 0) {
|
|
|
|
|
int nbits = msb - lsb + 1;
|
2021-05-14 11:51:17 +08:00
|
|
|
unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb;
|
2020-12-24 15:35:16 +08:00
|
|
|
@@ -554,6 +565,18 @@ class Address {
|
|
|
|
|
void lea(MacroAssembler *, Register) const;
|
|
|
|
|
|
2021-05-14 11:51:17 +08:00
|
|
|
static bool offset_ok_for_immed(int64_t offset, uint shift = 0);
|
2020-12-24 15:35:16 +08:00
|
|
|
+
|
|
|
|
|
+ static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) {
|
|
|
|
|
+ if (offset % vl == 0) {
|
|
|
|
|
+ // Convert address offset into sve imm offset (MUL VL).
|
|
|
|
|
+ int sve_offset = offset / vl;
|
|
|
|
|
+ if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
|
|
|
|
|
+ // sve_offset can be encoded
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Convience classes
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -669,6 +692,12 @@ public:
|
2020-12-24 15:35:16 +08:00
|
|
|
void rf(FloatRegister reg, int lsb) {
|
|
|
|
|
current->rf(reg, lsb);
|
|
|
|
|
}
|
|
|
|
|
+ void prf(PRegister reg, int lsb) {
|
|
|
|
|
+ current->prf(reg, lsb);
|
|
|
|
|
+ }
|
|
|
|
|
+ void pgrf(PRegister reg, int lsb) {
|
|
|
|
|
+ current->pgrf(reg, lsb);
|
|
|
|
|
+ }
|
|
|
|
|
void fixed(unsigned value, unsigned mask) {
|
|
|
|
|
current->fixed(value, mask);
|
|
|
|
|
}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2431,13 +2460,18 @@ public:
|
2020-12-24 15:35:16 +08:00
|
|
|
f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
- void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {
|
|
|
|
|
- starti;
|
|
|
|
|
- f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21);
|
|
|
|
|
- f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10);
|
|
|
|
|
- rf(Vn, 5), rf(Rd, 0);
|
|
|
|
|
+#define INSN(NAME, op) \
|
|
|
|
|
+ void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { \
|
|
|
|
|
+ starti; \
|
|
|
|
|
+ f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); \
|
|
|
|
|
+ f(((idx<<1)|1)<<(int)T, 20, 16), f(op, 15, 10); \
|
|
|
|
|
+ rf(Vn, 5), rf(Rd, 0); \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ INSN(umov, 0b001111);
|
|
|
|
|
+ INSN(smov, 0b001011);
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
#define INSN(NAME, opc, opc2, isSHR) \
|
|
|
|
|
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
|
|
|
|
|
starti; \
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2670,13 +2704,299 @@ public:
|
2020-12-24 15:35:16 +08:00
|
|
|
#undef INSN
|
|
|
|
|
|
|
|
|
|
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
|
|
|
|
|
- {
|
|
|
|
|
+{
|
|
|
|
|
+ starti;
|
|
|
|
|
+ assert(T == T8B || T == T16B, "invalid arrangement");
|
|
|
|
|
+ assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
|
|
|
|
|
+ f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
|
|
|
|
|
+ rf(Vm, 16), f(0, 15), f(index, 14, 11);
|
|
|
|
|
+ f(0, 10), rf(Vn, 5), rf(Vd, 0);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+// SVE integer arithmetic - unpredicated
|
|
|
|
|
+#define INSN(NAME, opcode) \
|
|
|
|
|
+ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
|
|
|
|
|
+ starti; \
|
|
|
|
|
+ assert(T != Q, "invalid register variant"); \
|
|
|
|
|
+ f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21), \
|
|
|
|
|
+ rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
|
|
|
|
|
+ }
|
|
|
|
|
+ INSN(sve_add, 0b000);
|
|
|
|
|
+ INSN(sve_sub, 0b001);
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+// SVE floating-point arithmetic - unpredicated
|
|
|
|
|
+#define INSN(NAME, opcode) \
|
|
|
|
|
+ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
|
|
|
|
|
+ starti; \
|
|
|
|
|
+ assert(T == S || T == D, "invalid register variant"); \
|
|
|
|
|
+ f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21), \
|
|
|
|
|
+ rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ INSN(sve_fadd, 0b000);
|
|
|
|
|
+ INSN(sve_fmul, 0b010);
|
|
|
|
|
+ INSN(sve_fsub, 0b001);
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+private:
|
|
|
|
|
+ void sve_predicate_reg_insn(unsigned op24, unsigned op13,
|
|
|
|
|
+ FloatRegister Zd_or_Vd, SIMD_RegVariant T,
|
|
|
|
|
+ PRegister Pg, FloatRegister Zn_or_Vn) {
|
|
|
|
|
+ starti;
|
|
|
|
|
+ f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13);
|
|
|
|
|
+ pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+public:
|
|
|
|
|
+
|
|
|
|
|
+// SVE integer arithmetic - predicated
|
|
|
|
|
+#define INSN(NAME, op1, op2) \
|
|
|
|
|
+ void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \
|
|
|
|
|
+ assert(T != Q, "invalid register variant"); \
|
|
|
|
|
+ sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn); \
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary
|
|
|
|
|
+ INSN(sve_add, 0b00000100, 0b000000000); // vector add
|
|
|
|
|
+ INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
|
|
|
|
|
+ INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right
|
|
|
|
|
+ INSN(sve_cnt, 0b00000100, 0b011010101) // count non-zero bits
|
|
|
|
|
+ INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element
|
|
|
|
|
+ INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
|
|
|
|
|
+ INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left
|
|
|
|
|
+ INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right
|
|
|
|
|
+ INSN(sve_mul, 0b00000100, 0b010000000); // vector mul
|
|
|
|
|
+ INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary
|
|
|
|
|
+ INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary
|
|
|
|
|
+ INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar
|
|
|
|
|
+ INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
|
|
|
|
|
+ INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
|
|
|
|
|
+ INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
|
|
|
|
|
+ INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
|
|
|
|
|
+ INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
|
|
|
|
|
+ INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+// SVE floating-point arithmetic - predicated
|
|
|
|
|
+#define INSN(NAME, op1, op2) \
|
|
|
|
|
+ void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
|
|
|
|
|
+ assert(T == S || T == D, "invalid register variant"); \
|
|
|
|
|
+ sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ INSN(sve_fabs, 0b00000100, 0b011100101);
|
|
|
|
|
+ INSN(sve_fadd, 0b01100101, 0b000000100);
|
|
|
|
|
+ INSN(sve_fadda, 0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
|
|
|
|
|
+ INSN(sve_fdiv, 0b01100101, 0b001101100);
|
|
|
|
|
+ INSN(sve_fmax, 0b01100101, 0b000110100); // floating-point maximum
|
|
|
|
|
+ INSN(sve_fmaxv, 0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
|
|
|
|
|
+ INSN(sve_fmin, 0b01100101, 0b000111100); // floating-point minimum
|
|
|
|
|
+ INSN(sve_fminv, 0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
|
|
|
|
|
+ INSN(sve_fmul, 0b01100101, 0b000010100);
|
|
|
|
|
+ INSN(sve_fneg, 0b00000100, 0b011101101);
|
|
|
|
|
+ INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
|
|
|
|
|
+ INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
|
|
|
|
|
+ INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
|
|
|
|
|
+ INSN(sve_fsqrt, 0b01100101, 0b001101101);
|
|
|
|
|
+ INSN(sve_fsub, 0b01100101, 0b000001100);
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+ // SVE multiply-add/sub - predicated
|
|
|
|
|
+#define INSN(NAME, op0, op1, op2) \
|
|
|
|
|
+ void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
|
|
|
|
|
+ starti; \
|
|
|
|
|
+ assert(T != Q, "invalid size"); \
|
|
|
|
|
+ f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16); \
|
|
|
|
|
+ f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0); \
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm
|
|
|
|
|
+ INSN(sve_fmls, 0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
|
|
|
|
|
+ INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
|
|
|
|
|
+ INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
|
|
|
|
|
+ INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
|
|
|
|
|
+ INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+// SVE bitwise logical - unpredicated
|
|
|
|
|
+#define INSN(NAME, opc) \
|
|
|
|
|
+ void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) { \
|
|
|
|
|
+ starti; \
|
|
|
|
|
+ f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21), \
|
|
|
|
|
+ rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0); \
|
|
|
|
|
+ }
|
|
|
|
|
+ INSN(sve_and, 0b00);
|
|
|
|
|
+ INSN(sve_eor, 0b10);
|
|
|
|
|
+ INSN(sve_orr, 0b01);
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+// SVE shift immediate - unpredicated
|
|
|
|
|
+#define INSN(NAME, opc, isSHR) \
|
|
|
|
|
+ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
|
|
|
|
|
+ starti; \
|
|
|
|
|
+ /* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \
|
|
|
|
|
+ * for shift right are calculated as: \
|
|
|
|
|
+ * 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) \
|
|
|
|
|
+ * 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) \
|
|
|
|
|
+ * 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) \
|
|
|
|
|
+ * 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) \
|
|
|
|
|
+ * for shift left are calculated as: \
|
|
|
|
|
+ * 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 \
|
|
|
|
|
+ * 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 \
|
|
|
|
|
+ * 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 \
|
|
|
|
|
+ * 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 \
|
|
|
|
|
+ */ \
|
|
|
|
|
+ assert(T != Q, "Invalid register variant"); \
|
|
|
|
|
+ if (isSHR) { \
|
|
|
|
|
+ assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \
|
|
|
|
|
+ } else { \
|
|
|
|
|
+ assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \
|
|
|
|
|
+ } \
|
|
|
|
|
+ int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); \
|
|
|
|
|
+ int encodedShift = isSHR ? cVal - shift : cVal + shift; \
|
|
|
|
|
+ int tszh = encodedShift >> 5; \
|
|
|
|
|
+ int tszl_imm = encodedShift & 0x1f; \
|
|
|
|
|
+ f(0b00000100, 31, 24); \
|
|
|
|
|
+ f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16); \
|
|
|
|
|
+ f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0); \
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ INSN(sve_asr, 0b100, /* isSHR = */ true);
|
|
|
|
|
+ INSN(sve_lsl, 0b111, /* isSHR = */ false);
|
|
|
|
|
+ INSN(sve_lsr, 0b101, /* isSHR = */ true);
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+private:
|
|
|
|
|
+
|
|
|
|
|
+ // Scalar base + immediate index
|
|
|
|
|
+ void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
|
|
|
|
|
+ SIMD_RegVariant T, int op1, int type, int op2) {
|
|
|
|
|
+ starti;
|
|
|
|
|
+ assert_cond(T >= type);
|
|
|
|
|
+ f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
|
|
|
|
|
+ f(0, 20), sf(imm, 19, 16), f(op2, 15, 13);
|
|
|
|
|
+ pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Scalar base + scalar index
|
|
|
|
|
+ void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
|
|
|
|
|
+ SIMD_RegVariant T, int op1, int type, int op2) {
|
|
|
|
|
+ starti;
|
|
|
|
|
+ assert_cond(T >= type);
|
|
|
|
|
+ f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
|
|
|
|
|
+ rf(Xm, 16), f(op2, 15, 13);
|
|
|
|
|
+ pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ void sve_ld_st1(FloatRegister Zt, PRegister Pg,
|
|
|
|
|
+ SIMD_RegVariant T, const Address &a,
|
|
|
|
|
+ int op1, int type, int imm_op2, int scalar_op2) {
|
|
|
|
|
+ switch (a.getMode()) {
|
|
|
|
|
+ case Address::base_plus_offset:
|
|
|
|
|
+ sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
|
|
|
|
|
+ break;
|
|
|
|
|
+ case Address::base_plus_offset_reg:
|
|
|
|
|
+ sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
|
|
|
|
|
+ break;
|
|
|
|
|
+ default:
|
|
|
|
|
+ ShouldNotReachHere();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+public:
|
|
|
|
|
+
|
|
|
|
|
+// SVE load/store - predicated
|
|
|
|
|
+#define INSN(NAME, op1, type, imm_op2, scalar_op2) \
|
|
|
|
|
+ void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) { \
|
|
|
|
|
+ assert(T != Q, "invalid register variant"); \
|
|
|
|
|
+ sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2); \
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
|
|
|
|
|
+ INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
|
|
|
|
|
+ INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
|
|
|
|
|
+ INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
|
|
|
|
|
+ INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
|
|
|
|
|
+ INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
|
|
|
|
|
+ INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
|
|
|
|
|
+ INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+// SVE load/store - unpredicated
|
|
|
|
|
+#define INSN(NAME, op1) \
|
|
|
|
|
+ void NAME(FloatRegister Zt, const Address &a) { \
|
|
|
|
|
+ starti; \
|
|
|
|
|
+ assert(a.index() == noreg, "invalid address variant"); \
|
|
|
|
|
+ f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \
|
|
|
|
|
+ f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ INSN(sve_ldr, 0b100); // LDR (vector)
|
|
|
|
|
+ INSN(sve_str, 0b111); // STR (vector)
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+#define INSN(NAME, op) \
|
|
|
|
|
+ void NAME(Register Xd, Register Xn, int imm6) { \
|
|
|
|
|
+ starti; \
|
|
|
|
|
+ f(0b000001000, 31, 23), f(op, 22, 21); \
|
|
|
|
|
+ srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ INSN(sve_addvl, 0b01);
|
|
|
|
|
+ INSN(sve_addpl, 0b11);
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
|
|
|
|
+// SVE inc/dec register by element count
|
|
|
|
|
+#define INSN(NAME, op) \
|
|
|
|
|
+ void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
|
|
|
|
|
+ starti; \
|
|
|
|
|
+ assert(T != Q, "invalid size"); \
|
|
|
|
|
+ f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20); \
|
|
|
|
|
+ f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0); \
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ INSN(sve_inc, 0);
|
|
|
|
|
+ INSN(sve_dec, 1);
|
|
|
|
|
+#undef INSN
|
|
|
|
|
+
|
2021-08-13 14:54:30 +08:00
|
|
|
+// SVE predicate count
|
2020-12-24 15:35:16 +08:00
|
|
|
+ void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
|
|
|
|
|
+ starti;
|
|
|
|
|
+ assert(T != Q, "invalid size");
|
|
|
|
|
+ f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
|
|
|
|
|
+ prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // SVE dup scalar
|
|
|
|
|
+ void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
|
|
|
|
|
+ starti;
|
|
|
|
|
+ assert(T != Q, "invalid size");
|
|
|
|
|
+ f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10);
|
|
|
|
|
+ srf(Rn, 5), rf(Zd, 0);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // SVE dup imm
|
|
|
|
|
+ void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
|
|
|
|
|
+ starti;
|
|
|
|
|
+ assert(T != Q, "invalid size");
|
|
|
|
|
+ int sh = 0;
|
|
|
|
|
+ if (imm8 <= 127 && imm8 >= -128) {
|
|
|
|
|
+ sh = 0;
|
|
|
|
|
+ } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
|
|
|
|
|
+ sh = 1;
|
|
|
|
|
+ imm8 = (imm8 >> 8);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ guarantee(false, "invalid immediate");
|
|
|
|
|
+ }
|
|
|
|
|
+ f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14);
|
|
|
|
|
+ f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
|
|
|
|
|
starti;
|
|
|
|
|
- assert(T == T8B || T == T16B, "invalid arrangement");
|
|
|
|
|
- assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
|
|
|
|
|
- f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
|
|
|
|
|
- rf(Vm, 16), f(0, 15), f(index, 14, 11);
|
|
|
|
|
- f(0, 10), rf(Vn, 5), rf(Vd, 0);
|
|
|
|
|
+ f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
|
|
|
|
|
+ f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
|
|
|
|
|
index 6ac54f257..a258528ea 100644
|
|
|
|
|
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
|
|
|
|
|
@@ -456,8 +456,12 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z
|
|
|
|
|
ZSetupArguments setup_arguments(masm, stub);
|
|
|
|
|
__ mov(rscratch1, stub->slow_path());
|
|
|
|
|
__ blr(rscratch1);
|
|
|
|
|
+ if (UseSVE > 0) {
|
|
|
|
|
+ // Reinitialize the ptrue predicate register, in case the external runtime
|
|
|
|
|
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
|
|
|
|
|
+ __ reinitialize_ptrue();
|
|
|
|
|
+ }
|
|
|
|
|
}
|
|
|
|
|
-
|
|
|
|
|
// Stub exit
|
|
|
|
|
__ b(*stub->continuation());
|
|
|
|
|
}
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
|
|
|
|
|
index 071845e5b..f26ea2a8b 100644
|
|
|
|
|
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
|
|
|
|
|
@@ -112,6 +112,9 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
|
|
|
|
"Avoid generating unaligned memory accesses") \
|
|
|
|
|
product(bool, UseLSE, false, \
|
|
|
|
|
"Use LSE instructions") \
|
|
|
|
|
+ product(uint, UseSVE, 0, \
|
|
|
|
|
+ "Highest supported SVE instruction set version") \
|
|
|
|
|
+ range(0, 2) \
|
|
|
|
|
product(bool, UseBlockZeroing, true, \
|
|
|
|
|
"Use DC ZVA for block zeroing") \
|
|
|
|
|
product(intx, BlockZeroingLowLimit, 256, \
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index aecab30c1..b6b070e62 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -53,6 +53,7 @@
|
|
|
|
|
#include "opto/compile.hpp"
|
|
|
|
|
#include "opto/intrinsicnode.hpp"
|
|
|
|
|
#include "opto/node.hpp"
|
2020-12-24 15:35:16 +08:00
|
|
|
+#include "opto/matcher.hpp"
|
2021-08-13 14:54:30 +08:00
|
|
|
#endif
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
#ifdef PRODUCT
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2110,8 +2110,17 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) {
|
2020-12-24 15:35:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Push lots of registers in the bit set supplied. Don't push sp.
|
|
|
|
|
-// Return the number of words pushed
|
|
|
|
|
+// Return the number of dwords pushed
|
|
|
|
|
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
|
|
|
|
|
+ int words_pushed = 0;
|
|
|
|
|
+ bool use_sve = false;
|
|
|
|
|
+ int sve_vector_size_in_bytes = 0;
|
|
|
|
|
+
|
|
|
|
|
+#ifdef COMPILER2
|
|
|
|
|
+ use_sve = Matcher::supports_scalable_vector();
|
|
|
|
|
+ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
// Scan bitset to accumulate register pairs
|
|
|
|
|
unsigned char regs[32];
|
|
|
|
|
int count = 0;
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2126,8 +2135,18 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
|
2020-12-24 15:35:16 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ // SVE
|
|
|
|
|
+ if (use_sve && sve_vector_size_in_bytes > 16) {
|
|
|
|
|
+ sub(stack, stack, sve_vector_size_in_bytes * count);
|
|
|
|
|
+ for (int i = 0; i < count; i++) {
|
|
|
|
|
+ sve_str(as_FloatRegister(regs[i]), Address(stack, i));
|
|
|
|
|
+ }
|
|
|
|
|
+ return count * sve_vector_size_in_bytes / 8;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
add(stack, stack, -count * wordSize * 2);
|
|
|
|
|
|
|
|
|
|
+ // NEON
|
|
|
|
|
if (count & 1) {
|
|
|
|
|
strq(as_FloatRegister(regs[0]), Address(stack));
|
|
|
|
|
i += 1;
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2140,7 +2159,16 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
|
2020-12-24 15:35:16 +08:00
|
|
|
return count;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+// Return the number of dwords popped
|
|
|
|
|
int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
|
|
|
|
|
+ int words_pushed = 0;
|
|
|
|
|
+ bool use_sve = false;
|
|
|
|
|
+ int sve_vector_size_in_bytes = 0;
|
|
|
|
|
+
|
|
|
|
|
+#ifdef COMPILER2
|
|
|
|
|
+ use_sve = Matcher::supports_scalable_vector();
|
|
|
|
|
+ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
|
|
|
|
|
+#endif
|
|
|
|
|
// Scan bitset to accumulate register pairs
|
|
|
|
|
unsigned char regs[32];
|
|
|
|
|
int count = 0;
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2155,6 +2183,16 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
|
2020-12-24 15:35:16 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ // SVE
|
|
|
|
|
+ if (use_sve && sve_vector_size_in_bytes > 16) {
|
|
|
|
|
+ for (int i = count - 1; i >= 0; i--) {
|
|
|
|
|
+ sve_ldr(as_FloatRegister(regs[i]), Address(stack, i));
|
|
|
|
|
+ }
|
|
|
|
|
+ add(stack, stack, sve_vector_size_in_bytes * count);
|
|
|
|
|
+ return count * sve_vector_size_in_bytes / 8;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // NEON
|
|
|
|
|
if (count & 1) {
|
|
|
|
|
ldrq(as_FloatRegister(regs[0]), Address(stack));
|
|
|
|
|
i += 1;
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2630,23 +2668,39 @@ void MacroAssembler::pop_call_clobbered_registers() {
|
2020-12-24 15:35:16 +08:00
|
|
|
pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-void MacroAssembler::push_CPU_state(bool save_vectors) {
|
|
|
|
|
- int step = (save_vectors ? 8 : 4) * wordSize;
|
|
|
|
|
+void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve,
|
|
|
|
|
+ int sve_vector_size_in_bytes) {
|
|
|
|
|
push(0x3fffffff, sp); // integer registers except lr & sp
|
|
|
|
|
- mov(rscratch1, -step);
|
|
|
|
|
- sub(sp, sp, step);
|
|
|
|
|
- for (int i = 28; i >= 4; i -= 4) {
|
|
|
|
|
- st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
|
|
|
|
|
- as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
|
|
|
|
|
+ if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) {
|
|
|
|
|
+ sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
|
|
|
|
|
+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
|
|
|
|
|
+ sve_str(as_FloatRegister(i), Address(sp, i));
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ int step = (save_vectors ? 8 : 4) * wordSize;
|
|
|
|
|
+ mov(rscratch1, -step);
|
|
|
|
|
+ sub(sp, sp, step);
|
|
|
|
|
+ for (int i = 28; i >= 4; i -= 4) {
|
|
|
|
|
+ st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
|
|
|
|
|
+ as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
|
|
|
|
|
+ }
|
|
|
|
|
+ st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
|
|
|
|
|
}
|
|
|
|
|
- st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-void MacroAssembler::pop_CPU_state(bool restore_vectors) {
|
|
|
|
|
- int step = (restore_vectors ? 8 : 4) * wordSize;
|
|
|
|
|
- for (int i = 0; i <= 28; i += 4)
|
|
|
|
|
- ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
|
|
|
|
|
- as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
|
|
|
|
|
+void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve,
|
|
|
|
|
+ int sve_vector_size_in_bytes) {
|
|
|
|
|
+ if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) {
|
|
|
|
|
+ for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) {
|
|
|
|
|
+ sve_ldr(as_FloatRegister(i), Address(sp, i));
|
|
|
|
|
+ }
|
|
|
|
|
+ add(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ int step = (restore_vectors ? 8 : 4) * wordSize;
|
|
|
|
|
+ for (int i = 0; i <= 28; i += 4)
|
|
|
|
|
+ ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
|
|
|
|
|
+ as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
|
|
|
|
|
+ }
|
|
|
|
|
pop(0x3fffffff, sp); // integer registers except lr & sp
|
|
|
|
|
}
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2695,6 +2749,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp)
|
2020-12-24 15:35:16 +08:00
|
|
|
return Address(base, offset);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+Address MacroAssembler::sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp) {
|
|
|
|
|
+ assert(offset >= 0, "spill to negative address?");
|
|
|
|
|
+
|
|
|
|
|
+ Register base = sp;
|
|
|
|
|
+
|
|
|
|
|
+ // An immediate offset in the range 0 to 255 which is multiplied
|
|
|
|
|
+ // by the current vector or predicate register size in bytes.
|
|
|
|
|
+ if (offset % sve_reg_size_in_bytes == 0 && offset < ((1<<8)*sve_reg_size_in_bytes)) {
|
|
|
|
|
+ return Address(base, offset / sve_reg_size_in_bytes);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ add(tmp, base, offset);
|
|
|
|
|
+ return Address(tmp);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// Checks whether offset is aligned.
|
|
|
|
|
// Returns true if it is, else false.
|
|
|
|
|
bool MacroAssembler::merge_alignment_check(Register base,
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -5879,3 +5948,24 @@ void MacroAssembler::get_thread(Register dst) {
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
pop(saved_regs, sp);
|
|
|
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+void MacroAssembler::verify_sve_vector_length() {
|
|
|
|
|
+ Label verify_ok;
|
|
|
|
|
+ assert(UseSVE > 0, "should only be used for SVE");
|
|
|
|
|
+ movw(rscratch1, zr);
|
|
|
|
|
+ sve_inc(rscratch1, B);
|
|
|
|
|
+ subsw(zr, rscratch1, VM_Version::get_initial_sve_vector_length());
|
|
|
|
|
+ br(EQ, verify_ok);
|
|
|
|
|
+ stop("Error: SVE vector length has changed since jvm startup");
|
|
|
|
|
+ bind(verify_ok);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+void MacroAssembler::verify_ptrue() {
|
|
|
|
|
+ Label verify_ok;
|
|
|
|
|
+ assert(UseSVE > 0, "should only be used for SVE");
|
|
|
|
|
+ sve_cntp(rscratch1, B, ptrue, ptrue); // get true elements count.
|
|
|
|
|
+ sve_dec(rscratch1, B);
|
|
|
|
|
+ cbz(rscratch1, verify_ok);
|
|
|
|
|
+ stop("Error: the preserved predicate register (p7) elements are not all true");
|
|
|
|
|
+ bind(verify_ok);
|
|
|
|
|
+}
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 85fdc0c88..dccd24911 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
|
|
|
|
|
@@ -862,8 +862,10 @@ public:
|
|
|
|
|
|
|
|
|
|
DEBUG_ONLY(void verify_heapbase(const char* msg);)
|
|
|
|
|
|
|
|
|
|
- void push_CPU_state(bool save_vectors = false);
|
|
|
|
|
- void pop_CPU_state(bool restore_vectors = false) ;
|
|
|
|
|
+ void push_CPU_state(bool save_vectors = false, bool use_sve = false,
|
|
|
|
|
+ int sve_vector_size_in_bytes = 0);
|
|
|
|
|
+ void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
|
|
|
|
|
+ int sve_vector_size_in_bytes = 0);
|
|
|
|
|
|
|
|
|
|
// Round up to a power of two
|
|
|
|
|
void round_to(Register reg, int modulus);
|
|
|
|
|
@@ -938,6 +940,11 @@ public:
|
|
|
|
|
|
|
|
|
|
Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
|
|
|
|
|
|
|
|
|
|
+ void verify_sve_vector_length();
|
|
|
|
|
+ void reinitialize_ptrue() {
|
|
|
|
|
+ sve_ptrue(ptrue, B);
|
|
|
|
|
+ }
|
|
|
|
|
+ void verify_ptrue();
|
|
|
|
|
|
|
|
|
|
// Debugging
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1319,6 +1326,7 @@ private:
|
2020-12-24 15:35:16 +08:00
|
|
|
// Returns an address on the stack which is reachable with a ldr/str of size
|
|
|
|
|
// Uses rscratch2 if the address is not directly reachable
|
|
|
|
|
Address spill_address(int size, int offset, Register tmp=rscratch2);
|
|
|
|
|
+ Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);
|
|
|
|
|
|
|
|
|
|
bool merge_alignment_check(Register base, size_t size, long cur_offset, long prev_offset) const;
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1342,6 +1350,9 @@ public:
|
2020-12-24 15:35:16 +08:00
|
|
|
void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
|
|
|
|
|
str(Vx, T, spill_address(1 << (int)T, offset));
|
|
|
|
|
}
|
|
|
|
|
+ void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
|
|
|
|
|
+ sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
|
|
|
|
|
+ }
|
|
|
|
|
void unspill(Register Rx, bool is64, int offset) {
|
|
|
|
|
if (is64) {
|
|
|
|
|
ldr(Rx, spill_address(8, offset));
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1352,6 +1363,9 @@ public:
|
2020-12-24 15:35:16 +08:00
|
|
|
void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
|
|
|
|
|
ldr(Vx, T, spill_address(1 << (int)T, offset));
|
|
|
|
|
}
|
|
|
|
|
+ void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
|
|
|
|
|
+ sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
|
|
|
|
|
+ }
|
|
|
|
|
void spill_copy128(int src_offset, int dst_offset,
|
|
|
|
|
Register tmp1=rscratch1, Register tmp2=rscratch2) {
|
|
|
|
|
if (src_offset < 512 && (src_offset & 7) == 0 &&
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1365,6 +1379,15 @@ public:
|
2020-12-24 15:35:16 +08:00
|
|
|
spill(tmp1, true, dst_offset+8);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
+ void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
|
|
|
|
|
+ int sve_vec_reg_size_in_bytes) {
|
|
|
|
|
+ assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
|
|
|
|
|
+ for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
|
|
|
|
|
+ spill_copy128(src_offset, dst_offset);
|
|
|
|
|
+ src_offset += 16;
|
|
|
|
|
+ dst_offset += 16;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#ifdef ASSERT
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/register_aarch64.cpp b/src/hotspot/cpu/aarch64/register_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 36cbe3fee..3db8e8337 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/register_aarch64.cpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/register_aarch64.cpp
|
|
|
|
|
@@ -1,6 +1,6 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -33,6 +33,9 @@ const int ConcreteRegisterImpl::max_fpr
|
|
|
|
|
= ConcreteRegisterImpl::max_gpr +
|
|
|
|
|
FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register;
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
+const int ConcreteRegisterImpl::max_pr
|
|
|
|
|
+ = ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers;
|
2021-08-13 14:54:30 +08:00
|
|
|
+
|
2020-12-24 15:35:16 +08:00
|
|
|
const char* RegisterImpl::name() const {
|
|
|
|
|
const char* names[number_of_registers] = {
|
2021-08-13 14:54:30 +08:00
|
|
|
"c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
|
|
|
|
|
@@ -54,3 +57,10 @@ const char* FloatRegisterImpl::name() const {
|
2020-12-24 15:35:16 +08:00
|
|
|
};
|
|
|
|
|
return is_valid() ? names[encoding()] : "noreg";
|
|
|
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+const char* PRegisterImpl::name() const {
|
|
|
|
|
+ const char* names[number_of_registers] = {
|
|
|
|
|
+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7"
|
|
|
|
|
+ };
|
|
|
|
|
+ return is_valid() ? names[encoding()] : "noreg";
|
|
|
|
|
+}
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 75db19977..f6ab3b640 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/register_aarch64.hpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -129,9 +129,10 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
|
2020-12-24 15:35:16 +08:00
|
|
|
public:
|
|
|
|
|
enum {
|
2021-08-13 14:54:30 +08:00
|
|
|
number_of_registers = 32,
|
|
|
|
|
- max_slots_per_register = 4,
|
2020-12-24 15:35:16 +08:00
|
|
|
+ max_slots_per_register = 8,
|
2021-08-13 14:54:30 +08:00
|
|
|
save_slots_per_register = 2,
|
|
|
|
|
- extra_save_slots_per_register = max_slots_per_register - save_slots_per_register
|
2020-12-24 15:35:16 +08:00
|
|
|
+ slots_per_neon_register = 4,
|
|
|
|
|
+ extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// construction
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -187,6 +188,79 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v29 , (29));
|
2020-12-24 15:35:16 +08:00
|
|
|
CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30));
|
|
|
|
|
CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31));
|
|
|
|
|
|
|
|
|
|
+// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0].
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z0 , ( 0));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z1 , ( 1));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z2 , ( 2));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z3 , ( 3));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z4 , ( 4));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z5 , ( 5));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z6 , ( 6));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z7 , ( 7));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z8 , ( 8));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z9 , ( 9));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z10 , (10));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z11 , (11));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z12 , (12));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z13 , (13));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z14 , (14));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z15 , (15));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z16 , (16));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z17 , (17));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z18 , (18));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z19 , (19));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z20 , (20));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z21 , (21));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z22 , (22));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z23 , (23));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z24 , (24));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z25 , (25));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z26 , (26));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z27 , (27));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z28 , (28));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z29 , (29));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z30 , (30));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, z31 , (31));
|
|
|
|
|
+
|
|
|
|
|
+class PRegisterImpl;
|
|
|
|
|
+typedef PRegisterImpl* PRegister;
|
|
|
|
|
+inline PRegister as_PRegister(int encoding) {
|
|
|
|
|
+ return (PRegister)(intptr_t)encoding;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+// The implementation of predicate registers for the architecture
|
|
|
|
|
+class PRegisterImpl: public AbstractRegisterImpl {
|
|
|
|
|
+ public:
|
|
|
|
|
+ enum {
|
|
|
|
|
+ number_of_registers = 8,
|
|
|
|
|
+ max_slots_per_register = 1
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ // construction
|
|
|
|
|
+ inline friend PRegister as_PRegister(int encoding);
|
|
|
|
|
+
|
|
|
|
|
+ VMReg as_VMReg();
|
|
|
|
|
+
|
|
|
|
|
+ // derived registers, offsets, and addresses
|
|
|
|
|
+ PRegister successor() const { return as_PRegister(encoding() + 1); }
|
|
|
|
|
+
|
|
|
|
|
+ // accessors
|
|
|
|
|
+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
|
|
|
|
|
+ int encoding_nocheck() const { return (intptr_t)this; }
|
|
|
|
|
+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
|
|
|
|
|
+ const char* name() const;
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
+// The predicate registers of SVE.
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(PRegister, p3, ( 3));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(PRegister, p4, ( 4));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(PRegister, p5, ( 5));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(PRegister, p6, ( 6));
|
|
|
|
|
+CONSTANT_REGISTER_DECLARATION(PRegister, p7, ( 7));
|
|
|
|
|
+
|
|
|
|
|
// Need to know the total number of registers of all sorts for SharedInfo.
|
|
|
|
|
// Define a class that exports it.
|
|
|
|
|
class ConcreteRegisterImpl : public AbstractRegisterImpl {
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -199,12 +273,14 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
|
|
|
|
|
|
|
|
|
|
number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
|
|
|
|
|
FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers +
|
|
|
|
|
+ PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers +
|
2020-12-24 15:35:16 +08:00
|
|
|
1) // flags
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// added to make it compile
|
|
|
|
|
static const int max_gpr;
|
|
|
|
|
static const int max_fpr;
|
|
|
|
|
+ static const int max_pr;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// A set of registers
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
|
|
|
|
|
index c18109087..e337f582a 100644
|
|
|
|
|
--- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
@@ -154,3 +154,47 @@ REGISTER_DEFINITION(Register, rthread);
|
|
|
|
|
REGISTER_DEFINITION(Register, rheapbase);
|
|
|
|
|
|
|
|
|
|
REGISTER_DEFINITION(Register, r31_sp);
|
|
|
|
|
+
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z0);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z1);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z2);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z3);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z4);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z5);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z6);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z7);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z8);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z9);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z10);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z11);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z12);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z13);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z14);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z15);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z16);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z17);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z18);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z19);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z20);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z21);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z22);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z23);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z24);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z25);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z26);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z27);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z28);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z29);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z30);
|
|
|
|
|
+REGISTER_DEFINITION(FloatRegister, z31);
|
|
|
|
|
+
|
|
|
|
|
+REGISTER_DEFINITION(PRegister, p0);
|
|
|
|
|
+REGISTER_DEFINITION(PRegister, p1);
|
|
|
|
|
+REGISTER_DEFINITION(PRegister, p2);
|
|
|
|
|
+REGISTER_DEFINITION(PRegister, p3);
|
|
|
|
|
+REGISTER_DEFINITION(PRegister, p4);
|
|
|
|
|
+REGISTER_DEFINITION(PRegister, p5);
|
|
|
|
|
+REGISTER_DEFINITION(PRegister, p6);
|
|
|
|
|
+REGISTER_DEFINITION(PRegister, p7);
|
|
|
|
|
+
|
|
|
|
|
+REGISTER_DEFINITION(PRegister, ptrue);
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 3d3cc3a1e..6242cce08 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -111,11 +111,28 @@ class RegisterSaver {
|
2020-12-24 15:35:16 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
|
|
|
|
|
+ bool use_sve = false;
|
|
|
|
|
+ int sve_vector_size_in_bytes = 0;
|
|
|
|
|
+ int sve_vector_size_in_slots = 0;
|
|
|
|
|
+
|
|
|
|
|
+#ifdef COMPILER2
|
|
|
|
|
+ use_sve = Matcher::supports_scalable_vector();
|
|
|
|
|
+ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
|
|
|
|
|
+ sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
#if COMPILER2_OR_JVMCI
|
|
|
|
|
if (save_vectors) {
|
|
|
|
|
+ int vect_words = 0;
|
|
|
|
|
+ int extra_save_slots_per_register = 0;
|
|
|
|
|
// Save upper half of vector registers
|
2021-08-13 14:54:30 +08:00
|
|
|
- int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register /
|
|
|
|
|
- VMRegImpl::slots_per_word;
|
2020-12-24 15:35:16 +08:00
|
|
|
+ if (use_sve) {
|
|
|
|
|
+ extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
|
|
|
|
|
+ }
|
|
|
|
|
+ vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
|
|
|
|
|
+ VMRegImpl::slots_per_word;
|
|
|
|
|
additional_frame_words += vect_words;
|
|
|
|
|
}
|
|
|
|
|
#else
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -134,7 +151,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
// Save Integer and Float registers.
|
|
|
|
|
__ enter();
|
|
|
|
|
- __ push_CPU_state(save_vectors);
|
|
|
|
|
+ __ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);
|
|
|
|
|
|
|
|
|
|
// Set an oopmap for the call site. This oopmap will map all
|
|
|
|
|
// oop-registers and debug-info registers as callee-saved. This
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -158,8 +175,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
|
|
|
|
|
FloatRegister r = as_FloatRegister(i);
|
2021-08-13 14:54:30 +08:00
|
|
|
- int sp_offset = save_vectors ? (FloatRegisterImpl::max_slots_per_register * i) :
|
|
|
|
|
- (FloatRegisterImpl::save_slots_per_register * i);
|
2020-12-24 15:35:16 +08:00
|
|
|
+ int sp_offset = 0;
|
|
|
|
|
+ if (save_vectors) {
|
|
|
|
|
+ sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
|
|
|
|
|
+ (FloatRegisterImpl::slots_per_neon_register * i);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ sp_offset = FloatRegisterImpl::save_slots_per_register * i;
|
|
|
|
|
+ }
|
|
|
|
|
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
|
|
|
|
|
r->as_VMReg());
|
|
|
|
|
}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -168,10 +190,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
2020-12-24 15:35:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
|
|
|
|
|
-#ifndef COMPILER2
|
|
|
|
|
+#ifdef COMPILER2
|
|
|
|
|
+ __ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
|
|
|
|
|
+ Matcher::scalable_vector_reg_size(T_BYTE));
|
|
|
|
|
+#else
|
|
|
|
|
+#if !INCLUDE_JVMCI
|
|
|
|
|
assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
|
|
|
|
|
#endif
|
|
|
|
|
__ pop_CPU_state(restore_vectors);
|
|
|
|
|
+#endif
|
|
|
|
|
__ leave();
|
|
|
|
|
}
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1829,6 +1856,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
|
2020-12-24 15:35:16 +08:00
|
|
|
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ if (UseSVE > 0) {
|
|
|
|
|
+ // Make sure that jni code does not change SVE vector length.
|
|
|
|
|
+ __ verify_sve_vector_length();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
// check for safepoint operation in progress and/or pending suspend requests
|
|
|
|
|
Label safepoint_in_progress, safepoint_in_progress_done;
|
|
|
|
|
{
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2759,6 +2791,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
|
2020-12-24 15:35:16 +08:00
|
|
|
__ maybe_isb();
|
|
|
|
|
__ membar(Assembler::LoadLoad | Assembler::LoadStore);
|
|
|
|
|
|
|
|
|
|
+ if (UseSVE > 0 && save_vectors) {
|
|
|
|
|
+ // Reinitialize the ptrue predicate register, in case the external runtime
|
|
|
|
|
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
|
|
|
|
|
+ __ reinitialize_ptrue();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
__ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
|
|
|
|
|
__ cbz(rscratch1, noException);
|
|
|
|
|
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 26a54c87e..85f64c007 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -488,6 +488,11 @@ class StubGenerator: public StubCodeGenerator {
|
2020-12-24 15:35:16 +08:00
|
|
|
__ call_VM_leaf(CAST_FROM_FN_PTR(address,
|
|
|
|
|
SharedRuntime::exception_handler_for_return_address),
|
|
|
|
|
rthread, c_rarg1);
|
|
|
|
|
+ if (UseSVE > 0 ) {
|
|
|
|
|
+ // Reinitialize the ptrue predicate register, in case the external runtime
|
|
|
|
|
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
|
|
|
|
|
+ __ reinitialize_ptrue();
|
|
|
|
|
+ }
|
|
|
|
|
// we should not really care that lr is no longer the callee
|
|
|
|
|
// address. we saved the value the handler needs in r19 so we can
|
|
|
|
|
// just copy it to r3. however, the C2 handler will push its own
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -5092,6 +5097,12 @@ class StubGenerator: public StubCodeGenerator {
|
2020-12-24 15:35:16 +08:00
|
|
|
__ reset_last_Java_frame(true);
|
|
|
|
|
__ maybe_isb();
|
|
|
|
|
|
|
|
|
|
+ if (UseSVE > 0) {
|
|
|
|
|
+ // Reinitialize the ptrue predicate register, in case the external runtime
|
|
|
|
|
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
|
|
|
|
|
+ __ reinitialize_ptrue();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
__ leave();
|
|
|
|
|
|
|
|
|
|
// check for pending exceptions
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 03d7a6e2d..42f301531 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
|
|
|
|
|
@@ -1377,6 +1377,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
|
|
|
|
|
__ push(dtos);
|
|
|
|
|
__ push(ltos);
|
|
|
|
|
|
|
|
|
|
+ if (UseSVE > 0) {
|
|
|
|
|
+ // Make sure that jni code does not change SVE vector length.
|
|
|
|
|
+ __ verify_sve_vector_length();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
// change thread state
|
|
|
|
|
__ mov(rscratch1, _thread_in_native_trans);
|
|
|
|
|
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index e906454f1..7ae881b74 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -30,12 +30,14 @@
|
2020-12-24 15:35:16 +08:00
|
|
|
#include "runtime/java.hpp"
|
|
|
|
|
#include "runtime/stubCodeGenerator.hpp"
|
2021-08-13 14:54:30 +08:00
|
|
|
#include "runtime/vm_version.hpp"
|
2020-12-24 15:35:16 +08:00
|
|
|
+#include "utilities/formatBuffer.hpp"
|
|
|
|
|
#include "utilities/macros.hpp"
|
|
|
|
|
|
|
|
|
|
#include OS_HEADER_INLINE(os)
|
|
|
|
|
|
|
|
|
|
-#include <sys/auxv.h>
|
|
|
|
|
#include <asm/hwcap.h>
|
|
|
|
|
+#include <sys/auxv.h>
|
|
|
|
|
+#include <sys/prctl.h>
|
|
|
|
|
|
|
|
|
|
#ifndef HWCAP_AES
|
|
|
|
|
#define HWCAP_AES (1<<3)
|
|
|
|
|
@@ -61,12 +63,27 @@
|
|
|
|
|
#define HWCAP_ATOMICS (1<<8)
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
+#ifndef HWCAP_SVE
|
|
|
|
|
+#define HWCAP_SVE (1 << 22)
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
+#ifndef HWCAP2_SVE2
|
|
|
|
|
+#define HWCAP2_SVE2 (1 << 1)
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
+#ifndef PR_SVE_GET_VL
|
|
|
|
|
+// For old toolchains which do not have SVE related macros defined.
|
|
|
|
|
+#define PR_SVE_SET_VL 50
|
|
|
|
|
+#define PR_SVE_GET_VL 51
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
int VM_Version::_cpu;
|
|
|
|
|
int VM_Version::_model;
|
|
|
|
|
int VM_Version::_model2;
|
|
|
|
|
int VM_Version::_variant;
|
|
|
|
|
int VM_Version::_revision;
|
|
|
|
|
int VM_Version::_stepping;
|
|
|
|
|
+int VM_Version::_initial_sve_vector_length;
|
|
|
|
|
VM_Version::PsrInfo VM_Version::_psr_info = { 0, };
|
|
|
|
|
|
|
|
|
|
static BufferBlob* stub_blob;
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -164,6 +181,7 @@ void VM_Version::get_processor_features() {
|
2020-12-24 15:35:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unsigned long auxv = getauxval(AT_HWCAP);
|
|
|
|
|
+ unsigned long auxv2 = getauxval(AT_HWCAP2);
|
|
|
|
|
|
|
|
|
|
char buf[512];
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -269,6 +287,8 @@ void VM_Version::get_processor_features() {
|
2020-12-24 15:35:16 +08:00
|
|
|
if (auxv & HWCAP_SHA1) strcat(buf, ", sha1");
|
|
|
|
|
if (auxv & HWCAP_SHA2) strcat(buf, ", sha256");
|
|
|
|
|
if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
|
|
|
|
|
+ if (auxv & HWCAP_SVE) strcat(buf, ", sve");
|
|
|
|
|
+ if (auxv2 & HWCAP2_SVE2) strcat(buf, ", sve2");
|
|
|
|
|
|
|
|
|
|
_features_string = os::strdup(buf);
|
|
|
|
|
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -402,6 +422,18 @@ void VM_Version::get_processor_features() {
|
2020-12-24 15:35:16 +08:00
|
|
|
FLAG_SET_DEFAULT(UseBlockZeroing, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ if (auxv & HWCAP_SVE) {
|
|
|
|
|
+ if (FLAG_IS_DEFAULT(UseSVE)) {
|
|
|
|
|
+ FLAG_SET_DEFAULT(UseSVE, (auxv2 & HWCAP2_SVE2) ? 2 : 1);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (UseSVE > 0) {
|
|
|
|
|
+ _initial_sve_vector_length = prctl(PR_SVE_GET_VL);
|
|
|
|
|
+ }
|
|
|
|
|
+ } else if (UseSVE > 0) {
|
|
|
|
|
+ warning("UseSVE specified, but not supported on current CPU. Disabling SVE.");
|
|
|
|
|
+ FLAG_SET_DEFAULT(UseSVE, 0);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
// This machine allows unaligned memory accesses
|
|
|
|
|
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
|
|
|
|
|
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -435,6 +467,50 @@ void VM_Version::get_processor_features() {
|
2020-12-24 15:35:16 +08:00
|
|
|
UseMontgomerySquareIntrinsic = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ if (UseSVE > 0) {
|
|
|
|
|
+ if (FLAG_IS_DEFAULT(MaxVectorSize)) {
|
|
|
|
|
+ MaxVectorSize = _initial_sve_vector_length;
|
|
|
|
|
+ } else if (MaxVectorSize < 16) {
|
|
|
|
|
+ warning("SVE does not support vector length less than 16 bytes. Disabling SVE.");
|
|
|
|
|
+ UseSVE = 0;
|
|
|
|
|
+ } else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) {
|
|
|
|
|
+ int new_vl = prctl(PR_SVE_SET_VL, MaxVectorSize);
|
|
|
|
|
+ _initial_sve_vector_length = new_vl;
|
|
|
|
|
+ // If MaxVectorSize is larger than the system's largest supported SVE vector length, the above prctl()
|
|
|
|
|
+ // call will set the task vector length to the system's largest supported value. So, we also update
|
|
|
|
|
+ // MaxVectorSize to that largest supported value.
|
|
|
|
|
+ if (new_vl < 0) {
|
|
|
|
|
+ vm_exit_during_initialization(
|
|
|
|
|
+ err_msg("Current system does not support SVE vector length for MaxVectorSize: %d",
|
|
|
|
|
+ (int)MaxVectorSize));
|
|
|
|
|
+ } else if (new_vl != MaxVectorSize) {
|
|
|
|
|
+ warning("Current system only supports max SVE vector length %d. Set MaxVectorSize to %d",
|
|
|
|
|
+ new_vl, new_vl);
|
|
|
|
|
+ }
|
|
|
|
|
+ MaxVectorSize = new_vl;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (UseSVE == 0) { // NEON
|
|
|
|
|
+ int min_vector_size = 8;
|
|
|
|
|
+ int max_vector_size = 16;
|
|
|
|
|
+ if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
|
|
|
|
|
+ if (!is_power_of_2(MaxVectorSize)) {
|
|
|
|
|
+ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
|
|
|
|
|
+ } else if (MaxVectorSize < min_vector_size) {
|
|
|
|
|
+ warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
|
|
|
|
|
+ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
|
|
|
|
|
+ } else if (MaxVectorSize > max_vector_size) {
|
|
|
|
|
+ warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
|
|
|
|
|
+ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ FLAG_SET_DEFAULT(MaxVectorSize, 16);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
if (FLAG_IS_DEFAULT(OptoScheduling)) {
|
|
|
|
|
OptoScheduling = true;
|
2021-08-13 14:54:30 +08:00
|
|
|
}
|
2020-12-24 15:35:16 +08:00
|
|
|
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index dcb6342e1..ae2715102 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
|
|
|
|
|
@@ -40,6 +40,7 @@ protected:
|
|
|
|
|
static int _variant;
|
|
|
|
|
static int _revision;
|
|
|
|
|
static int _stepping;
|
|
|
|
|
+ static int _initial_sve_vector_length;
|
|
|
|
|
|
|
|
|
|
struct PsrInfo {
|
|
|
|
|
uint32_t dczid_el0;
|
|
|
|
|
@@ -101,6 +102,7 @@ public:
|
|
|
|
|
static int cpu_model2() { return _model2; }
|
|
|
|
|
static int cpu_variant() { return _variant; }
|
|
|
|
|
static int cpu_revision() { return _revision; }
|
|
|
|
|
+ static int get_initial_sve_vector_length() { return _initial_sve_vector_length; };
|
|
|
|
|
static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); }
|
|
|
|
|
static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); }
|
|
|
|
|
static bool is_zva_enabled() {
|
|
|
|
|
diff --git a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index c5d4383b4..dde7a7a91 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp
|
|
|
|
|
+++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp
|
|
|
|
|
@@ -1,6 +1,6 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -36,4 +36,8 @@ inline VMReg FloatRegisterImpl::as_VMReg() {
|
|
|
|
|
ConcreteRegisterImpl::max_gpr);
|
2020-12-24 15:35:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+inline VMReg PRegisterImpl::as_VMReg() {
|
|
|
|
|
+ return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr);
|
2021-08-13 14:54:30 +08:00
|
|
|
+}
|
|
|
|
|
+
|
2020-12-24 15:35:16 +08:00
|
|
|
#endif // CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP
|
|
|
|
|
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
|
2021-08-13 14:54:30 +08:00
|
|
|
index 4a32af54e..03248b2e0 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/arm/arm.ad
|
|
|
|
|
+++ b/src/hotspot/cpu/arm/arm.ad
|
|
|
|
|
@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
|
|
|
return true; // Per default match rules are supported.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
|
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
|
|
|
|
|
|
// TODO
|
|
|
|
|
// identify extra cases that we might want to provide match rules for
|
|
|
|
|
@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) {
|
|
|
|
|
return MaxVectorSize;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
|
|
|
+ return false;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
|
|
|
+ return -1;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// Vector ideal reg corresponding to specified size in bytes
|
|
|
|
|
const uint Matcher::vector_ideal_reg(int size) {
|
|
|
|
|
assert(MaxVectorSize >= size, "");
|
|
|
|
|
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
|
2021-08-13 14:54:30 +08:00
|
|
|
index 36cbada53..571a6aeb0 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/ppc/ppc.ad
|
|
|
|
|
+++ b/src/hotspot/cpu/ppc/ppc.ad
|
|
|
|
|
@@ -2242,7 +2242,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
|
|
|
return true; // Per default match rules are supported.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
|
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
|
|
|
|
|
|
// TODO
|
|
|
|
|
// identify extra cases that we might want to provide match rules for
|
|
|
|
|
@@ -2310,6 +2310,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
|
|
|
|
return max_vector_size(bt); // Same as max.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
|
|
|
+ return false;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
|
|
|
+ return -1;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// PPC implementation uses VSX load/store instructions (if
|
|
|
|
|
// SuperwordUseVSX) which support 4 byte but not arbitrary alignment
|
|
|
|
|
const bool Matcher::misaligned_vectors_ok() {
|
|
|
|
|
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
|
|
|
|
|
index 96c231b0a..782c1c7c4 100644
|
|
|
|
|
--- a/src/hotspot/cpu/s390/s390.ad
|
|
|
|
|
+++ b/src/hotspot/cpu/s390/s390.ad
|
|
|
|
|
@@ -1522,7 +1522,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
|
|
|
// BUT: make sure match rule is not disabled by a false predicate!
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
|
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
|
// TODO
|
|
|
|
|
// Identify extra cases that we might want to provide match rules for
|
|
|
|
|
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen.
|
|
|
|
|
@@ -1573,6 +1573,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
|
|
|
|
return max_vector_size(bt); // Same as max.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
|
|
|
+ return false;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
|
|
|
+ return -1;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
const uint Matcher::vector_shift_count_ideal_reg(int size) {
|
|
|
|
|
fatal("vector shift is not supported");
|
|
|
|
|
return Node::NotAMachineReg;
|
|
|
|
|
diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad
|
|
|
|
|
index a09c795c9..3b1b1046e 100644
|
|
|
|
|
--- a/src/hotspot/cpu/sparc/sparc.ad
|
|
|
|
|
+++ b/src/hotspot/cpu/sparc/sparc.ad
|
|
|
|
|
@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
|
|
|
return true; // Per default match rules are supported.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
|
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
|
|
|
|
|
|
// TODO
|
|
|
|
|
// identify extra cases that we might want to provide match rules for
|
|
|
|
|
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
|
2021-08-13 14:54:30 +08:00
|
|
|
index abdd7483d..93aee6d6c 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/x86/x86.ad
|
|
|
|
|
+++ b/src/hotspot/cpu/x86/x86.ad
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
//
|
2021-08-13 14:54:30 +08:00
|
|
|
-// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
|
2020-12-24 15:35:16 +08:00
|
|
|
+// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
//
|
|
|
|
|
// This code is free software; you can redistribute it and/or modify it
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1354,7 +1354,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
2020-12-24 15:35:16 +08:00
|
|
|
return ret_value; // Per default match rules are supported.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
|
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
|
// identify extra cases that we might want to provide match rules for
|
|
|
|
|
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
|
|
|
|
bool ret_value = match_rule_supported(opcode);
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1485,6 +1485,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
2020-12-24 15:35:16 +08:00
|
|
|
return MIN2(size,max_size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
|
|
|
+ return false;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
|
|
|
+ return -1;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// Vector ideal reg corresponding to specified size in bytes
|
|
|
|
|
const uint Matcher::vector_ideal_reg(int size) {
|
|
|
|
|
assert(MaxVectorSize >= size, "");
|
|
|
|
|
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
|
2021-08-13 14:54:30 +08:00
|
|
|
index 8904bba97..e09cdd061 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/cpu/x86/x86_64.ad
|
|
|
|
|
+++ b/src/hotspot/cpu/x86/x86_64.ad
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2968,7 +2968,7 @@ frame
|
2020-12-24 15:35:16 +08:00
|
|
|
RAX_H_num // Op_RegL
|
|
|
|
|
};
|
|
|
|
|
// Excluded flags and vector registers.
|
|
|
|
|
- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
|
|
|
|
|
+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
|
|
|
|
|
return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
|
|
|
|
|
%}
|
|
|
|
|
%}
|
|
|
|
|
diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp
|
|
|
|
|
index ba61aa4c0..9e41b2dc6 100644
|
|
|
|
|
--- a/src/hotspot/share/adlc/archDesc.cpp
|
|
|
|
|
+++ b/src/hotspot/share/adlc/archDesc.cpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
//
|
|
|
|
|
-// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
//
|
|
|
|
|
// This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
|
|
|
|
|
// Match Vector types.
|
|
|
|
|
if (strncmp(idealOp, "Vec",3)==0) {
|
|
|
|
|
switch(last_char) {
|
|
|
|
|
+ case 'A': return "TypeVect::VECTA";
|
|
|
|
|
case 'S': return "TypeVect::VECTS";
|
|
|
|
|
case 'D': return "TypeVect::VECTD";
|
|
|
|
|
case 'X': return "TypeVect::VECTX";
|
|
|
|
|
@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ if (strncmp(idealOp, "RegVMask", 8) == 0) {
|
|
|
|
|
+ return "Type::BOTTOM";
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
// !!!!!
|
|
|
|
|
switch(last_char) {
|
|
|
|
|
case 'I': return "TypeInt::INT";
|
|
|
|
|
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index c7b855a7e..a37866824 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/share/adlc/formssel.cpp
|
|
|
|
|
+++ b/src/hotspot/share/adlc/formssel.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -3963,6 +3963,8 @@ bool MatchRule::is_base_register(FormDict &globals) const {
|
2020-12-24 15:35:16 +08:00
|
|
|
strcmp(opType,"RegL")==0 ||
|
|
|
|
|
strcmp(opType,"RegF")==0 ||
|
|
|
|
|
strcmp(opType,"RegD")==0 ||
|
|
|
|
|
+ strcmp(opType,"RegVMask")==0 ||
|
|
|
|
|
+ strcmp(opType,"VecA")==0 ||
|
|
|
|
|
strcmp(opType,"VecS")==0 ||
|
|
|
|
|
strcmp(opType,"VecD")==0 ||
|
|
|
|
|
strcmp(opType,"VecX")==0 ||
|
|
|
|
|
diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp
|
|
|
|
|
index 914dc43f6..710af9de8 100644
|
|
|
|
|
--- a/src/hotspot/share/opto/chaitin.cpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/chaitin.cpp
|
|
|
|
|
@@ -77,6 +77,7 @@ void LRG::dump() const {
|
|
|
|
|
if( _is_oop ) tty->print("Oop ");
|
|
|
|
|
if( _is_float ) tty->print("Float ");
|
|
|
|
|
if( _is_vector ) tty->print("Vector ");
|
|
|
|
|
+ if( _is_scalable ) tty->print("Scalable ");
|
|
|
|
|
if( _was_spilled1 ) tty->print("Spilled ");
|
|
|
|
|
if( _was_spilled2 ) tty->print("Spilled2 ");
|
|
|
|
|
if( _direct_conflict ) tty->print("Direct_conflict ");
|
|
|
|
|
@@ -646,7 +647,15 @@ void PhaseChaitin::Register_Allocate() {
|
|
|
|
|
// Live ranges record the highest register in their mask.
|
|
|
|
|
// We want the low register for the AD file writer's convenience.
|
|
|
|
|
OptoReg::Name hi = lrg.reg(); // Get hi register
|
|
|
|
|
- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
|
|
|
|
|
+ int num_regs = lrg.num_regs();
|
|
|
|
|
+ if (lrg.is_scalable() && OptoReg::is_stack(hi)) {
|
|
|
|
|
+ // For scalable vector registers, when they are allocated in physical
|
|
|
|
|
+ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable
|
|
|
|
|
+ // vector. If they are allocated on stack, we need to get the actual
|
|
|
|
|
+ // num_regs, which reflects the physical length of scalable registers.
|
|
|
|
|
+ num_regs = lrg.scalable_reg_slots();
|
|
|
|
|
+ }
|
|
|
|
|
+ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
|
|
|
|
|
// We have to use pair [lo,lo+1] even for wide vectors because
|
|
|
|
|
// the rest of code generation works only with pairs. It is safe
|
|
|
|
|
// since for registers encoding only 'lo' is used.
|
|
|
|
|
@@ -801,8 +810,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
|
|
|
|
// Check for vector live range (only if vector register is used).
|
|
|
|
|
// On SPARC vector uses RegD which could be misaligned so it is not
|
|
|
|
|
// processes as vector in RA.
|
|
|
|
|
- if (RegMask::is_vector(ireg))
|
|
|
|
|
+ if (RegMask::is_vector(ireg)) {
|
|
|
|
|
lrg._is_vector = 1;
|
|
|
|
|
+ if (ireg == Op_VecA) {
|
|
|
|
|
+ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported");
|
|
|
|
|
+ lrg._is_scalable = 1;
|
|
|
|
|
+ // For scalable vector, when it is allocated in physical register,
|
|
|
|
|
+ // num_regs is RegMask::SlotsPerVecA for reg mask,
|
|
|
|
|
+ // which may not be the actual physical register size.
|
|
|
|
|
+ // If it is allocated in stack, we need to get the actual
|
|
|
|
|
+ // physical length of scalable vector register.
|
|
|
|
|
+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
|
|
|
|
|
"vector must be in vector registers");
|
|
|
|
|
|
|
|
|
|
@@ -912,6 +932,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
|
|
|
|
lrg.set_reg_pressure(1);
|
|
|
|
|
#endif
|
|
|
|
|
break;
|
|
|
|
|
+ case Op_VecA:
|
|
|
|
|
+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
|
|
|
|
|
+ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity");
|
|
|
|
|
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned");
|
|
|
|
|
+ lrg.set_num_regs(RegMask::SlotsPerVecA);
|
|
|
|
|
+ lrg.set_reg_pressure(1);
|
|
|
|
|
+ break;
|
|
|
|
|
case Op_VecS:
|
|
|
|
|
assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
|
|
|
|
|
assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
|
|
|
|
|
@@ -1358,6 +1385,46 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) {
|
|
|
|
|
+ int num_regs = lrg.num_regs();
|
|
|
|
|
+ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs);
|
|
|
|
|
+
|
|
|
|
|
+ if (lrg.is_scalable()) {
|
|
|
|
|
+ // a physical register is found
|
|
|
|
|
+ if (chunk == 0 && OptoReg::is_reg(assigned)) {
|
|
|
|
|
+ return assigned;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // find available stack slots for scalable register
|
|
|
|
|
+ if (lrg._is_vector) {
|
|
|
|
|
+ num_regs = lrg.scalable_reg_slots();
|
|
|
|
|
+ // if the actual scalable vector register is exactly SlotsPerVecA * 32 bits
|
|
|
|
|
+ if (num_regs == RegMask::SlotsPerVecA) {
|
|
|
|
|
+ return assigned;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it
|
|
|
|
|
+ // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits
|
|
|
|
|
+ // instead of SlotsPerVecA bits.
|
|
|
|
|
+ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg
|
|
|
|
|
+ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) {
|
|
|
|
|
+ // Verify the found reg has scalable_reg_slots() bits set.
|
|
|
|
|
+ if (mask.is_valid_reg(assigned, num_regs)) {
|
|
|
|
|
+ return assigned;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ // Remove more for each iteration
|
|
|
|
|
+ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg
|
|
|
|
|
+ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits
|
|
|
|
|
+ assigned = mask.find_first_set(lrg, num_regs);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return OptoReg::Bad; // will cause chunk change, and retry next chunk
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return assigned;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// Choose a color using the biasing heuristic
|
|
|
|
|
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
|
|
|
|
|
|
|
|
|
@@ -1391,7 +1458,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
|
|
|
|
RegMask tempmask = lrg.mask();
|
|
|
|
|
tempmask.AND(lrgs(copy_lrg).mask());
|
|
|
|
|
tempmask.clear_to_sets(lrg.num_regs());
|
|
|
|
|
- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
|
|
|
|
|
+ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk);
|
|
|
|
|
if (OptoReg::is_valid(reg))
|
|
|
|
|
return reg;
|
|
|
|
|
}
|
|
|
|
|
@@ -1400,7 +1467,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
|
|
|
|
// If no bias info exists, just go with the register selection ordering
|
|
|
|
|
if (lrg._is_vector || lrg.num_regs() == 2) {
|
|
|
|
|
// Find an aligned set
|
|
|
|
|
- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
|
|
|
|
|
+ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
|
|
|
|
|
@@ -1455,7 +1522,6 @@ uint PhaseChaitin::Select( ) {
|
|
|
|
|
LRG *lrg = &lrgs(lidx);
|
|
|
|
|
_simplified = lrg->_next;
|
|
|
|
|
|
|
|
|
|
-
|
|
|
|
|
#ifndef PRODUCT
|
|
|
|
|
if (trace_spilling()) {
|
|
|
|
|
ttyLocker ttyl;
|
|
|
|
|
@@ -1539,7 +1605,6 @@ uint PhaseChaitin::Select( ) {
|
|
|
|
|
// Bump register mask up to next stack chunk
|
|
|
|
|
chunk += RegMask::CHUNK_SIZE;
|
|
|
|
|
lrg->Set_All();
|
|
|
|
|
-
|
|
|
|
|
goto retry_next_chunk;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -1564,12 +1629,21 @@ uint PhaseChaitin::Select( ) {
|
|
|
|
|
int n_regs = lrg->num_regs();
|
|
|
|
|
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
|
|
|
|
|
if (n_regs == 1 || !lrg->_fat_proj) {
|
|
|
|
|
- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
|
|
|
|
|
+ if (Matcher::supports_scalable_vector()) {
|
|
|
|
|
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity");
|
|
|
|
|
+ } else {
|
|
|
|
|
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
|
|
|
|
|
+ }
|
|
|
|
|
lrg->Clear(); // Clear the mask
|
|
|
|
|
lrg->Insert(reg); // Set regmask to match selected reg
|
|
|
|
|
// For vectors and pairs, also insert the low bit of the pair
|
|
|
|
|
- for (int i = 1; i < n_regs; i++)
|
|
|
|
|
+ // We always choose the high bit, then mask the low bits by register size
|
|
|
|
|
+ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack
|
|
|
|
|
+ n_regs = lrg->scalable_reg_slots();
|
|
|
|
|
+ }
|
|
|
|
|
+ for (int i = 1; i < n_regs; i++) {
|
|
|
|
|
lrg->Insert(OptoReg::add(reg,-i));
|
|
|
|
|
+ }
|
|
|
|
|
lrg->set_mask_size(n_regs);
|
|
|
|
|
} else { // Else fatproj
|
|
|
|
|
// mask must be equal to fatproj bits, by definition
|
|
|
|
|
diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index e5be5b966..5408a24ef 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/share/opto/chaitin.hpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/chaitin.hpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
@@ -115,7 +115,9 @@ public:
|
|
|
|
|
_msize_valid=1;
|
|
|
|
|
if (_is_vector) {
|
|
|
|
|
assert(!_fat_proj, "sanity");
|
|
|
|
|
- _mask.verify_sets(_num_regs);
|
|
|
|
|
+ if (!(_is_scalable && OptoReg::is_stack(_reg))) {
|
|
|
|
|
+ _mask.verify_sets(_num_regs);
|
|
|
|
|
+ }
|
|
|
|
|
} else if (_num_regs == 2 && !_fat_proj) {
|
|
|
|
|
_mask.verify_pairs();
|
|
|
|
|
}
|
|
|
|
|
@@ -139,14 +141,37 @@ public:
|
|
|
|
|
void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) }
|
|
|
|
|
void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
|
|
|
|
|
|
|
|
|
|
- // Number of registers this live range uses when it colors
|
|
|
|
|
private:
|
|
|
|
|
+ // Number of registers this live range uses when it colors
|
|
|
|
|
uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else
|
|
|
|
|
// except _num_regs is kill count for fat_proj
|
|
|
|
|
+
|
|
|
|
|
+ // For scalable register, num_regs may not be the actual physical register size.
|
|
|
|
|
+ // We need to get the actual physical length of scalable register when scalable
|
|
|
|
|
+ // register is spilled. The size of one slot is 32-bit.
|
|
|
|
|
+ uint _scalable_reg_slots; // Actual scalable register length of slots.
|
|
|
|
|
+ // Meaningful only when _is_scalable is true.
|
|
|
|
|
public:
|
|
|
|
|
int num_regs() const { return _num_regs; }
|
|
|
|
|
void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
|
|
|
|
|
|
|
|
|
|
+ uint scalable_reg_slots() { return _scalable_reg_slots; }
|
|
|
|
|
+ void set_scalable_reg_slots(uint slots) {
|
|
|
|
|
+ assert(_is_scalable, "scalable register");
|
|
|
|
|
+ assert(slots > 0, "slots of scalable register is not valid");
|
|
|
|
|
+ _scalable_reg_slots = slots;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool is_scalable() {
|
|
|
|
|
+#ifdef ASSERT
|
|
|
|
|
+ if (_is_scalable) {
|
|
|
|
|
+ // Should only be a vector for now, but it could also be a RegVMask in future.
|
|
|
|
|
+ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
|
|
|
|
|
+ }
|
|
|
|
|
+#endif
|
|
|
|
|
+ return _is_scalable;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
private:
|
|
|
|
|
// Number of physical registers this live range uses when it colors
|
|
|
|
|
// Architecture and register-set dependent
|
|
|
|
|
@@ -172,6 +197,8 @@ public:
|
|
|
|
|
uint _is_oop:1, // Live-range holds an oop
|
|
|
|
|
_is_float:1, // True if in float registers
|
|
|
|
|
_is_vector:1, // True if in vector registers
|
|
|
|
|
+ _is_scalable:1, // True if register size is scalable
|
|
|
|
|
+ // e.g. Arm SVE vector/predicate registers.
|
|
|
|
|
_was_spilled1:1, // True if prior spilling on def
|
|
|
|
|
_was_spilled2:1, // True if twice prior spilling on def
|
|
|
|
|
_is_bound:1, // live range starts life with no
|
|
|
|
|
diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 4cc7580a8..4fb732161 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/share/opto/matcher.cpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/matcher.cpp
|
|
|
|
|
@@ -84,6 +84,7 @@ Matcher::Matcher()
|
|
|
|
|
idealreg2spillmask [Op_RegF] = NULL;
|
|
|
|
|
idealreg2spillmask [Op_RegD] = NULL;
|
|
|
|
|
idealreg2spillmask [Op_RegP] = NULL;
|
|
|
|
|
+ idealreg2spillmask [Op_VecA] = NULL;
|
|
|
|
|
idealreg2spillmask [Op_VecS] = NULL;
|
|
|
|
|
idealreg2spillmask [Op_VecD] = NULL;
|
|
|
|
|
idealreg2spillmask [Op_VecX] = NULL;
|
|
|
|
|
@@ -97,6 +98,7 @@ Matcher::Matcher()
|
|
|
|
|
idealreg2debugmask [Op_RegF] = NULL;
|
|
|
|
|
idealreg2debugmask [Op_RegD] = NULL;
|
|
|
|
|
idealreg2debugmask [Op_RegP] = NULL;
|
|
|
|
|
+ idealreg2debugmask [Op_VecA] = NULL;
|
|
|
|
|
idealreg2debugmask [Op_VecS] = NULL;
|
|
|
|
|
idealreg2debugmask [Op_VecD] = NULL;
|
|
|
|
|
idealreg2debugmask [Op_VecX] = NULL;
|
|
|
|
|
@@ -110,6 +112,7 @@ Matcher::Matcher()
|
|
|
|
|
idealreg2mhdebugmask[Op_RegF] = NULL;
|
|
|
|
|
idealreg2mhdebugmask[Op_RegD] = NULL;
|
|
|
|
|
idealreg2mhdebugmask[Op_RegP] = NULL;
|
|
|
|
|
+ idealreg2mhdebugmask[Op_VecA] = NULL;
|
|
|
|
|
idealreg2mhdebugmask[Op_VecS] = NULL;
|
|
|
|
|
idealreg2mhdebugmask[Op_VecD] = NULL;
|
|
|
|
|
idealreg2mhdebugmask[Op_VecX] = NULL;
|
|
|
|
|
@@ -417,6 +420,8 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
|
|
|
|
|
return rms;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+#define NOF_STACK_MASKS (3*6+6)
|
|
|
|
|
+
|
|
|
|
|
//---------------------------init_first_stack_mask-----------------------------
|
|
|
|
|
// Create the initial stack mask used by values spilling to the stack.
|
|
|
|
|
// Disallow any debug info in outgoing argument areas by setting the
|
|
|
|
|
@@ -424,7 +429,12 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
|
|
|
|
|
void Matcher::init_first_stack_mask() {
|
|
|
|
|
|
|
|
|
|
// Allocate storage for spill masks as masks for the appropriate load type.
|
|
|
|
|
- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5));
|
|
|
|
|
+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * NOF_STACK_MASKS);
|
|
|
|
|
+
|
|
|
|
|
+ // Initialize empty placeholder masks into the newly allocated arena
|
|
|
|
|
+ for (int i = 0; i < NOF_STACK_MASKS; i++) {
|
|
|
|
|
+ new (rms + i) RegMask();
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
|
|
idealreg2spillmask [Op_RegN] = &rms[0];
|
|
|
|
|
idealreg2spillmask [Op_RegI] = &rms[1];
|
|
|
|
|
@@ -447,11 +457,12 @@ void Matcher::init_first_stack_mask() {
|
|
|
|
|
idealreg2mhdebugmask[Op_RegD] = &rms[16];
|
|
|
|
|
idealreg2mhdebugmask[Op_RegP] = &rms[17];
|
|
|
|
|
|
|
|
|
|
- idealreg2spillmask [Op_VecS] = &rms[18];
|
|
|
|
|
- idealreg2spillmask [Op_VecD] = &rms[19];
|
|
|
|
|
- idealreg2spillmask [Op_VecX] = &rms[20];
|
|
|
|
|
- idealreg2spillmask [Op_VecY] = &rms[21];
|
|
|
|
|
- idealreg2spillmask [Op_VecZ] = &rms[22];
|
|
|
|
|
+ idealreg2spillmask [Op_VecA] = &rms[18];
|
|
|
|
|
+ idealreg2spillmask [Op_VecS] = &rms[19];
|
|
|
|
|
+ idealreg2spillmask [Op_VecD] = &rms[20];
|
|
|
|
|
+ idealreg2spillmask [Op_VecX] = &rms[21];
|
|
|
|
|
+ idealreg2spillmask [Op_VecY] = &rms[22];
|
|
|
|
|
+ idealreg2spillmask [Op_VecZ] = &rms[23];
|
|
|
|
|
|
|
|
|
|
OptoReg::Name i;
|
|
|
|
|
|
|
|
|
|
@@ -478,6 +489,7 @@ void Matcher::init_first_stack_mask() {
|
|
|
|
|
// Keep spill masks aligned.
|
|
|
|
|
aligned_stack_mask.clear_to_pairs();
|
|
|
|
|
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
|
|
|
|
|
+ RegMask scalable_stack_mask = aligned_stack_mask;
|
|
|
|
|
|
|
|
|
|
*idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
|
|
|
|
|
#ifdef _LP64
|
|
|
|
|
@@ -548,28 +560,48 @@ void Matcher::init_first_stack_mask() {
|
|
|
|
|
*idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ];
|
|
|
|
|
idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask);
|
|
|
|
|
}
|
|
|
|
|
- if (UseFPUForSpilling) {
|
|
|
|
|
- // This mask logic assumes that the spill operations are
|
|
|
|
|
- // symmetric and that the registers involved are the same size.
|
|
|
|
|
- // On sparc for instance we may have to use 64 bit moves will
|
|
|
|
|
- // kill 2 registers when used with F0-F31.
|
|
|
|
|
- idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
|
|
|
|
|
- idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
|
|
|
|
|
+
|
|
|
|
|
+ if (Matcher::supports_scalable_vector()) {
|
|
|
|
|
+ int k = 1;
|
|
|
|
|
+ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
|
|
|
|
|
+ // Exclude last input arg stack slots to avoid spilling vector register there,
|
|
|
|
|
+ // otherwise vector spills could stomp over stack slots in caller frame.
|
|
|
|
|
+ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
|
|
|
|
|
+ scalable_stack_mask.Remove(in);
|
|
|
|
|
+ in = OptoReg::add(in, -1);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // For VecA
|
|
|
|
|
+ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA);
|
|
|
|
|
+ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack");
|
|
|
|
|
+ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA];
|
|
|
|
|
+ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ *idealreg2spillmask[Op_VecA] = RegMask::Empty;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (UseFPUForSpilling) {
|
|
|
|
|
+ // This mask logic assumes that the spill operations are
|
|
|
|
|
+ // symmetric and that the registers involved are the same size.
|
|
|
|
|
+ // On sparc for instance we may have to use 64 bit moves that will
|
|
|
|
|
+ // kill 2 registers when used with F0-F31.
|
|
|
|
|
+ idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
|
|
|
|
|
+ idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
|
|
|
|
|
#ifdef _LP64
|
|
|
|
|
- idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
|
|
|
|
|
- idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
|
|
|
|
|
- idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
|
|
|
|
|
- idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
|
|
|
|
|
+ idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
|
|
|
|
|
+ idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
|
|
|
|
|
+ idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
|
|
|
|
|
+ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
|
|
|
|
|
#else
|
|
|
|
|
- idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
|
|
|
|
|
+ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
|
|
|
|
|
#ifdef ARM
|
|
|
|
|
- // ARM has support for moving 64bit values between a pair of
|
|
|
|
|
- // integer registers and a double register
|
|
|
|
|
- idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
|
|
|
|
|
- idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
|
|
|
|
|
+ // ARM has support for moving 64bit values between a pair of
|
|
|
|
|
+ // integer registers and a double register
|
|
|
|
|
+ idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
|
|
|
|
|
+ idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
|
|
|
|
- }
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
|
|
// Make up debug masks. Any spill slot plus callee-save registers.
|
|
|
|
|
// Caller-save registers are assumed to be trashable by the various
|
|
|
|
|
@@ -872,6 +904,10 @@ void Matcher::init_spill_mask( Node *ret ) {
|
|
|
|
|
idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
|
|
|
|
|
|
|
|
|
|
// Vector regmasks.
|
|
|
|
|
+ if (Matcher::supports_scalable_vector()) {
|
|
|
|
|
+ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA));
|
|
|
|
|
+ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask();
|
|
|
|
|
+ }
|
|
|
|
|
if (Matcher::vector_size_supported(T_BYTE,4)) {
|
|
|
|
|
TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
|
|
|
|
|
MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1575,7 +1611,6 @@ Node* Matcher::Label_Root(const Node* n, State* svec, Node* control, Node*& mem)
|
2020-12-24 15:35:16 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-
|
|
|
|
|
// Call DFA to match this node, and return
|
|
|
|
|
svec->DFA( n->Opcode(), n );
|
|
|
|
|
|
|
|
|
|
diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp
|
|
|
|
|
index 244e3d1f8..9a8307102 100644
|
|
|
|
|
--- a/src/hotspot/share/opto/matcher.hpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/matcher.hpp
|
|
|
|
|
@@ -310,7 +310,7 @@ public:
|
|
|
|
|
|
|
|
|
|
// identify extra cases that we might want to provide match rules for
|
|
|
|
|
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
|
|
|
|
- static const bool match_rule_supported_vector(int opcode, int vlen);
|
|
|
|
|
+ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt);
|
|
|
|
|
|
|
|
|
|
// Some microarchitectures have mask registers used on vectors
|
|
|
|
|
static const bool has_predicated_vectors(void);
|
|
|
|
|
@@ -333,6 +333,10 @@ public:
|
|
|
|
|
Matcher::min_vector_size(bt) <= size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ static const bool supports_scalable_vector();
|
|
|
|
|
+ // Actual max scalable vector register length.
|
|
|
|
|
+ static const int scalable_vector_reg_size(const BasicType bt);
|
|
|
|
|
+
|
|
|
|
|
// Vector ideal reg
|
|
|
|
|
static const uint vector_ideal_reg(int len);
|
|
|
|
|
static const uint vector_shift_count_ideal_reg(int len);
|
|
|
|
|
diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp
|
|
|
|
|
index e31e8d847..1a826d8ba 100644
|
|
|
|
|
--- a/src/hotspot/share/opto/opcodes.cpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/opcodes.cpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
@@ -38,12 +38,14 @@ const char *NodeClassNames[] = {
|
|
|
|
|
"RegF",
|
|
|
|
|
"RegD",
|
|
|
|
|
"RegL",
|
|
|
|
|
- "RegFlags",
|
|
|
|
|
+ "VecA",
|
|
|
|
|
"VecS",
|
|
|
|
|
"VecD",
|
|
|
|
|
"VecX",
|
|
|
|
|
"VecY",
|
|
|
|
|
"VecZ",
|
|
|
|
|
+ "RegVMask",
|
|
|
|
|
+ "RegFlags",
|
|
|
|
|
"_last_machine_leaf",
|
|
|
|
|
#include "classes.hpp"
|
|
|
|
|
"_last_class_name",
|
|
|
|
|
diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp
|
|
|
|
|
index ae3d61ce0..ec96ba055 100644
|
|
|
|
|
--- a/src/hotspot/share/opto/opcodes.hpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/opcodes.hpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
@@ -37,11 +37,13 @@ enum Opcodes {
|
|
|
|
|
macro(RegF) // Machine float register
|
|
|
|
|
macro(RegD) // Machine double register
|
|
|
|
|
macro(RegL) // Machine long register
|
|
|
|
|
+ macro(VecA) // Machine vectora register
|
|
|
|
|
macro(VecS) // Machine vectors register
|
|
|
|
|
macro(VecD) // Machine vectord register
|
|
|
|
|
macro(VecX) // Machine vectorx register
|
|
|
|
|
macro(VecY) // Machine vectory register
|
|
|
|
|
macro(VecZ) // Machine vectorz register
|
|
|
|
|
+ macro(RegVMask) // Vector mask/predicate register
|
|
|
|
|
macro(RegFlags) // Machine flags register
|
|
|
|
|
_last_machine_leaf, // Split between regular opcodes and machine
|
|
|
|
|
#include "classes.hpp"
|
|
|
|
|
diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp
|
|
|
|
|
index d572ac9fe..3514b37bc 100644
|
|
|
|
|
--- a/src/hotspot/share/opto/postaloc.cpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/postaloc.cpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
@@ -266,9 +266,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
|
|
|
|
|
Node *val = skip_copies(n->in(k));
|
|
|
|
|
if (val == x) return blk_adjust; // No progress?
|
|
|
|
|
|
|
|
|
|
- int n_regs = RegMask::num_registers(val->ideal_reg());
|
|
|
|
|
uint val_idx = _lrg_map.live_range_id(val);
|
|
|
|
|
OptoReg::Name val_reg = lrgs(val_idx).reg();
|
|
|
|
|
+ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx));
|
|
|
|
|
|
|
|
|
|
// See if it happens to already be in the correct register!
|
|
|
|
|
// (either Phi's direct register, or the common case of the name
|
|
|
|
|
@@ -305,8 +305,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Node *vv = value[reg];
|
|
|
|
|
+ // For scalable register, number of registers may be inconsistent between
|
|
|
|
|
+ // "val_reg" and "reg". For example, when "val" resides in register
|
|
|
|
|
+ // but "reg" is located in stack.
|
|
|
|
|
+ if (lrgs(val_idx).is_scalable()) {
|
|
|
|
|
+ assert(val->ideal_reg() == Op_VecA, "scalable vector register");
|
|
|
|
|
+ if (OptoReg::is_stack(reg)) {
|
|
|
|
|
+ n_regs = lrgs(val_idx).scalable_reg_slots();
|
|
|
|
|
+ } else {
|
|
|
|
|
+ n_regs = RegMask::SlotsPerVecA;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
|
|
|
|
|
- uint last = (n_regs-1); // Looking for the last part of a set
|
|
|
|
|
+ uint last;
|
|
|
|
|
+ if (lrgs(val_idx).is_scalable()) {
|
|
|
|
|
+ assert(val->ideal_reg() == Op_VecA, "scalable vector register");
|
|
|
|
|
+ // For scalable vector register, regmask is always SlotsPerVecA bits aligned
|
|
|
|
|
+ last = RegMask::SlotsPerVecA - 1;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ last = (n_regs-1); // Looking for the last part of a set
|
|
|
|
|
+ }
|
|
|
|
|
if ((reg&last) != last) continue; // Wrong part of a set
|
|
|
|
|
if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
|
|
|
|
|
}
|
|
|
|
|
@@ -591,7 +609,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
|
|
|
|
uint k;
|
|
|
|
|
Node *phi = block->get_node(j);
|
|
|
|
|
uint pidx = _lrg_map.live_range_id(phi);
|
|
|
|
|
- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
|
|
|
|
|
+ OptoReg::Name preg = lrgs(pidx).reg();
|
|
|
|
|
|
|
|
|
|
// Remove copies remaining on edges. Check for junk phi.
|
|
|
|
|
Node *u = NULL;
|
|
|
|
|
@@ -619,7 +637,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
|
|
|
|
if( pidx ) {
|
|
|
|
|
value.map(preg,phi);
|
|
|
|
|
regnd.map(preg,phi);
|
|
|
|
|
- int n_regs = RegMask::num_registers(phi->ideal_reg());
|
|
|
|
|
+ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx));
|
|
|
|
|
for (int l = 1; l < n_regs; l++) {
|
|
|
|
|
OptoReg::Name preg_lo = OptoReg::add(preg,-l);
|
|
|
|
|
value.map(preg_lo,phi);
|
|
|
|
|
@@ -663,7 +681,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
|
|
|
|
regnd.map(ureg, def);
|
|
|
|
|
// Record other half of doubles
|
|
|
|
|
uint def_ideal_reg = def->ideal_reg();
|
|
|
|
|
- int n_regs = RegMask::num_registers(def_ideal_reg);
|
|
|
|
|
+ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def)));
|
|
|
|
|
for (int l = 1; l < n_regs; l++) {
|
|
|
|
|
OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
|
|
|
|
|
if (!value[ureg_lo] &&
|
|
|
|
|
@@ -707,7 +725,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint n_ideal_reg = n->ideal_reg();
|
|
|
|
|
- int n_regs = RegMask::num_registers(n_ideal_reg);
|
|
|
|
|
+ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx));
|
|
|
|
|
if (n_regs == 1) {
|
|
|
|
|
// If Node 'n' does not change the value mapped by the register,
|
|
|
|
|
// then 'n' is a useless copy. Do not update the register->node
|
|
|
|
|
diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp
|
|
|
|
|
index 2e04c42eb..dd9b5476b 100644
|
|
|
|
|
--- a/src/hotspot/share/opto/regmask.cpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/regmask.cpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
@@ -24,6 +24,7 @@
|
|
|
|
|
|
|
|
|
|
#include "precompiled.hpp"
|
|
|
|
|
#include "opto/ad.hpp"
|
|
|
|
|
+#include "opto/chaitin.hpp"
|
|
|
|
|
#include "opto/compile.hpp"
|
|
|
|
|
#include "opto/matcher.hpp"
|
|
|
|
|
#include "opto/node.hpp"
|
|
|
|
|
@@ -116,30 +117,47 @@ const RegMask RegMask::Empty(
|
|
|
|
|
|
|
|
|
|
//=============================================================================
|
|
|
|
|
bool RegMask::is_vector(uint ireg) {
|
|
|
|
|
- return (ireg == Op_VecS || ireg == Op_VecD ||
|
|
|
|
|
+ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD ||
|
|
|
|
|
ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ );
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int RegMask::num_registers(uint ireg) {
|
|
|
|
|
switch(ireg) {
|
|
|
|
|
case Op_VecZ:
|
|
|
|
|
- return 16;
|
|
|
|
|
+ return SlotsPerVecZ;
|
|
|
|
|
case Op_VecY:
|
|
|
|
|
- return 8;
|
|
|
|
|
+ return SlotsPerVecY;
|
|
|
|
|
case Op_VecX:
|
|
|
|
|
- return 4;
|
|
|
|
|
+ return SlotsPerVecX;
|
|
|
|
|
case Op_VecD:
|
|
|
|
|
+ return SlotsPerVecD;
|
|
|
|
|
case Op_RegD:
|
|
|
|
|
case Op_RegL:
|
|
|
|
|
#ifdef _LP64
|
|
|
|
|
case Op_RegP:
|
|
|
|
|
#endif
|
|
|
|
|
return 2;
|
|
|
|
|
+ case Op_VecA:
|
|
|
|
|
+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
|
|
|
|
|
+ return SlotsPerVecA;
|
|
|
|
|
}
|
|
|
|
|
// Op_VecS and the rest ideal registers.
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+int RegMask::num_registers(uint ireg, LRG &lrg) {
|
|
|
|
|
+ int n_regs = num_registers(ireg);
|
|
|
|
|
+
|
|
|
|
|
+ // 'assigned' is the OptoReg selected by the register allocator
|
|
|
|
|
+ OptoReg::Name assigned = lrg.reg();
|
|
|
|
|
+ assert(OptoReg::is_valid(assigned), "should be valid opto register");
|
|
|
|
|
+
|
|
|
|
|
+ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) {
|
|
|
|
|
+ n_regs = lrg.scalable_reg_slots();
|
|
|
|
|
+ }
|
|
|
|
|
+ return n_regs;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
//------------------------------find_first_pair--------------------------------
|
|
|
|
|
// Find the lowest-numbered register pair in the mask. Return the
|
|
|
|
|
// HIGHEST register number in the pair, or BAD if no pairs.
|
|
|
|
|
@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+// Check whether the given reg number with the given size is valid
|
|
|
|
|
+// for current regmask, where reg is the highest number.
|
|
|
|
|
+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const {
|
|
|
|
|
+ for (int i = 0; i < size; i++) {
|
|
|
|
|
+ if (!Member(reg - i)) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return true;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// only indicies of power 2 are accessed, so index 3 is only filled in for storage.
|
|
|
|
|
static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 };
|
|
|
|
|
//------------------------------find_first_set---------------------------------
|
|
|
|
|
// Find the lowest-numbered register set in the mask. Return the
|
|
|
|
|
// HIGHEST register number in the set, or BAD if no sets.
|
|
|
|
|
// Works also for size 1.
|
|
|
|
|
-OptoReg::Name RegMask::find_first_set(const int size) const {
|
|
|
|
|
- verify_sets(size);
|
|
|
|
|
+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const {
|
|
|
|
|
+ if (lrg.is_scalable()) {
|
|
|
|
|
+ // For scalable vector register, regmask is SlotsPerVecA bits aligned.
|
|
|
|
|
+ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets");
|
|
|
|
|
+ } else {
|
|
|
|
|
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
|
|
|
|
|
+ }
|
|
|
|
|
for (int i = 0; i < RM_SIZE; i++) {
|
|
|
|
|
if (_A[i]) { // Found some bits
|
|
|
|
|
int bit = _A[i] & -_A[i]; // Extract low bit
|
|
|
|
|
@@ -325,12 +359,16 @@ bool RegMask::is_aligned_sets(const int size) const {
|
|
|
|
|
while (bits) { // Check bits for pairing
|
|
|
|
|
int bit = bits & -bits; // Extract low bit
|
|
|
|
|
// Low bit is not odd means its mis-aligned.
|
|
|
|
|
- if ((bit & low_bits_mask) == 0) return false;
|
|
|
|
|
+ if ((bit & low_bits_mask) == 0) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
// Do extra work since (bit << size) may overflow.
|
|
|
|
|
int hi_bit = bit << (size-1); // high bit
|
|
|
|
|
int set = hi_bit + ((hi_bit-1) & ~(bit-1));
|
|
|
|
|
// Check for aligned adjacent bits in this set
|
|
|
|
|
- if ((bits & set) != set) return false;
|
|
|
|
|
+ if ((bits & set) != set) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
bits -= set; // Remove this set
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp
|
|
|
|
|
index c64d08795..b733b87ad 100644
|
|
|
|
|
--- a/src/hotspot/share/opto/regmask.hpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/regmask.hpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
@@ -28,6 +28,8 @@
|
|
|
|
|
#include "code/vmreg.hpp"
|
|
|
|
|
#include "opto/optoreg.hpp"
|
|
|
|
|
|
|
|
|
|
+class LRG;
|
|
|
|
|
+
|
|
|
|
|
// Some fun naming (textual) substitutions:
|
|
|
|
|
//
|
|
|
|
|
// RegMask::get_low_elem() ==> RegMask::find_first_elem()
|
|
|
|
|
@@ -95,11 +97,13 @@ public:
|
|
|
|
|
// requirement is internal to the allocator, and independent of any
|
|
|
|
|
// particular platform.
|
|
|
|
|
enum { SlotsPerLong = 2,
|
|
|
|
|
+ SlotsPerVecA = 8,
|
|
|
|
|
SlotsPerVecS = 1,
|
|
|
|
|
SlotsPerVecD = 2,
|
|
|
|
|
SlotsPerVecX = 4,
|
|
|
|
|
SlotsPerVecY = 8,
|
|
|
|
|
- SlotsPerVecZ = 16 };
|
|
|
|
|
+ SlotsPerVecZ = 16,
|
|
|
|
|
+ };
|
|
|
|
|
|
|
|
|
|
// A constructor only used by the ADLC output. All mask fields are filled
|
|
|
|
|
// in directly. Calls to this look something like RM(1,2,3,4);
|
|
|
|
|
@@ -204,10 +208,14 @@ public:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
+ // Check whether the given reg number with the given size is valid
|
|
|
|
|
+ // for current regmask, where reg is the highest number.
|
|
|
|
|
+ bool is_valid_reg(OptoReg::Name reg, const int size) const;
|
|
|
|
|
+
|
|
|
|
|
// Find the lowest-numbered register set in the mask. Return the
|
|
|
|
|
// HIGHEST register number in the set, or BAD if no sets.
|
|
|
|
|
// Assert that the mask contains only bit sets.
|
|
|
|
|
- OptoReg::Name find_first_set(const int size) const;
|
|
|
|
|
+ OptoReg::Name find_first_set(LRG &lrg, const int size) const;
|
|
|
|
|
|
|
|
|
|
// Clear out partial bits; leave only aligned adjacent bit sets of size.
|
|
|
|
|
void clear_to_sets(const int size);
|
|
|
|
|
@@ -226,6 +234,7 @@ public:
|
|
|
|
|
|
|
|
|
|
static bool is_vector(uint ireg);
|
|
|
|
|
static int num_registers(uint ireg);
|
|
|
|
|
+ static int num_registers(uint ireg, LRG &lrg);
|
|
|
|
|
|
|
|
|
|
// Fast overlap test. Non-zero if any registers in common.
|
|
|
|
|
int overlap( const RegMask &rm ) const {
|
|
|
|
|
diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 92f70b77d..ed67928f5 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/share/opto/superword.cpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/superword.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz
|
2020-12-24 15:35:16 +08:00
|
|
|
//------------------------------transform_loop---------------------------
|
|
|
|
|
void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
|
|
|
|
|
assert(UseSuperWord, "should be");
|
|
|
|
|
- // Do vectors exist on this architecture?
|
|
|
|
|
- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
|
|
|
|
|
+ // SuperWord only works with power of two vector sizes.
|
|
|
|
|
+ int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
|
|
|
|
|
+ if (vector_width < 2 || !is_power_of_2(vector_width)) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
|
|
assert(lpt->_head->is_CountedLoop(), "must be");
|
|
|
|
|
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
|
|
|
|
|
diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 223b7a1c6..1b46cb452 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/share/opto/type.cpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/type.cpp
|
|
|
|
|
@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
|
|
|
|
|
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
|
|
|
|
|
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
|
|
|
|
|
#else // all other
|
|
|
|
|
+ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
|
|
|
|
|
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
|
|
|
|
|
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
|
|
|
|
|
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) {
|
2020-12-24 15:35:16 +08:00
|
|
|
// get_zero_type() should not happen for T_CONFLICT
|
|
|
|
|
_zero_type[T_CONFLICT]= NULL;
|
|
|
|
|
|
|
|
|
|
+ if (Matcher::supports_scalable_vector()) {
|
|
|
|
|
+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
// Vector predefined types, it needs initialized _const_basic_type[].
|
|
|
|
|
if (Matcher::vector_size_supported(T_BYTE,4)) {
|
|
|
|
|
TypeVect::VECTS = TypeVect::make(T_BYTE,4);
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -671,6 +676,8 @@ void Type::Initialize_shared(Compile* current) {
|
2020-12-24 15:35:16 +08:00
|
|
|
if (Matcher::vector_size_supported(T_FLOAT,16)) {
|
|
|
|
|
TypeVect::VECTZ = TypeVect::make(T_FLOAT,16);
|
|
|
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ mreg2type[Op_VecA] = TypeVect::VECTA;
|
|
|
|
|
mreg2type[Op_VecS] = TypeVect::VECTS;
|
|
|
|
|
mreg2type[Op_VecD] = TypeVect::VECTD;
|
|
|
|
|
mreg2type[Op_VecX] = TypeVect::VECTX;
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -990,6 +997,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = {
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
Bad, // Tuple - handled in v-call
|
|
|
|
|
Bad, // Array - handled in v-call
|
|
|
|
|
+ Bad, // VectorA - handled in v-call
|
|
|
|
|
Bad, // VectorS - handled in v-call
|
|
|
|
|
Bad, // VectorD - handled in v-call
|
|
|
|
|
Bad, // VectorX - handled in v-call
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1890,7 +1898,6 @@ const TypeTuple *TypeTuple::LONG_PAIR;
|
2020-12-24 15:35:16 +08:00
|
|
|
const TypeTuple *TypeTuple::INT_CC_PAIR;
|
|
|
|
|
const TypeTuple *TypeTuple::LONG_CC_PAIR;
|
|
|
|
|
|
|
|
|
|
-
|
|
|
|
|
//------------------------------make-------------------------------------------
|
|
|
|
|
// Make a TypeTuple from the range of a method signature
|
|
|
|
|
const TypeTuple *TypeTuple::make_range(ciSignature* sig) {
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2262,6 +2269,7 @@ bool TypeAry::ary_must_be_exact() const {
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
//==============================TypeVect=======================================
|
|
|
|
|
// Convenience common pre-built types.
|
|
|
|
|
+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic
|
|
|
|
|
const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors
|
|
|
|
|
const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
|
|
|
|
|
const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2272,10 +2280,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors
|
2020-12-24 15:35:16 +08:00
|
|
|
const TypeVect* TypeVect::make(const Type *elem, uint length) {
|
|
|
|
|
BasicType elem_bt = elem->array_element_basic_type();
|
|
|
|
|
assert(is_java_primitive(elem_bt), "only primitive types in vector");
|
|
|
|
|
- assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
|
|
|
|
|
assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
|
|
|
|
|
int size = length * type2aelembytes(elem_bt);
|
|
|
|
|
switch (Matcher::vector_ideal_reg(size)) {
|
|
|
|
|
+ case Op_VecA:
|
|
|
|
|
+ return (TypeVect*)(new TypeVectA(elem, length))->hashcons();
|
|
|
|
|
case Op_VecS:
|
|
|
|
|
return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
|
|
|
|
|
case Op_RegL:
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2307,7 +2316,7 @@ const Type *TypeVect::xmeet( const Type *t ) const {
|
2020-12-24 15:35:16 +08:00
|
|
|
|
|
|
|
|
default: // All else is a mistake
|
|
|
|
|
typerr(t);
|
|
|
|
|
-
|
|
|
|
|
+ case VectorA:
|
|
|
|
|
case VectorS:
|
|
|
|
|
case VectorD:
|
|
|
|
|
case VectorX:
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -2362,6 +2371,8 @@ bool TypeVect::empty(void) const {
|
2020-12-24 15:35:16 +08:00
|
|
|
#ifndef PRODUCT
|
|
|
|
|
void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
|
|
|
|
|
switch (base()) {
|
|
|
|
|
+ case VectorA:
|
|
|
|
|
+ st->print("vectora["); break;
|
|
|
|
|
case VectorS:
|
|
|
|
|
st->print("vectors["); break;
|
|
|
|
|
case VectorD:
|
|
|
|
|
diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index a7eec281e..6787b947d 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/share/opto/type.hpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/type.hpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
@@ -53,6 +53,7 @@ class TypeNarrowKlass;
|
|
|
|
|
class TypeAry;
|
|
|
|
|
class TypeTuple;
|
|
|
|
|
class TypeVect;
|
|
|
|
|
+class TypeVectA;
|
|
|
|
|
class TypeVectS;
|
|
|
|
|
class TypeVectD;
|
|
|
|
|
class TypeVectX;
|
|
|
|
|
@@ -87,6 +88,7 @@ public:
|
|
|
|
|
|
|
|
|
|
Tuple, // Method signature or object layout
|
|
|
|
|
Array, // Array types
|
|
|
|
|
+ VectorA, // (Scalable) Vector types for vector length agnostic
|
|
|
|
|
VectorS, // 32bit Vector types
|
|
|
|
|
VectorD, // 64bit Vector types
|
|
|
|
|
VectorX, // 128bit Vector types
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -758,6 +760,7 @@ public:
|
2020-12-24 15:35:16 +08:00
|
|
|
virtual const Type *xmeet( const Type *t) const;
|
|
|
|
|
virtual const Type *xdual() const; // Compute dual right now.
|
|
|
|
|
|
|
|
|
|
+ static const TypeVect *VECTA;
|
|
|
|
|
static const TypeVect *VECTS;
|
|
|
|
|
static const TypeVect *VECTD;
|
|
|
|
|
static const TypeVect *VECTX;
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -769,6 +772,11 @@ public:
|
2020-12-24 15:35:16 +08:00
|
|
|
#endif
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
+class TypeVectA : public TypeVect {
|
|
|
|
|
+ friend class TypeVect;
|
|
|
|
|
+ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {}
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
class TypeVectS : public TypeVect {
|
|
|
|
|
friend class TypeVect;
|
|
|
|
|
TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -1619,12 +1627,12 @@ inline const TypeAry *Type::is_ary() const {
|
2020-12-24 15:35:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
inline const TypeVect *Type::is_vect() const {
|
|
|
|
|
- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" );
|
|
|
|
|
+ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" );
|
|
|
|
|
return (TypeVect*)this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
inline const TypeVect *Type::isa_vect() const {
|
|
|
|
|
- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL;
|
|
|
|
|
+ return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
inline const TypePtr *Type::is_ptr() const {
|
|
|
|
|
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
|
2021-08-13 14:54:30 +08:00
|
|
|
index 1f2cf2c64..6867177c1 100644
|
2020-12-24 15:35:16 +08:00
|
|
|
--- a/src/hotspot/share/opto/vectornode.cpp
|
|
|
|
|
+++ b/src/hotspot/share/opto/vectornode.cpp
|
|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
/*
|
|
|
|
|
- * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+ * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
*
|
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -236,7 +236,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
|
2020-12-24 15:35:16 +08:00
|
|
|
(vlen > 1) && is_power_of_2(vlen) &&
|
|
|
|
|
Matcher::vector_size_supported(bt, vlen)) {
|
|
|
|
|
int vopc = VectorNode::opcode(opc, bt);
|
|
|
|
|
- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen);
|
|
|
|
|
+ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt);
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2021-08-13 14:54:30 +08:00
|
|
|
@@ -653,7 +653,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
|
2020-12-24 15:35:16 +08:00
|
|
|
(vlen > 1) && is_power_of_2(vlen) &&
|
|
|
|
|
Matcher::vector_size_supported(bt, vlen)) {
|
|
|
|
|
int vopc = ReductionNode::opcode(opc, bt);
|
|
|
|
|
- return vopc != opc && Matcher::match_rule_supported(vopc);
|
|
|
|
|
+ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt);
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java
|
|
|
|
|
new file mode 100644
|
|
|
|
|
index 000000000..dc15ca800
|
|
|
|
|
--- /dev/null
|
|
|
|
|
+++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java
|
|
|
|
|
@@ -0,0 +1,128 @@
|
|
|
|
|
+/*
|
|
|
|
|
+* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+* Copyright (c) 2020, Arm Limited. All rights reserved.
|
|
|
|
|
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
+*
|
|
|
|
|
+* This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
+* under the terms of the GNU General Public License version 2 only, as
|
|
|
|
|
+* published by the Free Software Foundation.
|
|
|
|
|
+*
|
|
|
|
|
+* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
|
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
|
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
|
+* version 2 for more details (a copy is included in the LICENSE file that
|
|
|
|
|
+* accompanied this code).
|
|
|
|
|
+*
|
|
|
|
|
+* You should have received a copy of the GNU General Public License version
|
|
|
|
|
+* 2 along with this work; if not, write to the Free Software Foundation,
|
|
|
|
|
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
|
+*
|
|
|
|
|
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
|
|
|
+* or visit www.oracle.com if you need additional information or have any
|
|
|
|
|
+* questions.
|
|
|
|
|
+*
|
|
|
|
|
+*/
|
|
|
|
|
+
|
|
|
|
|
+/**
|
|
|
|
|
+ * @test
|
|
|
|
|
+ *
|
|
|
|
|
+ * @requires os.arch == "aarch64" & vm.compiler2.enabled
|
|
|
|
|
+ * @summary Verify VM SVE checking behavior
|
|
|
|
|
+ * @library /test/lib
|
|
|
|
|
+ * @run main/othervm/native compiler.c2.aarch64.TestSVEWithJNI
|
|
|
|
|
+ *
|
|
|
|
|
+ */
|
|
|
|
|
+
|
|
|
|
|
+package compiler.c2.aarch64;
|
|
|
|
|
+
|
|
|
|
|
+import java.util.ArrayList;
|
|
|
|
|
+import java.util.Collections;
|
|
|
|
|
+import java.util.List;
|
|
|
|
|
+import jdk.test.lib.process.ProcessTools;
|
|
|
|
|
+import jdk.test.lib.process.OutputAnalyzer;
|
|
|
|
|
+
|
|
|
|
|
+public class TestSVEWithJNI {
|
|
|
|
|
+ static {
|
|
|
|
|
+ System.loadLibrary("TestSVEWithJNI");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static final int EXIT_CODE = 99;
|
|
|
|
|
+ // Returns a nonnegative value on success, or a negative value on error.
|
|
|
|
|
+ public static native int setVectorLength(int arg);
|
|
|
|
|
+ // Returns a nonnegative value on success, or a negative value on error.
|
|
|
|
|
+ public static native int getVectorLength();
|
|
|
|
|
+
|
|
|
|
|
+ public static final String MSG = "Current Vector Size: ";
|
|
|
|
|
+ public static void testNormal() {
|
|
|
|
|
+ int vlen = getVectorLength();
|
|
|
|
|
+ System.out.println(MSG + vlen);
|
|
|
|
|
+ // Setting the vector length to its current value should succeed.
|
|
|
|
|
+ if (setVectorLength(vlen) < 0) {
|
|
|
|
|
+ throw new Error("Error in setting vector length.");
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public static void testAbort() {
|
|
|
|
|
+ int vlen = getVectorLength();
|
|
|
|
|
+ if (vlen <= 16) {
|
|
|
|
|
+ throw new Error("Error: unsupported vector length.");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (setVectorLength(16) < 0) {
|
|
|
|
|
+ throw new Error("Error: setting vector length failed.");
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public static ProcessBuilder createProcessBuilder(String [] args, String mode) {
|
|
|
|
|
+ List<String> vmopts = new ArrayList<>();
|
|
|
|
|
+ String testjdkPath = System.getProperty("test.jdk");
|
|
|
|
|
+ Collections.addAll(vmopts, "-Dtest.jdk=" + testjdkPath);
|
|
|
|
|
+ Collections.addAll(vmopts, args);
|
|
|
|
|
+ Collections.addAll(vmopts, TestSVEWithJNI.class.getName(), mode);
|
|
|
|
|
+ return ProcessTools.createJavaProcessBuilder(vmopts.toArray(new String[vmopts.size()]));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public static void main(String [] args) throws Exception {
|
|
|
|
|
+ if (args.length == 0) {
|
|
|
|
|
+ int vlen = getVectorLength();
|
|
|
|
|
+ if (vlen < 0) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ String [][] testOpts = {
|
|
|
|
|
+ {"-Xint", "-XX:UseSVE=1"},
|
|
|
|
|
+ {"-Xcomp", "-XX:UseSVE=1"},
|
|
|
|
|
+ };
|
|
|
|
|
+ ProcessBuilder pb;
|
|
|
|
|
+ OutputAnalyzer output;
|
|
|
|
|
+ for (String [] opts : testOpts) {
|
|
|
|
|
+ pb = createProcessBuilder(opts, "normal");
|
|
|
|
|
+ output = new OutputAnalyzer(pb.start());
|
|
|
|
|
+ output.shouldHaveExitValue(EXIT_CODE);
|
|
|
|
|
+
|
|
|
|
|
+ pb = createProcessBuilder(opts, "abort");
|
|
|
|
|
+ output = new OutputAnalyzer(pb.start());
|
|
|
|
|
+ output.shouldNotHaveExitValue(EXIT_CODE);
|
|
|
|
|
+ output.shouldMatch("(error|Error|ERROR)");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Verify MaxVectorSize
|
|
|
|
|
+
|
|
|
|
|
+ // Any SVE architecture should support 128-bit vector size.
|
|
|
|
|
+ pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=16"}, "normal");
|
|
|
|
|
+ output = new OutputAnalyzer(pb.start());
|
|
|
|
|
+ output.shouldHaveExitValue(EXIT_CODE);
|
|
|
|
|
+ output.shouldContain(MSG + 16);
|
|
|
|
|
+
|
|
|
|
|
+ // An unsupported large vector size value.
|
|
|
|
|
+ pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=512"}, "normal");
|
|
|
|
|
+ output = new OutputAnalyzer(pb.start());
|
|
|
|
|
+ output.shouldHaveExitValue(EXIT_CODE);
|
|
|
|
|
+ output.shouldContain("warning");
|
|
|
|
|
+ } else if (args[0].equals("normal")) {
|
|
|
|
|
+ testNormal();
|
|
|
|
|
+ System.exit(EXIT_CODE);
|
|
|
|
|
+ } else if (args[0].equals("abort")) {
|
|
|
|
|
+ testAbort();
|
|
|
|
|
+ System.exit(EXIT_CODE);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c b/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c
|
|
|
|
|
new file mode 100644
|
|
|
|
|
index 000000000..0cb3ab0b5
|
|
|
|
|
--- /dev/null
|
|
|
|
|
+++ b/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c
|
|
|
|
|
@@ -0,0 +1,68 @@
|
|
|
|
|
+/*
|
|
|
|
|
+* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
+* Copyright (c) 2020, Arm Limited. All rights reserved.
|
|
|
|
|
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
|
+*
|
|
|
|
|
+* This code is free software; you can redistribute it and/or modify it
|
|
|
|
|
+* under the terms of the GNU General Public License version 2 only, as
|
|
|
|
|
+* published by the Free Software Foundation.
|
|
|
|
|
+*
|
|
|
|
|
+* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
|
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
|
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
|
+* version 2 for more details (a copy is included in the LICENSE file that
|
|
|
|
|
+* accompanied this code).
|
|
|
|
|
+*
|
|
|
|
|
+* You should have received a copy of the GNU General Public License version
|
|
|
|
|
+* 2 along with this work; if not, write to the Free Software Foundation,
|
|
|
|
|
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
|
+*
|
|
|
|
|
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
|
|
|
+* or visit www.oracle.com if you need additional information or have any
|
|
|
|
|
+* questions.
|
|
|
|
|
+*
|
|
|
|
|
+*/
|
|
|
|
|
+
|
|
|
|
|
+#ifdef __aarch64__
|
|
|
|
|
+
|
|
|
|
|
+#include <jni.h>
|
|
|
|
|
+#include <pthread.h>
|
|
|
|
|
+#include <stdio.h>
|
|
|
|
|
+#include <stdlib.h>
|
|
|
|
|
+#include <sys/prctl.h>
|
|
|
|
|
+#include <unistd.h>
|
|
|
|
|
+
|
|
|
|
|
+#ifndef PR_SVE_GET_VL
|
|
|
|
|
+// For old toolchains that do not define the SVE-related macros.
|
|
|
|
|
+#define PR_SVE_SET_VL 50
|
|
|
|
|
+#define PR_SVE_GET_VL 51
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
+int get_current_thread_vl() {
|
|
|
|
|
+ return prctl(PR_SVE_GET_VL);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+int set_current_thread_vl(unsigned long arg) {
|
|
|
|
|
+ return prctl(PR_SVE_SET_VL, arg);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+#ifdef __cplusplus
|
|
|
|
|
+extern "C" {
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
+JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_setVectorLength
|
|
|
|
|
+(JNIEnv * env, jclass clz, jint length) {
|
|
|
|
|
+ return set_current_thread_vl(length);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_getVectorLength
|
|
|
|
|
+(JNIEnv *env, jclass clz) {
|
|
|
|
|
+ return get_current_thread_vl();
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+#ifdef __cplusplus
|
|
|
|
|
+}
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
+#endif
|
|
|
|
|
--
|
2021-08-13 14:54:30 +08:00
|
|
|
2.19.0
|
2020-12-24 15:35:16 +08:00
|
|
|
|