diff --git a/8231441-3-AArch64-Initial-SVE-backend-support.patch b/8231441-3-AArch64-Initial-SVE-backend-support.patch index 3691f9d..3418659 100755 --- a/8231441-3-AArch64-Initial-SVE-backend-support.patch +++ b/8231441-3-AArch64-Initial-SVE-backend-support.patch @@ -1,8 +1,8 @@ diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index a39640526..2479853fa 100644 +index 2af2f9a..f23b972 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk -@@ -146,6 +146,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -156,6 +156,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) ))) endif @@ -15,11 +15,283 @@ index a39640526..2479853fa 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ +diff --git a/src/hotspot/cpu/aarch64/aarch64-asmtest.py b/src/hotspot/cpu/aarch64/aarch64-asmtest.py +index 31c6965..e621402 100644 +--- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py ++++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py +@@ -73,6 +73,48 @@ class GeneralRegisterOrSp(Register): + return self.astr() + else: + return self.astr("r") ++class SVEVectorRegister(FloatRegister): ++ def __str__(self): ++ return self.astr("z") ++ ++class SVEPRegister(Register): ++ def __str__(self): ++ return self.astr("p") ++ ++ def generate(self): ++ self.number = random.randint(0, 15) ++ return self ++ ++class SVEGoverningPRegister(Register): ++ def __str__(self): ++ return self.astr("p") ++ def generate(self): ++ self.number = random.randint(0, 7) ++ return self ++ ++class RegVariant(object): ++ def __init__(self, low, high): ++ self.number = random.randint(low, high) ++ ++ def astr(self): ++ nameMap = { ++ 0: ".b", ++ 1: ".h", ++ 2: ".s", ++ 3: ".d", ++ 4: ".q" ++ } ++ return nameMap.get(self.number) ++ ++ def cstr(self): ++ nameMap = { ++ 0: "__ B", ++ 1: "__ H", ++ 2: "__ S", ++ 3: "__ D", ++ 4: "__ Q" ++ } ++ return nameMap.get(self.number) + + class FloatZero(Operand): + +@@ -88,7 +130,10 @@ class OperandFactory: + 'w' : GeneralRegister, + 's' : FloatRegister, + 'd' : FloatRegister, +- 'z' : FloatZero} ++ 'z' : FloatZero, ++ 'p' : SVEPRegister, ++ 'P' : SVEGoverningPRegister, ++ 'Z' : SVEVectorRegister} + + @classmethod + def create(cls, mode): +@@ -834,6 +879,100 @@ class FloatInstruction(Instruction): + % tuple([Instruction.astr(self)] + + [(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)])) + ++class SVEVectorOp(Instruction): ++ def __init__(self, args): ++ name = args[0] ++ regTypes = args[1] ++ regs = [] ++ for c in regTypes: ++ regs.append(OperandFactory.create(c).generate()) ++ self.reg = regs ++ self.numRegs = len(regs) ++ if regTypes[0] != "p" and regTypes[1] == 'P': ++ self._isPredicated = True ++ self._merge = "/m" ++ else: ++ self._isPredicated = False ++ self._merge ="" ++ ++ self._bitwiseop = False ++ if name[0] == 'f': ++ self._width = RegVariant(2, 3) ++ elif not self._isPredicated and (name in ["and", "eor", "orr", "bic"]): ++ self._width = RegVariant(3, 3) ++ self._bitwiseop = True ++ else: ++ self._width = RegVariant(0, 3) ++ if len(args) > 2: ++ self._dnm = args[2] ++ else: ++ self._dnm = None ++ Instruction.__init__(self, name) ++ ++ def cstr(self): ++ formatStr = "%s%s" + ''.join([", %s" for i in range(0, self.numRegs)] + [");"]) ++ if self._bitwiseop: ++ width = [] ++ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)] + 
[");"]) ++ else: ++ width = [self._width.cstr()] ++ return (formatStr ++ % tuple(["__ sve_" + self._name + "("] + ++ [str(self.reg[0])] + ++ width + ++ [str(self.reg[i]) for i in range(1, self.numRegs)])) ++ def astr(self): ++ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)]) ++ if self._dnm == 'dn': ++ formatStr += ", %s" ++ dnReg = [str(self.reg[0]) + self._width.astr()] ++ else: ++ dnReg = [] ++ ++ if self._isPredicated: ++ restRegs = [str(self.reg[1]) + self._merge] + dnReg + [str(self.reg[i]) + self._width.astr() for i in range(2, self.numRegs)] ++ else: ++ restRegs = dnReg + [str(self.reg[i]) + self._width.astr() for i in range(1, self.numRegs)] ++ return (formatStr ++ % tuple([Instruction.astr(self)] + ++ [str(self.reg[0]) + self._width.astr()] + ++ restRegs)) ++ def generate(self): ++ return self ++ ++class SVEReductionOp(Instruction): ++ def __init__(self, args): ++ name = args[0] ++ lowRegType = args[1] ++ self.reg = [] ++ Instruction.__init__(self, name) ++ self.reg.append(OperandFactory.create('s').generate()) ++ self.reg.append(OperandFactory.create('P').generate()) ++ self.reg.append(OperandFactory.create('Z').generate()) ++ self._width = RegVariant(lowRegType, 3) ++ def cstr(self): ++ return "__ sve_%s(%s, %s, %s, %s);" % (self.name(), ++ str(self.reg[0]), ++ self._width.cstr(), ++ str(self.reg[1]), ++ str(self.reg[2])) ++ def astr(self): ++ if self.name() == "uaddv": ++ dstRegName = "d" + str(self.reg[0].number) ++ else: ++ dstRegName = self._width.astr()[1] + str(self.reg[0].number) ++ formatStr = "%s %s, %s, %s" ++ if self.name() == "fadda": ++ formatStr += ", %s" ++ moreReg = [dstRegName] ++ else: ++ moreReg = [] ++ return formatStr % tuple([self.name()] + ++ [dstRegName] + ++ [str(self.reg[1])] + ++ moreReg + ++ [str(self.reg[2]) + self._width.astr()]) ++ + class LdStSIMDOp(Instruction): + def __init__(self, args): + self._name, self.regnum, self.arrangement, self.addresskind = args +@@ -1120,7 +1259,42 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", + ["mov", "__ mov(v1, __ T2S, 1, zr);", "mov\tv1.s[1], wzr"], + ["mov", "__ mov(v1, __ T4H, 2, zr);", "mov\tv1.h[2], wzr"], + ["mov", "__ mov(v1, __ T8B, 3, zr);", "mov\tv1.b[3], wzr"], +- ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]]) ++ ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"], ++ # SVE instructions ++ ["cpy", "__ sve_cpy(z0, __ S, p0, v1);", "mov\tz0.s, p0/m, s1"], ++ ["inc", "__ sve_inc(r0, __ S);", "incw\tx0"], ++ ["dec", "__ sve_dec(r1, __ H);", "dech\tx1"], ++ ["lsl", "__ sve_lsl(z0, __ B, z1, 7);", "lsl\tz0.b, z1.b, #7"], ++ ["lsl", "__ sve_lsl(z21, __ H, z1, 15);", "lsl\tz21.h, z1.h, #15"], ++ ["lsl", "__ sve_lsl(z0, __ S, z1, 31);", "lsl\tz0.s, z1.s, #31"], ++ ["lsl", "__ sve_lsl(z0, __ D, z1, 63);", "lsl\tz0.d, z1.d, #63"], ++ ["lsr", "__ sve_lsr(z0, __ B, z1, 7);", "lsr\tz0.b, z1.b, #7"], ++ ["asr", "__ sve_asr(z0, __ H, z11, 15);", "asr\tz0.h, z11.h, #15"], ++ ["lsr", "__ sve_lsr(z30, __ S, z1, 31);", "lsr\tz30.s, z1.s, #31"], ++ ["asr", "__ sve_asr(z0, __ D, z1, 63);", "asr\tz0.d, z1.d, #63"], ++ ["addvl", "__ sve_addvl(sp, r0, 31);", "addvl\tsp, x0, #31"], ++ ["addpl", "__ sve_addpl(r1, sp, -32);", "addpl\tx1, sp, -32"], ++ ["cntp", "__ sve_cntp(r8, __ B, p0, p1);", "cntp\tx8, p0, p1.b"], ++ ["dup", "__ sve_dup(z0, __ B, 127);", "dup\tz0.b, 127"], ++ ["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"], ++ ["dup", "__ sve_dup(z2, __ S, 32512);", 
"dup\tz2.s, 32512"], ++ ["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"], ++ ["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"], ++ ["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"], ++ ["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"], ++ ["ld1b", "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));", "ld1b\t{z30.b}, p3/z, [sp, x8]"], ++ ["ld1w", "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));", "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"], ++ ["ld1d", "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));", "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"], ++ ["st1b", "__ sve_st1b(z22, __ B, p6, Address(sp));", "st1b\t{z22.b}, p6, [sp]"], ++ ["st1b", "__ sve_st1b(z31, __ B, p7, Address(sp, -8));", "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"], ++ ["st1w", "__ sve_st1w(z0, __ S, p1, Address(r0, 7));", "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"], ++ ["st1b", "__ sve_st1b(z0, __ B, p2, Address(sp, r1));", "st1b\t{z0.b}, p2, [sp, x1]"], ++ ["st1h", "__ sve_st1h(z0, __ H, p3, Address(sp, r8));", "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"], ++ ["st1d", "__ sve_st1d(z0, __ D, p4, Address(r0, r8));", "st1d\t{z0.d}, p4, [x0, x8, LSL #3]"], ++ ["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"], ++ ["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"], ++ ["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"], ++]) + + print "\n// FloatImmediateOp" + for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125", +@@ -1145,6 +1319,50 @@ for size in ("x", "w"): + ["ldumin", "ldumin", size, suffix], + ["ldumax", "ldumax", size, suffix]]); + ++ ++generate(SVEVectorOp, [["add", "ZZZ"], ++ ["sub", "ZZZ"], ++ ["fadd", "ZZZ"], ++ ["fmul", "ZZZ"], ++ ["fsub", "ZZZ"], ++ ["abs", "ZPZ"], ++ ["add", "ZPZ", "dn"], ++ ["asr", "ZPZ", "dn"], ++ ["cnt", "ZPZ"], ++ ["lsl", "ZPZ", "dn"], ++ ["lsr", "ZPZ", "dn"], ++ ["mul", "ZPZ", "dn"], ++ ["neg", "ZPZ"], ++ ["not", "ZPZ"], ++ ["smax", "ZPZ", "dn"], ++ ["smin", "ZPZ", "dn"], ++ ["sub", "ZPZ", "dn"], ++ ["fabs", "ZPZ"], ++ ["fadd", "ZPZ", "dn"], ++ ["fdiv", "ZPZ", "dn"], ++ ["fmax", "ZPZ", "dn"], ++ ["fmin", "ZPZ", "dn"], ++ ["fmul", "ZPZ", "dn"], ++ ["fneg", "ZPZ"], ++ ["frintm", "ZPZ"], ++ ["frintn", "ZPZ"], ++ ["frintp", "ZPZ"], ++ ["fsqrt", "ZPZ"], ++ ["fsub", "ZPZ", "dn"], ++ ["fmla", "ZPZZ"], ++ ["fmls", "ZPZZ"], ++ ["fnmla", "ZPZZ"], ++ ["fnmls", "ZPZZ"], ++ ["mla", "ZPZZ"], ++ ["mls", "ZPZZ"], ++ ["and", "ZZZ"], ++ ["eor", "ZZZ"], ++ ["orr", "ZZZ"], ++ ]) ++ ++generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0], ++ ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]]) ++ + print "\n __ bind(forth);" + outfile.write("forth:\n") + +@@ -1153,8 +1371,8 @@ outfile.close() + import subprocess + import sys + +-# compile for 8.1 because of lse atomics +-subprocess.check_call([AARCH64_AS, "-march=armv8.1-a", "aarch64ops.s", "-o", "aarch64ops.o"]) ++# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension. 
++subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"]) + + print + print "/*", diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index 64985e498..fa434df7d 100644 +index f126488..8a92ff2 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -1991,6 +1991,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { +@@ -2006,6 +2006,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { // branch if we need to invalidate the method later __ nop(); @@ -30,7 +302,7 @@ index 64985e498..fa434df7d 100644 int bangsize = C->bang_size_in_bytes(); if (C->need_stack_bang(bangsize) && UseStackBanging) __ generate_stack_overflow_check(bangsize); -@@ -2157,8 +2161,28 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo +@@ -2172,8 +2176,28 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo if (bottom_type()->isa_vect() != NULL) { uint ireg = ideal_reg(); @@ -61,7 +333,7 @@ index 64985e498..fa434df7d 100644 MacroAssembler _masm(cbuf); assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -@@ -2442,15 +2466,28 @@ const bool Matcher::match_rule_supported(int opcode) { +@@ -2452,15 +2476,28 @@ const bool Matcher::match_rule_supported(int opcode) { return true; // Per default match rules are supported. } @@ -99,19 +371,20 @@ index 64985e498..fa434df7d 100644 } const bool Matcher::has_predicated_vectors(void) { -@@ -3691,6 +3728,11 @@ encode %{ - if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); +@@ -3812,6 +3849,12 @@ encode %{ return; -+ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { + } + } ++ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { + // Only non uncommon_trap calls need to reinitialize ptrue. 
+ if (uncommon_trap_request() == 0) { + __ reinitialize_ptrue(); + } - } - // Emit stub for static call - address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); -@@ -3708,6 +3750,8 @@ encode %{ ++ } + %} + + enc_class aarch64_enc_java_dynamic_call(method meth) %{ +@@ -3821,6 +3864,8 @@ encode %{ if (call == NULL) { ciEnv::current()->record_failure("CodeCache is full"); return; @@ -120,7 +393,7 @@ index 64985e498..fa434df7d 100644 } %} -@@ -3744,6 +3788,9 @@ encode %{ +@@ -3857,6 +3902,9 @@ encode %{ __ bind(retaddr); __ add(sp, sp, 2 * wordSize); } @@ -130,7 +403,7 @@ index 64985e498..fa434df7d 100644 %} enc_class aarch64_enc_rethrow() %{ -@@ -3753,6 +3800,11 @@ encode %{ +@@ -3866,6 +3914,11 @@ encode %{ enc_class aarch64_enc_ret() %{ MacroAssembler _masm(&cbuf); @@ -142,7 +415,7 @@ index 64985e498..fa434df7d 100644 __ ret(lr); %} -@@ -4494,6 +4546,41 @@ operand immLoffset16() +@@ -4607,6 +4660,41 @@ operand immLoffset16() interface(CONST_INTER); %} @@ -184,7 +457,7 @@ index 64985e498..fa434df7d 100644 // 32 bit integer valid for add sub immediate operand immIAddSub() %{ -@@ -16038,7 +16125,7 @@ instruct loadV8(vecD dst, vmem8 mem) +@@ -16433,7 +16521,7 @@ instruct loadV8(vecD dst, vmem8 mem) // Load Vector (128 bits) instruct loadV16(vecX dst, vmem16 mem) %{ @@ -193,7 +466,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (LoadVector mem)); ins_cost(4 * INSN_COST); format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} -@@ -16094,7 +16181,7 @@ instruct replicate8B(vecD dst, iRegIorL2I src) +@@ -16489,7 +16577,7 @@ instruct replicate8B(vecD dst, iRegIorL2I src) instruct replicate16B(vecX dst, iRegIorL2I src) %{ @@ -202,7 +475,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (ReplicateB src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (16B)" %} -@@ -16119,7 +16206,7 @@ instruct replicate8B_imm(vecD dst, immI con) +@@ -16514,7 +16602,7 @@ instruct replicate8B_imm(vecD dst, immI con) instruct replicate16B_imm(vecX dst, immI con) %{ @@ -211,7 +484,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (ReplicateB con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(16B)" %} -@@ -16144,7 +16231,7 @@ instruct replicate4S(vecD dst, iRegIorL2I src) +@@ -16539,7 +16627,7 @@ instruct replicate4S(vecD dst, iRegIorL2I src) instruct replicate8S(vecX dst, iRegIorL2I src) %{ @@ -220,7 +493,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (ReplicateS src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (8S)" %} -@@ -16169,7 +16256,7 @@ instruct replicate4S_imm(vecD dst, immI con) +@@ -16564,7 +16652,7 @@ instruct replicate4S_imm(vecD dst, immI con) instruct replicate8S_imm(vecX dst, immI con) %{ @@ -229,7 +502,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (ReplicateS con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(8H)" %} -@@ -16193,7 +16280,7 @@ instruct replicate2I(vecD dst, iRegIorL2I src) +@@ -16588,7 +16676,7 @@ instruct replicate2I(vecD dst, iRegIorL2I src) instruct replicate4I(vecX dst, iRegIorL2I src) %{ @@ -238,7 +511,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (ReplicateI src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4I)" %} -@@ -16217,7 +16304,7 @@ instruct replicate2I_imm(vecD dst, immI con) +@@ -16612,7 +16700,7 @@ instruct replicate2I_imm(vecD dst, immI con) instruct replicate4I_imm(vecX dst, immI con) %{ @@ -247,7 +520,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (ReplicateI con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(4I)" %} -@@ -16229,7 +16316,7 @@ instruct 
replicate4I_imm(vecX dst, immI con) +@@ -16624,7 +16712,7 @@ instruct replicate4I_imm(vecX dst, immI con) instruct replicate2L(vecX dst, iRegL src) %{ @@ -256,7 +529,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (ReplicateL src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2L)" %} -@@ -16241,7 +16328,7 @@ instruct replicate2L(vecX dst, iRegL src) +@@ -16636,7 +16724,7 @@ instruct replicate2L(vecX dst, iRegL src) instruct replicate2L_zero(vecX dst, immI0 zero) %{ @@ -265,7 +538,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (ReplicateI zero)); ins_cost(INSN_COST); format %{ "movi $dst, $zero\t# vector(4I)" %} -@@ -16268,7 +16355,7 @@ instruct replicate2F(vecD dst, vRegF src) +@@ -16663,7 +16751,7 @@ instruct replicate2F(vecD dst, vRegF src) instruct replicate4F(vecX dst, vRegF src) %{ @@ -274,7 +547,7 @@ index 64985e498..fa434df7d 100644 match(Set dst (ReplicateF src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4F)" %} -@@ -16281,7 +16368,7 @@ instruct replicate4F(vecX dst, vRegF src) +@@ -16676,7 +16764,7 @@ instruct replicate4F(vecX dst, vRegF src) instruct replicate2D(vecX dst, vRegD src) %{ @@ -285,7 +558,7 @@ index 64985e498..fa434df7d 100644 format %{ "dup $dst, $src\t# vector (2D)" %} diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad new file mode 100644 -index 000000000..8d80cb37a +index 0000000..8d80cb3 --- /dev/null +++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad @@ -0,0 +1,1366 @@ @@ -1657,7 +1930,7 @@ index 000000000..8d80cb37a +%} diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 new file mode 100644 -index 000000000..0323f2f8c +index 0000000..0323f2f --- /dev/null +++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 @@ -0,0 +1,727 @@ @@ -2389,7 +2662,7 @@ index 000000000..0323f2f8c +BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub) +BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub) diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp -index 8047ed8fd..32e53336b 100644 +index 8047ed8..32e5333 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp @@ -96,662 +96,746 @@ void entry(CodeBuffer *cb) { @@ -5134,283 +5407,11 @@ index 8047ed8fd..32e53336b 100644 }; // END Generated code -- do not edit -diff --git a/src/hotspot/cpu/aarch64/aarch64-asmtest.py b/src/hotspot/cpu/aarch64/aarch64-asmtest.py -index 31c6965b7..2211bd25a 100644 ---- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py -+++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py -@@ -73,6 +73,48 @@ class GeneralRegisterOrSp(Register): - return self.astr() - else: - return self.astr("r") -+class SVEVectorRegister(FloatRegister): -+ def __str__(self): -+ return self.astr("z") -+ -+class SVEPRegister(Register): -+ def __str__(self): -+ return self.astr("p") -+ -+ def generate(self): -+ self.number = random.randint(0, 15) -+ return self -+ -+class SVEGoverningPRegister(Register): -+ def __str__(self): -+ return self.astr("p") -+ def generate(self): -+ self.number = random.randint(0, 7) -+ return self -+ -+class RegVariant(object): -+ def __init__(self, low, high): -+ self.number = random.randint(low, high) -+ -+ def astr(self): -+ nameMap = { -+ 0: ".b", -+ 1: ".h", -+ 2: ".s", -+ 3: ".d", -+ 4: ".q" -+ } -+ return nameMap.get(self.number) -+ -+ def cstr(self): -+ nameMap = { -+ 0: "__ B", -+ 1: "__ H", -+ 2: "__ S", -+ 3: "__ D", -+ 4: "__ Q" -+ } -+ return nameMap.get(self.number) - - class 
FloatZero(Operand): - -@@ -88,7 +130,10 @@ class OperandFactory: - 'w' : GeneralRegister, - 's' : FloatRegister, - 'd' : FloatRegister, -- 'z' : FloatZero} -+ 'z' : FloatZero, -+ 'p' : SVEPRegister, -+ 'P' : SVEGoverningPRegister, -+ 'Z' : SVEVectorRegister} - - @classmethod - def create(cls, mode): -@@ -834,6 +879,100 @@ class FloatInstruction(Instruction): - % tuple([Instruction.astr(self)] + - [(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)])) - -+class SVEVectorOp(Instruction): -+ def __init__(self, args): -+ name = args[0] -+ regTypes = args[1] -+ regs = [] -+ for c in regTypes: -+ regs.append(OperandFactory.create(c).generate()) -+ self.reg = regs -+ self.numRegs = len(regs) -+ if regTypes[0] != "p" and regTypes[1] == 'P': -+ self._isPredicated = True -+ self._merge = "/m" -+ else: -+ self._isPredicated = False -+ self._merge ="" -+ -+ self._bitwiseop = False -+ if name[0] == 'f': -+ self._width = RegVariant(2, 3) -+ elif not self._isPredicated and (name in ["and", "eor", "orr", "bic"]): -+ self._width = RegVariant(3, 3) -+ self._bitwiseop = True -+ else: -+ self._width = RegVariant(0, 3) -+ if len(args) > 2: -+ self._dnm = args[2] -+ else: -+ self._dnm = None -+ Instruction.__init__(self, name) -+ -+ def cstr(self): -+ formatStr = "%s%s" + ''.join([", %s" for i in range(0, self.numRegs)] + [");"]) -+ if self._bitwiseop: -+ width = [] -+ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)] + [");"]) -+ else: -+ width = [self._width.cstr()] -+ return (formatStr -+ % tuple(["__ sve_" + self._name + "("] + -+ [str(self.reg[0])] + -+ width + -+ [str(self.reg[i]) for i in range(1, self.numRegs)])) -+ def astr(self): -+ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)]) -+ if self._dnm == 'dn': -+ formatStr += ", %s" -+ dnReg = [str(self.reg[0]) + self._width.astr()] -+ else: -+ dnReg = [] -+ -+ if self._isPredicated: -+ restRegs = [str(self.reg[1]) + self._merge] + dnReg + [str(self.reg[i]) + self._width.astr() for i in range(2, self.numRegs)] -+ else: -+ restRegs = dnReg + [str(self.reg[i]) + self._width.astr() for i in range(1, self.numRegs)] -+ return (formatStr -+ % tuple([Instruction.astr(self)] + -+ [str(self.reg[0]) + self._width.astr()] + -+ restRegs)) -+ def generate(self): -+ return self -+ -+class SVEReductionOp(Instruction): -+ def __init__(self, args): -+ name = args[0] -+ lowRegType = args[1] -+ self.reg = [] -+ Instruction.__init__(self, name) -+ self.reg.append(OperandFactory.create('s').generate()) -+ self.reg.append(OperandFactory.create('P').generate()) -+ self.reg.append(OperandFactory.create('Z').generate()) -+ self._width = RegVariant(lowRegType, 3) -+ def cstr(self): -+ return "__ sve_%s(%s, %s, %s, %s);" % (self.name(), -+ str(self.reg[0]), -+ self._width.cstr(), -+ str(self.reg[1]), -+ str(self.reg[2])) -+ def astr(self): -+ if self.name() == "uaddv": -+ dstRegName = "d" + str(self.reg[0].number) -+ else: -+ dstRegName = self._width.astr()[1] + str(self.reg[0].number) -+ formatStr = "%s %s, %s, %s" -+ if self.name() == "fadda": -+ formatStr += ", %s" -+ moreReg = [dstRegName] -+ else: -+ moreReg = [] -+ return formatStr % tuple([self.name()] + -+ [dstRegName] + -+ [str(self.reg[1])] + -+ moreReg + -+ [str(self.reg[2]) + self._width.astr()]) -+ - class LdStSIMDOp(Instruction): - def __init__(self, args): - self._name, self.regnum, self.arrangement, self.addresskind = args -@@ -1120,7 +1259,42 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", - ["mov", "__ mov(v1, __ T2S, 1, zr);", 
"mov\tv1.s[1], wzr"], - ["mov", "__ mov(v1, __ T4H, 2, zr);", "mov\tv1.h[2], wzr"], - ["mov", "__ mov(v1, __ T8B, 3, zr);", "mov\tv1.b[3], wzr"], -- ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]]) -+ ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"], -+ # SVE instructions -+ ["cpy", "__ sve_cpy(z0, __ S, p0, v1);", "mov\tz0.s, p0/m, s1"], -+ ["inc", "__ sve_inc(r0, __ S);", "incw\tx0"], -+ ["dec", "__ sve_dec(r1, __ H);", "dech\tx1"], -+ ["lsl", "__ sve_lsl(z0, __ B, z1, 7);", "lsl\tz0.b, z1.b, #7"], -+ ["lsl", "__ sve_lsl(z21, __ H, z1, 15);", "lsl\tz21.h, z1.h, #15"], -+ ["lsl", "__ sve_lsl(z0, __ S, z1, 31);", "lsl\tz0.s, z1.s, #31"], -+ ["lsl", "__ sve_lsl(z0, __ D, z1, 63);", "lsl\tz0.d, z1.d, #63"], -+ ["lsr", "__ sve_lsr(z0, __ B, z1, 7);", "lsr\tz0.b, z1.b, #7"], -+ ["asr", "__ sve_asr(z0, __ H, z11, 15);", "asr\tz0.h, z11.h, #15"], -+ ["lsr", "__ sve_lsr(z30, __ S, z1, 31);", "lsr\tz30.s, z1.s, #31"], -+ ["asr", "__ sve_asr(z0, __ D, z1, 63);", "asr\tz0.d, z1.d, #63"], -+ ["addvl", "__ sve_addvl(sp, r0, 31);", "addvl\tsp, x0, #31"], -+ ["addpl", "__ sve_addpl(r1, sp, -32);", "addpl\tx1, sp, -32"], -+ ["cntp", "__ sve_cntp(r8, __ B, p0, p1);", "cntp\tx8, p0, p1.b"], -+ ["dup", "__ sve_dup(z0, __ B, 127);", "dup\tz0.b, 127"], -+ ["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"], -+ ["dup", "__ sve_dup(z2, __ S, 32512);", "dup\tz2.s, 32512"], -+ ["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"], -+ ["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"], -+ ["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"], -+ ["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"], -+ ["ld1b", "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));", "ld1b\t{z30.b}, p3/z, [sp, x8]"], -+ ["ld1w", "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));", "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"], -+ ["ld1d", "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));", "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"], -+ ["st1b", "__ sve_st1b(z22, __ B, p6, Address(sp));", "st1b\t{z22.b}, p6, [sp]"], -+ ["st1b", "__ sve_st1b(z31, __ B, p7, Address(sp, -8));", "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"], -+ ["st1w", "__ sve_st1w(z0, __ S, p1, Address(r0, 7));", "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"], -+ ["st1b", "__ sve_st1b(z0, __ B, p2, Address(sp, r1));", "st1b\t{z0.b}, p2, [sp, x1]"], -+ ["st1h", "__ sve_st1h(z0, __ H, p3, Address(sp, r8));", "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"], -+ ["st1d", "__ sve_st1d(z0, __ D, p4, Address(r0, r8));", "st1d\t{z0.d}, p4, [x0, x8, LSL #3]"], -+ ["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"], -+ ["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"], -+ ["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"], -+]) - - print "\n// FloatImmediateOp" - for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125", -@@ -1145,6 +1319,50 @@ for size in ("x", "w"): - ["ldumin", "ldumin", size, suffix], - ["ldumax", "ldumax", size, suffix]]); - -+ -+generate(SVEVectorOp, [["add", "ZZZ"], -+ ["sub", "ZZZ"], -+ ["fadd", "ZZZ"], -+ ["fmul", "ZZZ"], -+ ["fsub", "ZZZ"], -+ ["abs", "ZPZ"], -+ ["add", "ZPZ", "dn"], -+ ["asr", "ZPZ", "dn"], -+ ["cnt", "ZPZ"], -+ ["lsl", "ZPZ", "dn"], -+ ["lsr", "ZPZ", "dn"], -+ ["mul", "ZPZ", "dn"], -+ ["neg", "ZPZ"], -+ ["not", "ZPZ"], -+ ["smax", "ZPZ", "dn"], -+ ["smin", "ZPZ", "dn"], -+ 
["sub", "ZPZ", "dn"], -+ ["fabs", "ZPZ"], -+ ["fadd", "ZPZ", "dn"], -+ ["fdiv", "ZPZ", "dn"], -+ ["fmax", "ZPZ", "dn"], -+ ["fmin", "ZPZ", "dn"], -+ ["fmul", "ZPZ", "dn"], -+ ["fneg", "ZPZ"], -+ ["frintm", "ZPZ"], -+ ["frintn", "ZPZ"], -+ ["frintp", "ZPZ"], -+ ["fsqrt", "ZPZ"], -+ ["fsub", "ZPZ", "dn"], -+ ["fmla", "ZPZZ"], -+ ["fmls", "ZPZZ"], -+ ["fnmla", "ZPZZ"], -+ ["fnmls", "ZPZZ"], -+ ["mla", "ZPZZ"], -+ ["mls", "ZPZZ"], -+ ["and", "ZZZ"], -+ ["eor", "ZZZ"], -+ ["orr", "ZZZ"], -+ ]) -+ -+generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0], -+ ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]]) -+ - print "\n __ bind(forth);" - outfile.write("forth:\n") - -@@ -1153,8 +1372,8 @@ outfile.close() - import subprocess - import sys - --# compile for 8.1 because of lse atomics --subprocess.check_call([AARCH64_AS, "-march=armv8.1-a", "aarch64ops.s", "-o", "aarch64ops.o"]) -+# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension. -+subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"]) - - print - print "/*", diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp -index 6f4e75ff4..dc2d5e2c9 100644 +index 8f0d7f5..13daa4e 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp -@@ -139,6 +139,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21); +@@ -152,6 +152,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21); // Java stack pointer REGISTER_DECLARATION(Register, esp, r20); @@ -5420,7 +5421,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 #define assert_cond(ARG1) assert(ARG1, #ARG1) namespace asm_util { -@@ -562,6 +565,18 @@ class Address { +@@ -581,6 +584,18 @@ class Address { void lea(MacroAssembler *, Register) const; static bool offset_ok_for_immed(int64_t offset, uint shift = 0); @@ -5439,7 +5440,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 }; // Convience classes -@@ -2445,13 +2460,18 @@ public: +@@ -2473,13 +2488,18 @@ public: f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); } @@ -5463,7 +5464,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 #define INSN(NAME, opc, opc2, isSHR) \ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \ starti; \ -@@ -2693,6 +2713,240 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V +@@ -2721,6 +2741,240 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V f(0, 10), rf(Vn, 5), rf(Vd, 0); } @@ -5704,7 +5705,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 // SVE inc/dec register by element count #define INSN(NAME, op) \ void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \ -@@ -2706,6 +2960,45 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V +@@ -2734,6 +2988,45 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V INSN(sve_dec, 1); #undef INSN @@ -5751,7 +5752,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 } diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp -index 6ac54f257..a258528ea 100644 +index 6ac54f2..a258528 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp @@ -456,8 +456,12 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z @@ -5769,10 +5770,10 @@ index 
6ac54f257..a258528ea 100644 __ b(*stub->continuation()); } diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index 265cd0888..590fd8662 100644 +index c70d424..7cfa70a 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -2110,8 +2110,17 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { +@@ -2131,8 +2131,17 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { } // Push lots of registers in the bit set supplied. Don't push sp. @@ -5791,7 +5792,7 @@ index 265cd0888..590fd8662 100644 // Scan bitset to accumulate register pairs unsigned char regs[32]; int count = 0; -@@ -2126,8 +2135,18 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { +@@ -2147,8 +2156,18 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { return 0; } @@ -5810,7 +5811,7 @@ index 265cd0888..590fd8662 100644 if (count & 1) { strq(as_FloatRegister(regs[0]), Address(stack)); i += 1; -@@ -2140,7 +2159,16 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { +@@ -2161,7 +2180,16 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { return count; } @@ -5827,7 +5828,7 @@ index 265cd0888..590fd8662 100644 // Scan bitset to accumulate register pairs unsigned char regs[32]; int count = 0; -@@ -2155,6 +2183,16 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { +@@ -2176,6 +2204,16 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { return 0; } @@ -5844,7 +5845,7 @@ index 265cd0888..590fd8662 100644 if (count & 1) { ldrq(as_FloatRegister(regs[0]), Address(stack)); i += 1; -@@ -2638,23 +2676,39 @@ void MacroAssembler::pop_call_clobbered_registers() { +@@ -2659,23 +2697,39 @@ void MacroAssembler::pop_call_clobbered_registers() { pop(call_clobbered_registers() - RegSet::of(rscratch1, rscratch2), sp); } @@ -5894,10 +5895,10 @@ index 265cd0888..590fd8662 100644 + ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), + as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step))); + } - + // integer registers except lr & sp pop(RegSet::range(r0, r17), sp); -@@ -2703,6 +2757,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp) +@@ -2732,6 +2786,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp) return Address(base, offset); } @@ -5919,7 +5920,7 @@ index 265cd0888..590fd8662 100644 // Checks whether offset is aligned. // Returns true if it is, else false. 
bool MacroAssembler::merge_alignment_check(Register base, -@@ -5901,3 +5970,13 @@ void MacroAssembler::verify_sve_vector_length() { +@@ -5930,3 +5999,13 @@ void MacroAssembler::verify_sve_vector_length() { stop("Error: SVE vector length has changed since jvm startup"); bind(verify_ok); } @@ -5934,42 +5935,42 @@ index 265cd0888..590fd8662 100644 + bind(verify_ok); +} diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -index 7fd9e3c97..bc3175b2b 100644 +index ec9b3cc..07e3169 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -862,8 +862,10 @@ public: - + DEBUG_ONLY(void verify_heapbase(const char* msg);) - + - void push_CPU_state(bool save_vectors = false); - void pop_CPU_state(bool restore_vectors = false) ; + void push_CPU_state(bool save_vectors = false, bool use_sve = false, + int sve_vector_size_in_bytes = 0); + void pop_CPU_state(bool restore_vectors = false, bool use_sve = false, + int sve_vector_size_in_bytes = 0); - + // Round up to a power of two void round_to(Register reg, int modulus); @@ -939,6 +941,10 @@ public: Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); - + void verify_sve_vector_length(); + void reinitialize_ptrue() { + sve_ptrue(ptrue, B); + } + void verify_ptrue(); - + // Debugging - -@@ -1320,6 +1326,7 @@ private: + +@@ -1338,6 +1344,7 @@ private: // Returns an address on the stack which is reachable with a ldr/str of size // Uses rscratch2 if the address is not directly reachable Address spill_address(int size, int offset, Register tmp=rscratch2); + Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2); - + bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const; - -@@ -1343,6 +1350,9 @@ public: + +@@ -1361,6 +1368,9 @@ public: void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) { str(Vx, T, spill_address(1 << (int)T, offset)); } @@ -5979,7 +5980,7 @@ index 7fd9e3c97..bc3175b2b 100644 void unspill(Register Rx, bool is64, int offset) { if (is64) { ldr(Rx, spill_address(8, offset)); -@@ -1353,6 +1363,9 @@ public: +@@ -1371,6 +1381,9 @@ public: void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) { ldr(Vx, T, spill_address(1 << (int)T, offset)); } @@ -5989,7 +5990,7 @@ index 7fd9e3c97..bc3175b2b 100644 void spill_copy128(int src_offset, int dst_offset, Register tmp1=rscratch1, Register tmp2=rscratch2) { if (src_offset < 512 && (src_offset & 7) == 0 && -@@ -1366,6 +1379,15 @@ public: +@@ -1384,6 +1397,15 @@ public: spill(tmp1, true, dst_offset+8); } } @@ -6006,7 +6007,7 @@ index 7fd9e3c97..bc3175b2b 100644 #ifdef ASSERT diff --git a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp -index 15131ed32..e337f582a 100644 +index 1602a78..e476456 100644 --- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp @@ -196,3 +196,5 @@ REGISTER_DEFINITION(PRegister, p4); @@ -6016,10 +6017,10 @@ index 15131ed32..e337f582a 100644 + +REGISTER_DEFINITION(PRegister, ptrue); diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp -index 3bf7284a7..6242cce08 100644 +index 4b35aa6..491e29d 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp -@@ -151,7 +151,7 @@ OopMap* 
RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ +@@ -152,7 +152,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ // Save Integer and Float registers. __ enter(); @@ -6028,7 +6029,7 @@ index 3bf7284a7..6242cce08 100644 // Set an oopmap for the call site. This oopmap will map all // oop-registers and debug-info registers as callee-saved. This -@@ -190,10 +190,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ +@@ -191,10 +191,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ } void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { @@ -6045,7 +6046,7 @@ index 3bf7284a7..6242cce08 100644 __ leave(); } -@@ -2786,6 +2791,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t +@@ -2810,6 +2815,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t __ maybe_isb(); __ membar(Assembler::LoadLoad | Assembler::LoadStore); @@ -6059,7 +6060,7 @@ index 3bf7284a7..6242cce08 100644 __ cbz(rscratch1, noException); diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp -index 26a54c87e..85f64c007 100644 +index d307871..cd3f6f4 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -488,6 +488,11 @@ class StubGenerator: public StubCodeGenerator { @@ -6074,7 +6075,7 @@ index 26a54c87e..85f64c007 100644 // we should not really care that lr is no longer the callee // address. we saved the value the handler needs in r19 so we can // just copy it to r3. however, the C2 handler will push its own -@@ -5092,6 +5097,12 @@ class StubGenerator: public StubCodeGenerator { +@@ -5421,6 +5426,12 @@ class StubGenerator: public StubCodeGenerator { __ reset_last_Java_frame(true); __ maybe_isb(); @@ -6088,7 +6089,7 @@ index 26a54c87e..85f64c007 100644 // check for pending exceptions diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad -index 0ef4d7f3e..03248b2e0 100644 +index f142afa..98e6780 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) { @@ -6101,7 +6102,7 @@ index 0ef4d7f3e..03248b2e0 100644 // TODO // identify extra cases that we might want to provide match rules for diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad -index 7ee16a975..571a6aeb0 100644 +index b3bf64c..cc1e1a1 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -2242,7 +2242,7 @@ const bool Matcher::match_rule_supported(int opcode) { @@ -6114,7 +6115,7 @@ index 7ee16a975..571a6aeb0 100644 // TODO // identify extra cases that we might want to provide match rules for diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad -index ea09aaafc..782c1c7c4 100644 +index ea09aaa..782c1c7 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1522,7 +1522,7 @@ const bool Matcher::match_rule_supported(int opcode) { @@ -6127,7 +6128,7 @@ index ea09aaafc..782c1c7c4 100644 // Identify extra cases that we might want to provide match rules for // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. 
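
The arm.ad, ppc.ad, and s390.ad hunks above and the sparc.ad, x86.ad, and matcher.hpp hunks below all thread a new BasicType parameter through Matcher::match_rule_supported_vector(), letting a backend veto a vector match rule per element type instead of per opcode alone. The following is a minimal C++ sketch of the kind of size check this enables on an AArch64-style target; the enum, helper names, and thresholds are illustrative assumptions, not the patched HotSpot code.

  // Stand-ins for HotSpot's BasicType and globals; illustrative only.
  enum BasicType { T_BYTE, T_SHORT, T_INT, T_LONG, T_FLOAT, T_DOUBLE };

  static int elem_bytes(BasicType bt) {
    switch (bt) {
      case T_BYTE:              return 1;
      case T_SHORT:             return 2;
      case T_INT: case T_FLOAT: return 4;
      default:                  return 8;  // T_LONG, T_DOUBLE
    }
  }

  // NEON rules in this port use 64- and 128-bit vectors (vecD/vecX),
  // while SVE accepts sizes up to the register width probed at startup,
  // so lane count and element type together decide if a rule is usable.
  static bool vector_size_supported(int vlen, BasicType bt,
                                    int use_sve, int max_vector_bytes) {
    const int size_in_bytes = vlen * elem_bytes(bt);
    if (use_sve > 0) {
      return size_in_bytes <= max_vector_bytes;
    }
    return size_in_bytes == 8 || size_in_bytes == 16;
  }

For example, with use_sve == 0 a 4-lane T_INT vector (16 bytes) is accepted but an 8-lane one (32 bytes) is not, while under SVE the 32-byte vector passes once max_vector_bytes is probed as 32 or larger.
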
diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad -index a09c795c9..3b1b1046e 100644 +index a09c795..3b1b104 100644 --- a/src/hotspot/cpu/sparc/sparc.ad +++ b/src/hotspot/cpu/sparc/sparc.ad @@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) { @@ -6140,10 +6141,10 @@ index a09c795c9..3b1b1046e 100644 // TODO // identify extra cases that we might want to provide match rules for diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad -index 76dd6addd..93aee6d6c 100644 +index 4e1336f..b75d0ff 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad -@@ -1354,7 +1354,7 @@ const bool Matcher::match_rule_supported(int opcode) { +@@ -1379,7 +1379,7 @@ const bool Matcher::match_rule_supported(int opcode) { return ret_value; // Per default match rules are supported. } @@ -6153,7 +6154,7 @@ index 76dd6addd..93aee6d6c 100644 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen bool ret_value = match_rule_supported(opcode); diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp -index ed890f88e..9a8307102 100644 +index ed890f8..9a83071 100644 --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -310,7 +310,7 @@ public: @@ -6166,7 +6167,7 @@ index ed890f88e..9a8307102 100644 // Some microarchitectures have mask registers used on vectors static const bool has_predicated_vectors(void); diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp -index 92f70b77d..ed67928f5 100644 +index fed52e4..ee58323 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz @@ -6184,7 +6185,7 @@ index 92f70b77d..ed67928f5 100644 assert(lpt->_head->is_CountedLoop(), "must be"); CountedLoopNode *cl = lpt->_head->as_CountedLoop(); diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp -index 1f2cf2c64..6867177c1 100644 +index 1f2cf2c..6867177 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -1,5 +1,5 @@ @@ -6212,6 +6213,3 @@ index 1f2cf2c64..6867177c1 100644 } return false; } --- -2.19.1 - diff --git a/openjdk-11.spec b/openjdk-11.spec index 4e7104d..50bc7bb 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -740,7 +740,7 @@ Provides: java-src%{?1} = %{epoch}:%{version}-%{release} Name: java-%{javaver}-%{origin} Version: %{newjavaver}.%{buildver} -Release: 0 +Release: 1 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. This created a @@ -1677,6 +1677,9 @@ cjc.mainProgram(arg) %changelog +* Wed Aug 2023 noah - 1:11.0.20.8-1 +- fix CPUBench kmeans random fails + * Wed Jul 2023 DXwangg - 1:11.0.20.8-0 - update to 11.0.20+8(GA) - modified delete_expired_certificates.patch
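
For reference, the MUL VL addressing mode exercised by the new asmtest expectations earlier in this patch (for example "ld1h {z10.h}, p1/z, [sp, #-8, MUL VL]" and "ldr z31, [sp, #-256, MUL VL]") scales the immediate by the hardware vector length at execution time, which is what keeps SVE spill offsets vector-length agnostic and motivates the sve_spill_address helper added to the macro assembler above. A small C++ sketch of the arithmetic and the immediate ranges involved; the helper names are ours, not HotSpot's.

  #include <cassert>
  #include <cstdint>

  // Contiguous ld1/st1 accept a signed 4-bit vector-length multiple;
  // plain SVE ldr/str of a z-register accept a signed 9-bit multiple.
  static bool sve_ld1_imm_ok(int64_t imm) { return imm >= -8 && imm <= 7; }
  static bool sve_ldr_imm_ok(int64_t imm) { return imm >= -256 && imm <= 255; }

  // Effective address of "[base, #imm, MUL VL]": the immediate counts
  // whole vector registers, whatever length the machine implements.
  static int64_t mul_vl_address(int64_t base, int64_t imm, int vl_bytes) {
    // SVE vector lengths are multiples of 16 bytes, from 16 up to 256.
    assert(vl_bytes >= 16 && vl_bytes <= 256 && vl_bytes % 16 == 0);
    return base + imm * vl_bytes;
  }

On a 256-bit (32-byte) SVE machine, "[sp, #-8, MUL VL]" therefore resolves to sp - 256 bytes, while the same encoding on a 512-bit machine resolves to sp - 512; the emitted code sequence does not change.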